regex.hGo to the documentation of this file.00001 #ifndef _REGEX_H_ 00002 #define _REGEX_H_ /* never again */ 00003 /* 00004 * regular expressions 00005 * 00006 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 00007 * 00008 * Development of this software was funded, in part, by Cray Research Inc., 00009 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 00010 * Corporation, none of whom are responsible for the results. The author 00011 * thanks all of them. 00012 * 00013 * Redistribution and use in source and binary forms -- with or without 00014 * modification -- are permitted for any purpose, provided that 00015 * redistributions in source form retain this entire copyright notice and 00016 * indicate the origin and nature of any modifications. 00017 * 00018 * I'd appreciate being given credit for this package in the documentation 00019 * of software which uses it, but that is not a requirement. 00020 * 00021 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 00022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 00023 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00024 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00025 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00026 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00027 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00028 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00029 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00030 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 * 00032 * 00033 * 00034 * Prototypes etc. marked with "^" within comments get gathered up (and 00035 * possibly edited) by the regfwd program and inserted near the bottom of 00036 * this file. 00037 * 00038 * We offer the option of declaring one wide-character version of the 00039 * RE functions as well as the char versions. To do that, define 00040 * __REG_WIDE_T to the type of wide characters (unfortunately, there 00041 * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and 00042 * __REG_WIDE_EXEC to the names to be used for the compile and execute 00043 * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter 00044 * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). 00045 * For cranky old compilers, it may be necessary to do something like: 00046 * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) 00047 * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) 00048 * rather than just #defining the names as parameterless macros. 00049 * 00050 * For some specialized purposes, it may be desirable to suppress the 00051 * declarations of the "front end" functions, regcomp() and regexec(), 00052 * or of the char versions of the compile and execute functions. To 00053 * suppress the front-end functions, define __REG_NOFRONT. To suppress 00054 * the char versions, define __REG_NOCHAR. 00055 * 00056 * The right place to do those defines (and some others you may want, see 00057 * below) would be <sys/types.h>. If you don't have control of that file, 00058 * the right place to add your own defines to this file is marked below. 00059 * This is normally done automatically, by the makefile and regmkhdr, based 00060 * on the contents of regcustom.h. 00061 */ 00062 00063 00064 00065 /* 00066 * voodoo for C++ 00067 */ 00068 #ifdef __cplusplus 00069 extern "C" { 00070 #endif 00071 00072 00073 00074 /* 00075 * Add your own defines, if needed, here. 00076 */ 00077 00078 00079 00080 /* 00081 * Location where a chunk of regcustom.h is automatically spliced into 00082 * this file (working from its prototype, regproto.h). 00083 */ 00084 /* --- begin --- */ 00085 /* ensure certain things don't sneak in from system headers */ 00086 #ifdef __REG_WIDE_T 00087 #undef __REG_WIDE_T 00088 #endif 00089 #ifdef __REG_WIDE_COMPILE 00090 #undef __REG_WIDE_COMPILE 00091 #endif 00092 #ifdef __REG_WIDE_EXEC 00093 #undef __REG_WIDE_EXEC 00094 #endif 00095 #ifdef __REG_REGOFF_T 00096 #undef __REG_REGOFF_T 00097 #endif 00098 #ifdef __REG_VOID_T 00099 #undef __REG_VOID_T 00100 #endif 00101 #ifdef __REG_CONST 00102 #undef __REG_CONST 00103 #endif 00104 #ifdef __REG_NOFRONT 00105 #undef __REG_NOFRONT 00106 #endif 00107 #ifdef __REG_NOCHAR 00108 #undef __REG_NOCHAR 00109 #endif 00110 /* interface types */ 00111 #define __REG_WIDE_T Tcl_UniChar 00112 #define __REG_REGOFF_T long /* not really right, but good enough... */ 00113 #define __REG_VOID_T VOID 00114 #define __REG_CONST CONST 00115 /* names and declarations */ 00116 #define __REG_WIDE_COMPILE TclReComp 00117 #define __REG_WIDE_EXEC TclReExec 00118 #define __REG_NOFRONT /* don't want regcomp() and regexec() */ 00119 #define __REG_NOCHAR /* or the char versions */ 00120 #define regfree TclReFree 00121 #define regerror TclReError 00122 /* --- end --- */ 00123 00124 00125 /* 00126 * interface types etc. 00127 */ 00128 00129 /* 00130 * regoff_t has to be large enough to hold either off_t or ssize_t, 00131 * and must be signed; it's only a guess that long is suitable, so we 00132 * offer <sys/types.h> an override. 00133 */ 00134 #ifdef __REG_REGOFF_T 00135 typedef __REG_REGOFF_T regoff_t; 00136 #else 00137 typedef long regoff_t; 00138 #endif 00139 00140 /* 00141 * For benefit of old compilers, we offer <sys/types.h> the option of 00142 * overriding the `void' type used to declare nonexistent return types. 00143 */ 00144 #ifdef __REG_VOID_T 00145 typedef __REG_VOID_T re_void; 00146 #else 00147 typedef void re_void; 00148 #endif 00149 00150 /* 00151 * Also for benefit of old compilers, <sys/types.h> can supply a macro 00152 * which expands to a substitute for `const'. 00153 */ 00154 #ifndef __REG_CONST 00155 #define __REG_CONST const 00156 #endif 00157 00158 00159 00160 /* 00161 * other interface types 00162 */ 00163 00164 /* the biggie, a compiled RE (or rather, a front end to same) */ 00165 typedef struct { 00166 int re_magic; /* magic number */ 00167 size_t re_nsub; /* number of subexpressions */ 00168 long re_info; /* information about RE */ 00169 # define REG_UBACKREF 000001 00170 # define REG_ULOOKAHEAD 000002 00171 # define REG_UBOUNDS 000004 00172 # define REG_UBRACES 000010 00173 # define REG_UBSALNUM 000020 00174 # define REG_UPBOTCH 000040 00175 # define REG_UBBS 000100 00176 # define REG_UNONPOSIX 000200 00177 # define REG_UUNSPEC 000400 00178 # define REG_UUNPORT 001000 00179 # define REG_ULOCALE 002000 00180 # define REG_UEMPTYMATCH 004000 00181 # define REG_UIMPOSSIBLE 010000 00182 # define REG_USHORTEST 020000 00183 int re_csize; /* sizeof(character) */ 00184 char *re_endp; /* backward compatibility kludge */ 00185 /* the rest is opaque pointers to hidden innards */ 00186 char *re_guts; /* `char *' is more portable than `void *' */ 00187 char *re_fns; 00188 } regex_t; 00189 00190 /* result reporting (may acquire more fields later) */ 00191 typedef struct { 00192 regoff_t rm_so; /* start of substring */ 00193 regoff_t rm_eo; /* end of substring */ 00194 } regmatch_t; 00195 00196 /* supplementary control and reporting */ 00197 typedef struct { 00198 regmatch_t rm_extend; /* see REG_EXPECT */ 00199 } rm_detail_t; 00200 00201 00202 00203 /* 00204 * compilation 00205 ^ #ifndef __REG_NOCHAR 00206 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); 00207 ^ #endif 00208 ^ #ifndef __REG_NOFRONT 00209 ^ int regcomp(regex_t *, __REG_CONST char *, int); 00210 ^ #endif 00211 ^ #ifdef __REG_WIDE_T 00212 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); 00213 ^ #endif 00214 */ 00215 #define REG_BASIC 000000 /* BREs (convenience) */ 00216 #define REG_EXTENDED 000001 /* EREs */ 00217 #define REG_ADVF 000002 /* advanced features in EREs */ 00218 #define REG_ADVANCED 000003 /* AREs (which are also EREs) */ 00219 #define REG_QUOTE 000004 /* no special characters, none */ 00220 #define REG_NOSPEC REG_QUOTE /* historical synonym */ 00221 #define REG_ICASE 000010 /* ignore case */ 00222 #define REG_NOSUB 000020 /* don't care about subexpressions */ 00223 #define REG_EXPANDED 000040 /* expanded format, white space & comments */ 00224 #define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ 00225 #define REG_NLANCH 000200 /* ^ matches after \n, $ before */ 00226 #define REG_NEWLINE 000300 /* newlines are line terminators */ 00227 #define REG_PEND 000400 /* ugh -- backward-compatibility hack */ 00228 #define REG_EXPECT 001000 /* report details on partial/limited matches */ 00229 #define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ 00230 #define REG_DUMP 004000 /* none of your business :-) */ 00231 #define REG_FAKE 010000 /* none of your business :-) */ 00232 #define REG_PROGRESS 020000 /* none of your business :-) */ 00233 00234 00235 00236 /* 00237 * execution 00238 ^ #ifndef __REG_NOCHAR 00239 ^ int re_exec(regex_t *, __REG_CONST char *, size_t, 00240 ^ rm_detail_t *, size_t, regmatch_t [], int); 00241 ^ #endif 00242 ^ #ifndef __REG_NOFRONT 00243 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); 00244 ^ #endif 00245 ^ #ifdef __REG_WIDE_T 00246 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, 00247 ^ rm_detail_t *, size_t, regmatch_t [], int); 00248 ^ #endif 00249 */ 00250 #define REG_NOTBOL 0001 /* BOS is not BOL */ 00251 #define REG_NOTEOL 0002 /* EOS is not EOL */ 00252 #define REG_STARTEND 0004 /* backward compatibility kludge */ 00253 #define REG_FTRACE 0010 /* none of your business */ 00254 #define REG_MTRACE 0020 /* none of your business */ 00255 #define REG_SMALL 0040 /* none of your business */ 00256 00257 00258 00259 /* 00260 * misc generics (may be more functions here eventually) 00261 ^ re_void regfree(regex_t *); 00262 */ 00263 00264 00265 00266 /* 00267 * error reporting 00268 * Be careful if modifying the list of error codes -- the table used by 00269 * regerror() is generated automatically from this file! 00270 * 00271 * Note that there is no wide-char variant of regerror at this time; what 00272 * kind of character is used for error reports is independent of what kind 00273 * is used in matching. 00274 * 00275 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); 00276 */ 00277 #define REG_OKAY 0 /* no errors detected */ 00278 #define REG_NOMATCH 1 /* failed to match */ 00279 #define REG_BADPAT 2 /* invalid regexp */ 00280 #define REG_ECOLLATE 3 /* invalid collating element */ 00281 #define REG_ECTYPE 4 /* invalid character class */ 00282 #define REG_EESCAPE 5 /* invalid escape \ sequence */ 00283 #define REG_ESUBREG 6 /* invalid backreference number */ 00284 #define REG_EBRACK 7 /* brackets [] not balanced */ 00285 #define REG_EPAREN 8 /* parentheses () not balanced */ 00286 #define REG_EBRACE 9 /* braces {} not balanced */ 00287 #define REG_BADBR 10 /* invalid repetition count(s) */ 00288 #define REG_ERANGE 11 /* invalid character range */ 00289 #define REG_ESPACE 12 /* out of memory */ 00290 #define REG_BADRPT 13 /* quantifier operand invalid */ 00291 #define REG_ASSERT 15 /* "can't happen" -- you found a bug */ 00292 #define REG_INVARG 16 /* invalid argument to regex function */ 00293 #define REG_MIXED 17 /* character widths of regex and string differ */ 00294 #define REG_BADOPT 18 /* invalid embedded option */ 00295 /* two specials for debugging and testing */ 00296 #define REG_ATOI 101 /* convert error-code name to number */ 00297 #define REG_ITOA 102 /* convert error-code number to name */ 00298 00299 00300 00301 /* 00302 * the prototypes, as possibly munched by regfwd 00303 */ 00304 /* =====^!^===== begin forwards =====^!^===== */ 00305 /* automatically gathered by fwd; do not hand-edit */ 00306 /* === regproto.h === */ 00307 #ifndef __REG_NOCHAR 00308 int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); 00309 #endif 00310 #ifndef __REG_NOFRONT 00311 int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); 00312 #endif 00313 #ifdef __REG_WIDE_T 00314 int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); 00315 #endif 00316 #ifndef __REG_NOCHAR 00317 int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); 00318 #endif 00319 #ifndef __REG_NOFRONT 00320 int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int)); 00321 #endif 00322 #ifdef __REG_WIDE_T 00323 int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); 00324 #endif 00325 re_void regfree _ANSI_ARGS_((regex_t *)); 00326 extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); 00327 /* automatically gathered by fwd; do not hand-edit */ 00328 /* =====^!^===== end forwards =====^!^===== */ 00329 00330 00331 00332 /* 00333 * more C++ voodoo 00334 */ 00335 #ifdef __cplusplus 00336 } 00337 #endif 00338 00339 00340 00341 #endif |