regex.h

Go to the documentation of this file.
00001 #ifndef _REGEX_H_
00002 #define _REGEX_H_       /* never again */
00003 /*
00004  * regular expressions
00005  *
00006  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
00007  * 
00008  * Development of this software was funded, in part, by Cray Research Inc.,
00009  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
00010  * Corporation, none of whom are responsible for the results.  The author
00011  * thanks all of them. 
00012  * 
00013  * Redistribution and use in source and binary forms -- with or without
00014  * modification -- are permitted for any purpose, provided that
00015  * redistributions in source form retain this entire copyright notice and
00016  * indicate the origin and nature of any modifications.
00017  *
00018  * I'd appreciate being given credit for this package in the documentation
00019  * of software which uses it, but that is not a requirement.
00020  * 
00021  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
00022  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
00023  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
00024  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00025  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00026  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
00027  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00028  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00029  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00030  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  *
00032  *
00033  *
00034  * Prototypes etc. marked with "^" within comments get gathered up (and
00035  * possibly edited) by the regfwd program and inserted near the bottom of
00036  * this file.
00037  *
00038  * We offer the option of declaring one wide-character version of the
00039  * RE functions as well as the char versions.  To do that, define
00040  * __REG_WIDE_T to the type of wide characters (unfortunately, there
00041  * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
00042  * __REG_WIDE_EXEC to the names to be used for the compile and execute
00043  * functions (suggestion:  re_Xcomp and re_Xexec, where X is a letter
00044  * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
00045  * For cranky old compilers, it may be necessary to do something like:
00046  * #define      __REG_WIDE_COMPILE(a,b,c,d)     re_Xcomp(a,b,c,d)
00047  * #define      __REG_WIDE_EXEC(a,b,c,d,e,f,g)  re_Xexec(a,b,c,d,e,f,g)
00048  * rather than just #defining the names as parameterless macros.
00049  *
00050  * For some specialized purposes, it may be desirable to suppress the
00051  * declarations of the "front end" functions, regcomp() and regexec(),
00052  * or of the char versions of the compile and execute functions.  To
00053  * suppress the front-end functions, define __REG_NOFRONT.  To suppress
00054  * the char versions, define __REG_NOCHAR.
00055  *
00056  * The right place to do those defines (and some others you may want, see
00057  * below) would be <sys/types.h>.  If you don't have control of that file,
00058  * the right place to add your own defines to this file is marked below.
00059  * This is normally done automatically, by the makefile and regmkhdr, based
00060  * on the contents of regcustom.h.
00061  */
00062 
00063 
00064 
00065 /*
00066  * voodoo for C++: give everything declared below C linkage
00067  */
00068 #ifdef __cplusplus
00069 extern "C" {
00070 #endif
00071 
00072 
00073 
00074 /*
00075  * Add your own defines, if needed, here.
00076  */
00077 
00078 
00079 
00080 /*
00081  * Location where a chunk of regcustom.h is automatically spliced into
00082  * this file (working from its prototype, regproto.h).
00083  */
00084 /* --- begin --- */
00085 /* ensure certain things don't sneak in from system headers: drop any earlier definitions of the customization macros set below */
00086 #ifdef __REG_WIDE_T
00087 #undef __REG_WIDE_T
00088 #endif
00089 #ifdef __REG_WIDE_COMPILE
00090 #undef __REG_WIDE_COMPILE
00091 #endif
00092 #ifdef __REG_WIDE_EXEC
00093 #undef __REG_WIDE_EXEC
00094 #endif
00095 #ifdef __REG_REGOFF_T
00096 #undef __REG_REGOFF_T
00097 #endif
00098 #ifdef __REG_VOID_T
00099 #undef __REG_VOID_T
00100 #endif
00101 #ifdef __REG_CONST
00102 #undef __REG_CONST
00103 #endif
00104 #ifdef __REG_NOFRONT
00105 #undef __REG_NOFRONT
00106 #endif
00107 #ifdef __REG_NOCHAR
00108 #undef __REG_NOCHAR
00109 #endif
00110 /* interface types (Tcl_UniChar, VOID and CONST are not defined in this file; whatever is included before it must supply them) */
00111 #define __REG_WIDE_T    Tcl_UniChar
00112 #define __REG_REGOFF_T  long    /* not really right, but good enough... */
00113 #define __REG_VOID_T    VOID
00114 #define __REG_CONST     CONST
00115 /* names and declarations */
00116 #define __REG_WIDE_COMPILE      TclReComp       /* name under which the wide compile function is declared */
00117 #define __REG_WIDE_EXEC         TclReExec       /* name under which the wide execute function is declared */
00118 #define __REG_NOFRONT           /* don't want regcomp() and regexec() */
00119 #define __REG_NOCHAR            /* or the char versions */
00120 #define regfree         TclReFree       /* every later use of regfree becomes TclReFree */
00121 #define regerror        TclReError      /* every later use of regerror becomes TclReError */
00122 /* --- end --- */
00123 
00124 
00125 /*
00126  * interface types etc.
00127  */
00128 
00129 /*
00130  * regoff_t has to be large enough to hold either off_t or ssize_t, and
00131  * must be signed.  long is only a guess, so __REG_REGOFF_T overrides it
00132  * (in this copy the splice above always defines __REG_REGOFF_T as long).
00133  */
00134 #ifdef __REG_REGOFF_T
00135 typedef __REG_REGOFF_T regoff_t;
00136 #else
00137 typedef long regoff_t;
00138 #endif
00139 
00140 /*
00141  * For benefit of old compilers, __REG_VOID_T may override the `void' type
00142  * used to declare nonexistent return types (it is VOID in this copy).
00143  */
00144 #ifdef __REG_VOID_T
00145 typedef __REG_VOID_T re_void;
00146 #else
00147 typedef void re_void;
00148 #endif
00149 
00150 /*
00151  * Also for benefit of old compilers, __REG_CONST may be predefined as a
00152  * substitute for `const'; plain const is used only if it is undefined.
00153  */
00154 #ifndef __REG_CONST
00155 #define __REG_CONST     const
00156 #endif
00157 
00158 
00159 
00160 /*
00161  * other interface types
00162  */
00163 
00164 /* the biggie, a compiled RE (or rather, a front end to same) */
00165 typedef struct {
00166         int re_magic;           /* magic number */
00167         size_t re_nsub;         /* number of subexpressions */
00168         long re_info;           /* information about RE: REG_U* bits below */
00169 #               define  REG_UBACKREF            000001  /* values are octal; each REG_U* is a single distinct bit */
00170 #               define  REG_ULOOKAHEAD          000002
00171 #               define  REG_UBOUNDS             000004
00172 #               define  REG_UBRACES             000010
00173 #               define  REG_UBSALNUM            000020
00174 #               define  REG_UPBOTCH             000040
00175 #               define  REG_UBBS                000100
00176 #               define  REG_UNONPOSIX           000200
00177 #               define  REG_UUNSPEC             000400
00178 #               define  REG_UUNPORT             001000
00179 #               define  REG_ULOCALE             002000
00180 #               define  REG_UEMPTYMATCH         004000
00181 #               define  REG_UIMPOSSIBLE         010000
00182 #               define  REG_USHORTEST           020000
00183         int re_csize;           /* sizeof(character) */
00184         char *re_endp;          /* backward compatibility kludge */
00185         /* the rest is opaque pointers to hidden innards; callers should not interpret them */
00186         char *re_guts;          /* `char *' is more portable than `void *' */
00187         char *re_fns;           /* opaque, like re_guts */
00188 } regex_t;
00189 
00190 /* result reporting: bounds of one matched substring, as regoff_t values (may acquire more fields later) */
00191 typedef struct {
00192         regoff_t rm_so;         /* start of substring */
00193         regoff_t rm_eo;         /* end of substring */
00194 } regmatch_t;
00195 
00196 /* supplementary control and reporting; passed to the exec functions as their rm_detail_t * argument */
00197 typedef struct {
00198         regmatch_t rm_extend;   /* see REG_EXPECT (compile flag: report details on partial/limited matches) */
00199 } rm_detail_t;
00200 
00201 
00202 
00203 /*
00204  * compilation (^ lines are prototypes for regfwd; the compile-time REG_* flag values follow the comment)
00205  ^ #ifndef __REG_NOCHAR
00206  ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
00207  ^ #endif
00208  ^ #ifndef __REG_NOFRONT
00209  ^ int regcomp(regex_t *, __REG_CONST char *, int);
00210  ^ #endif
00211  ^ #ifdef __REG_WIDE_T
00212  ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
00213  ^ #endif
00214  */
00215 #define REG_BASIC       000000  /* BREs (convenience; no bits set) */
00216 #define REG_EXTENDED    000001  /* EREs */
00217 #define REG_ADVF        000002  /* advanced features in EREs */
00218 #define REG_ADVANCED    000003  /* AREs (which are also EREs): REG_EXTENDED|REG_ADVF */
00219 #define REG_QUOTE       000004  /* no special characters, none */
00220 #define REG_NOSPEC      REG_QUOTE       /* historical synonym */
00221 #define REG_ICASE       000010  /* ignore case */
00222 #define REG_NOSUB       000020  /* don't care about subexpressions */
00223 #define REG_EXPANDED    000040  /* expanded format, white space & comments */
00224 #define REG_NLSTOP      000100  /* \n doesn't match . or [^ ] */
00225 #define REG_NLANCH      000200  /* ^ matches after \n, $ before */
00226 #define REG_NEWLINE     000300  /* newlines are line terminators: REG_NLSTOP|REG_NLANCH */
00227 #define REG_PEND        000400  /* ugh -- backward-compatibility hack */
00228 #define REG_EXPECT      001000  /* report details on partial/limited matches */
00229 #define REG_BOSONLY     002000  /* temporary kludge for BOS-only matches */
00230 #define REG_DUMP        004000  /* none of your business :-) */
00231 #define REG_FAKE        010000  /* none of your business :-) */
00232 #define REG_PROGRESS    020000  /* none of your business :-) */
00233 
00234 
00235 
00236 /*
00237  * execution (^ lines are prototypes for regfwd; the REG_* flag bits of this section follow the comment)
00238  ^ #ifndef __REG_NOCHAR
00239  ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
00240  ^                              rm_detail_t *, size_t, regmatch_t [], int);
00241  ^ #endif
00242  ^ #ifndef __REG_NOFRONT
00243  ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
00244  ^ #endif
00245  ^ #ifdef __REG_WIDE_T
00246  ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
00247  ^                              rm_detail_t *, size_t, regmatch_t [], int);
00248  ^ #endif
00249  */
00250 #define REG_NOTBOL      0001    /* BOS is not BOL */
00251 #define REG_NOTEOL      0002    /* EOS is not EOL */
00252 #define REG_STARTEND    0004    /* backward compatibility kludge */
00253 #define REG_FTRACE      0010    /* none of your business */
00254 #define REG_MTRACE      0020    /* none of your business */
00255 #define REG_SMALL       0040    /* none of your business */
00256 
00257 
00258 
00259 /*
00260  * misc generics (may be more functions here eventually)
00261  ^ re_void regfree(regex_t *);
00262  */
00263 
00264 
00265 
00266 /*
00267  * error reporting
00268  * Be careful if modifying the list of error codes -- the table used by
00269  * regerror() is generated automatically from this file!  (14 is unassigned.)
00270  *
00271  * Note that there is no wide-char variant of regerror at this time; what
00272  * kind of character is used for error reports is independent of what kind
00273  * is used in matching.
00274  *
00275  ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
00276  */
00277 #define REG_OKAY         0      /* no errors detected */
00278 #define REG_NOMATCH      1      /* failed to match */
00279 #define REG_BADPAT       2      /* invalid regexp */
00280 #define REG_ECOLLATE     3      /* invalid collating element */
00281 #define REG_ECTYPE       4      /* invalid character class */
00282 #define REG_EESCAPE      5      /* invalid escape \ sequence */
00283 #define REG_ESUBREG      6      /* invalid backreference number */
00284 #define REG_EBRACK       7      /* brackets [] not balanced */
00285 #define REG_EPAREN       8      /* parentheses () not balanced */
00286 #define REG_EBRACE       9      /* braces {} not balanced */
00287 #define REG_BADBR       10      /* invalid repetition count(s) */
00288 #define REG_ERANGE      11      /* invalid character range */
00289 #define REG_ESPACE      12      /* out of memory */
00290 #define REG_BADRPT      13      /* quantifier operand invalid */
00291 #define REG_ASSERT      15      /* "can't happen" -- you found a bug */
00292 #define REG_INVARG      16      /* invalid argument to regex function */
00293 #define REG_MIXED       17      /* character widths of regex and string differ */
00294 #define REG_BADOPT      18      /* invalid embedded option */
00295 /* two specials for debugging and testing */
00296 #define REG_ATOI        101     /* convert error-code name to number */
00297 #define REG_ITOA        102     /* convert error-code number to name */
00298 
00299 
00300 
00301 /*
00302  * the prototypes, as possibly munched by regfwd; since this file defines __REG_NOCHAR, __REG_NOFRONT and __REG_WIDE_T above, only the wide compile/exec, regfree and regerror declarations are active (_ANSI_ARGS_ is not defined in this file)
00303  */
00304 /* =====^!^===== begin forwards =====^!^===== */
00305 /* automatically gathered by fwd; do not hand-edit */
00306 /* === regproto.h === */
00307 #ifndef __REG_NOCHAR
00308 int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
00309 #endif
00310 #ifndef __REG_NOFRONT
00311 int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
00312 #endif
00313 #ifdef __REG_WIDE_T
00314 int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
00315 #endif
00316 #ifndef __REG_NOCHAR
00317 int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
00318 #endif
00319 #ifndef __REG_NOFRONT
00320 int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
00321 #endif
00322 #ifdef __REG_WIDE_T
00323 int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
00324 #endif
00325 re_void regfree _ANSI_ARGS_((regex_t *));
00326 extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
00327 /* automatically gathered by fwd; do not hand-edit */
00328 /* =====^!^===== end forwards =====^!^===== */
00329 
00330 
00331 
00332 /*
00333  * more C++ voodoo: close the extern "C" block opened near the top
00334  */
00335 #ifdef __cplusplus
00336 }
00337 #endif
00338 
00339 
00340 
00341 #endif /* _REGEX_H_ */

Generated on Mon Oct 23 15:05:30 2006 for OpenSees by doxygen 1.5.0