/**********************************************************************

  oniguruma.h - Oniguruma (regular expression library)

  Copyright (C) 2002-2003  K.Kosako (kosako@sofnec.co.jp)

**********************************************************************/
/* Include guard; the matching #endif is the last line of this header. */
#ifndef ONIGURUMA_H
#define ONIGURUMA_H

/* Project-local compatibility header (its contents are not visible here). */
#include "php_compat.h"

/* Marker macro plus the three components of the version number (1.9.1). */
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 1
#define ONIGURUMA_VERSION_MINOR 9
#define ONIGURUMA_VERSION_TEENY 1

/* config parameters */
/* RE_NREGS may be predefined by the includer; otherwise it defaults to 10.
   REG_NREGION always mirrors whatever value RE_NREGS ends up with. */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif
#define REG_NREGION RE_NREGS
#define REG_MAX_BACKREF_NUM 1000
#define REG_MAX_REPEAT_NUM 100000
#define REG_MAX_MULTI_BYTE_RANGES_NUM 1000
/* constants */
#define REG_MAX_ERROR_MESSAGE_LEN 90

/* P_(args) wraps a prototype's parameter list (written with double
   parentheses at the use site).  Under __STDC__ the list is kept as is;
   otherwise it is replaced by an empty "()" list.
   An includer may supply its own P_ beforehand. */
#ifndef P_
#ifdef __STDC__
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif

/* PV_(args) is the same idea, but keyed on HAVE_STDARG_PROTOTYPES instead
   of __STDC__.  In this header it wraps the parameter list of the
   variadic regex_error_code_to_str prototype. */
#ifndef PV_
#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
#else
# define PV_(args) ()
#endif
#endif

/* REG_EXTERN is the storage/linkage prefix used on every declaration below.
   On _WIN32 (but not Cygwin) it becomes a dllexport declaration when EXPORT
   or RUBY_EXPORT is defined, or a dllimport declaration when IMPORT is
   defined.  If none of those apply it is left undefined here ... */
#ifndef REG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define REG_EXTERN extern __declspec(dllexport)
#elif defined(IMPORT)
#define REG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif

/* ... and this second check supplies the plain "extern" fallback. */
#ifndef REG_EXTERN
#define REG_EXTERN extern
#endif

/* Size of the per-byte tables in this header: the second dimension of
   REG_MBLEN_TABLE and the length of the map[] member of regex_t. */
#define REG_CHAR_TABLE_SIZE 256

/* Null value of RegCharEncoding (a pointer type in both branches below). */
#define REGCODE_UNDEF ((RegCharEncoding )0)

/* RegCharEncoding has two possible representations:
     - RUBY_PLATFORM with M17N_H: a pointer to m17n_encoding, and the default
       encoding is REGCODE_UNDEF;
     - otherwise: a pointer to const char.  Each REGCODE_* macro names one row
       of REG_MBLEN_TABLE, selected by the matching MBCTYPE_* index, and the
       default encoding is the ASCII row. */
#if defined(RUBY_PLATFORM) && defined(M17N_H)
#define REG_RUBY_M17N
typedef m17n_encoding* RegCharEncoding;
#define REGCODE_DEFAULT REGCODE_UNDEF
#else
typedef const char* RegCharEncoding;
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
#define MBCTYPE_UTF8 3

#define REGCODE_ASCII REG_MBLEN_TABLE[MBCTYPE_ASCII]
#define REGCODE_UTF8 REG_MBLEN_TABLE[MBCTYPE_UTF8]
#define REGCODE_EUCJP REG_MBLEN_TABLE[MBCTYPE_EUC]
#define REGCODE_SJIS REG_MBLEN_TABLE[MBCTYPE_SJIS]
#define REGCODE_DEFAULT REGCODE_ASCII

/* Defined elsewhere; the number of rows is not fixed by this declaration. */
REG_EXTERN const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE];
#endif /* else RUBY && M17N */

REG_EXTERN RegCharEncoding RegDefaultCharEncoding;

/* Ruby build without M17N: redefine ismbchar/mbclen on top of the default
   encoding.  mbclen(c) indexes RegDefaultCharEncoding with c converted to
   unsigned char; ismbchar(c) is true when that entry is not 1.
   Only ismbchar is #undef'd first.
   NOTE(review): presumably the table entry is the byte length of the
   character introduced by lead byte c -- confirm against the table
   definition. */
#if defined(RUBY_PLATFORM) && !defined(M17N_H)
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c) RegDefaultCharEncoding[(unsigned char )(c)]
#endif

/* Basic scalar/pointer types used throughout the API. */
typedef unsigned int RegOptionType;
typedef unsigned char* RegTransTableType;
typedef unsigned int RegDistance;
typedef unsigned char UChar;

/* Default option set: REG_OPTION_NONE (0), which is defined further down. */
#define REG_OPTION_DEFAULT REG_OPTION_NONE

/* GNU regex options */
/* One bit per flag, each derived from the previous one by a left shift:
   IGNORECASE = 1, EXTENDED = 2, MULTILINE = 4, SINGLELINE = 8, LONGEST = 16.
   POSIXLINE is the combination MULTILINE|SINGLELINE (12).
   The base constant is 1L, so every derived value has type long. */
#define RE_OPTION_IGNORECASE (1L)
#define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE << 1)
#define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED << 1)
#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE << 1)
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
#define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE << 1)

/* options */
/* The first five REG_OPTION_* names are aliases of the RE_OPTION_* bits
   above; the chain then continues with FIND_NOT_EMPTY = 32,
   NEGATE_SINGLELINE = 64 and CAPTURE_ONLY_NAMED_GROUP = 128. */
#define REG_OPTION_NONE 0
#define REG_OPTION_SINGLELINE RE_OPTION_SINGLELINE
#define REG_OPTION_MULTILINE RE_OPTION_MULTILINE
#define REG_OPTION_IGNORECASE RE_OPTION_IGNORECASE
#define REG_OPTION_EXTEND RE_OPTION_EXTENDED
#define REG_OPTION_FIND_LONGEST RE_OPTION_LONGEST
#define REG_OPTION_FIND_NOT_EMPTY (REG_OPTION_FIND_LONGEST << 1)
#define REG_OPTION_NEGATE_SINGLELINE (REG_OPTION_FIND_NOT_EMPTY << 1)
#define REG_OPTION_CAPTURE_ONLY_NAMED_GROUP (REG_OPTION_NEGATE_SINGLELINE << 1)
/* options (search time) */
/* Continuation of the same bit chain: NOTBOL = 256, NOTEOL = 512,
   POSIX_REGION = 1024. */
#define REG_OPTION_NOTBOL (REG_OPTION_CAPTURE_ONLY_NAMED_GROUP << 1)
#define REG_OPTION_NOTEOL (REG_OPTION_NOTBOL << 1)
#define REG_OPTION_POSIX_REGION (REG_OPTION_NOTEOL << 1)

/* Flag helpers.  REG_OPTION_ON / REG_OPTION_OFF assign to their first
   argument, so it must be a modifiable lvalue.  IS_REG_OPTION_ON yields the
   masked bits themselves (non-zero when any requested bit is set), not a
   normalised 0/1 value. */
#define REG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define REG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
#define IS_REG_OPTION_ON(options,option) ((options) & (option))

/* syntax */
/* Description of one regular-expression dialect.
   NOTE(review): op, op2 and behavior presumably hold OR-ed REG_SYN_OP_*,
   REG_SYN_OP2_* and remaining REG_SYN_* bits respectively (those macros are
   defined later in this header) -- confirm against regcomp.c. */
typedef struct {
  unsigned int op;
  unsigned int op2;
  unsigned int behavior;
  RegOptionType options; /* default option */
} RegSyntaxType;

/* Dialect objects defined elsewhere; the REG_SYNTAX_* macros later in this
   header take their addresses. */
REG_EXTERN RegSyntaxType RegSyntaxPosixBasic;
REG_EXTERN RegSyntaxType RegSyntaxPosixExtended;
REG_EXTERN RegSyntaxType RegSyntaxEmacs;
REG_EXTERN RegSyntaxType RegSyntaxGrep;
REG_EXTERN RegSyntaxType RegSyntaxGnuRegex;
REG_EXTERN RegSyntaxType RegSyntaxJava;
REG_EXTERN RegSyntaxType RegSyntaxPerl;
REG_EXTERN RegSyntaxType RegSyntaxRuby; /* predefined syntaxes (see regcomp.c) */ #define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic) #define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended) #define REG_SYNTAX_EMACS (&RegSyntaxEmacs) #define REG_SYNTAX_GREP (&RegSyntaxGrep) #define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex) #define REG_SYNTAX_JAVA (&RegSyntaxJava) #define REG_SYNTAX_PERL (&RegSyntaxPerl) #define REG_SYNTAX_RUBY (&RegSyntaxRuby) /* default syntax */ #define REG_SYNTAX_DEFAULT RegDefaultSyntax REG_EXTERN RegSyntaxType* RegDefaultSyntax; /* syntax (operators) */ #define REG_SYN_OP_ANYCHAR 1 /* . */ #define REG_SYN_OP_0INF (1<<1) /* * */ #define REG_SYN_OP_ESC_0INF (1<<2) #define REG_SYN_OP_1INF (1<<3) /* + */ #define REG_SYN_OP_ESC_1INF (1<<4) #define REG_SYN_OP_01 (1<<5) /* ? */ #define REG_SYN_OP_ESC_01 (1<<6) #define REG_SYN_OP_INTERVAL (1<<7) /* {lower,upper} */ #define REG_SYN_OP_ESC_INTERVAL (1<<8) #define REG_SYN_OP_ALT (1<<9) /* | */ #define REG_SYN_OP_ESC_ALT (1<<10) #define REG_SYN_OP_SUBEXP (1<<11) /* (...) */ #define REG_SYN_OP_ESC_SUBEXP (1<<12) #define REG_SYN_OP_ESC_BUF_ANCHOR (1<<13) /* \A, \Z, \z */ #define REG_SYN_OP_ESC_GNU_BUF_ANCHOR (1<<14) /* \`, \' */ #define REG_SYN_OP_BACK_REF (1<<15) /* \num */ #define REG_SYN_OP_CC (1<<16) /* [...] */ #define REG_SYN_OP_ESC_WORD (1<<17) /* \w, \W */ #define REG_SYN_OP_ESC_WORD_BEGIN_END (1<<18) /* \<. \> */ #define REG_SYN_OP_ESC_WORD_BOUND (1<<19) /* \b, \B */ #define REG_SYN_OP_ESC_WHITE_SPACE (1<<20) /* \s, \S */ #define REG_SYN_OP_ESC_DIGIT (1<<21) /* \d, \D */ #define REG_SYN_OP_LINE_ANCHOR (1<<22) /* ^, $ */ #define REG_SYN_OP_POSIX_BRACKET (1<<23) /* [:xxxx:] */ #define REG_SYN_OP_NON_GREEDY (1<<24) /* ??,*?,+?,{n,m}? */ #define REG_SYN_OP_ESC_CONTROL_CHAR (1<<25) /* \n,\r,\t,\a ... 
*/ #define REG_SYN_OP_ESC_C_CONTROL (1<<26) /* \cx */ #define REG_SYN_OP_ESC_OCTAL3 (1<<27) /* \OOO */ #define REG_SYN_OP_ESC_X_HEX2 (1<<28) /* \xHH */ #define REG_SYN_OP_ESC_X_BRACE_HEX8 (1<<29) /* \x{7HHHHHHH} */ #define REG_SYN_OP_SUBEXP_EFFECT (1<<30) /* (?...) */ #define REG_SYN_OP_QUOTE (1<<31) /* \Q...\E */ #define REG_SYN_OP2_OPTION_PERL (1<<0) /* (?imsx), (?-imsx) */ #define REG_SYN_OP2_OPTION_RUBY (1<<1) /* (?imx), (?-imx) */ #define REG_SYN_OP2_POSSESSIVE_REPEAT (1<<2) /* ?+,*+,++ */ #define REG_SYN_OP2_POSSESSIVE_INTERVAL (1<<3) /* {n,m}+ */ #define REG_SYN_OP2_CCLASS_SET (1<<4) /* [...&&..[..].] */ #define REG_SYN_OP2_NAMED_SUBEXP (1<<5) /*(?.),\k*/ #define REG_SYN_OP2_SUBEXP_CALL (1<<6) /* \g */ #define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<7) /* \C-x */ #define REG_SYN_OP2_ESC_M_BAR_META (1<<8) /* \M-x */ #define REG_SYN_OP2_ESC_V_VTAB (1<<9) /* \v as VTAB */ #define REG_SYN_OP2_ESC_U_HEX4 (1<<10) /* \uHHHH */ /* syntax (behavior) */ #define REG_SYN_CONTEXT_INDEP_ANCHORS (1<<0) /* not implemented */ #define REG_SYN_CONTEXT_INDEP_OPS (1<<1) /* ?, *, +, {n,m} */ #define REG_SYN_CONTEXT_INVALID_OPS (1<<2) /* error or ignore */ #define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<3) /* ...)... */ #define REG_SYN_ALLOW_INVALID_INTERVAL (1<<4) /* {??? */ #define REG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ etc.*/ #define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ /* syntax in char class [...] */ #define REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED (1<<10) /* [,-,] */ #define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<11) #define REG_SYN_ESCAPE_IN_CC (1<<12) /* [...\w..] etc.. 
*/ #define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<13) #define REG_SYN_ALLOW_RANGE_OP_IN_CC (1<<14) /* [0-9-a] */ /* error codes */ #define REG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -300) /* normal return */ #define REG_NORMAL 0 #define REG_MISMATCH -1 #define REG_NO_SUPPORT_CONFIG -2 /* internal error */ #define REGERR_MEMORY -5 #define REGERR_MATCH_STACK_LIMIT_OVER -6 #define REGERR_TYPE_BUG -10 #define REGERR_PARSER_BUG -11 #define REGERR_STACK_BUG -12 #define REGERR_UNDEFINED_BYTECODE -13 #define REGERR_UNEXPECTED_BYTECODE -14 #define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 /* syntax error */ #define REGERR_END_PATTERN_AT_LEFT_BRACE -100 #define REGERR_END_PATTERN_AT_LEFT_BRACKET -101 #define REGERR_EMPTY_CHAR_CLASS -102 #define REGERR_PREMATURE_END_OF_CHAR_CLASS -103 #define REGERR_END_PATTERN_AT_BACKSLASH -104 #define REGERR_END_PATTERN_AT_META -105 #define REGERR_END_PATTERN_AT_CONTROL -106 #define REGERR_META_CODE_SYNTAX -108 #define REGERR_CONTROL_CODE_SYNTAX -109 #define REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 #define REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 #define REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 #define REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 #define REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 #define REGERR_NESTED_REPEAT_OPERATOR -115 #define REGERR_UNMATCHED_CLOSE_PARENTHESIS -116 #define REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 #define REGERR_END_PATTERN_IN_GROUP -118 #define REGERR_UNDEFINED_GROUP_OPTION -119 #define REGERR_INVALID_POSIX_BRACKET_TYPE -121 #define REGERR_INVALID_LOOK_BEHIND_PATTERN -122 #define REGERR_INVALID_REPEAT_RANGE_PATTERN -123 /* values error (syntax error) */ #define REGERR_TOO_BIG_NUMBER -200 #define REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 #define REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 #define REGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 #define 
REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 #define REGERR_TOO_MANY_MULTI_BYTE_RANGES -205 #define REGERR_TOO_SHORT_MULTI_BYTE_STRING -206 #define REGERR_TOO_BIG_BACKREF_NUMBER -207 #define REGERR_INVALID_BACKREF -208 #define REGERR_TOO_BIG_WIDE_CHAR_VALUE -209 #define REGERR_TOO_LONG_WIDE_CHAR_VALUE -210 #define REGERR_INVALID_WIDE_CHAR_VALUE -211 #define REGERR_INVALID_SUBEXP_NAME -212 #define REGERR_UNDEFINED_NAME_REFERENCE -213 #define REGERR_UNDEFINED_GROUP_REFERENCE -214 #define REGERR_MULTIPLEX_DEFINITION_NAME_CALL -215 #define REGERR_NEVER_ENDING_RECURSION -216 /* errors related to thread */ #define REGERR_OVER_THREAD_PASS_LIMIT_COUNT -301 /* match result region type */ struct re_registers { int allocated; int num_regs; int* beg; int* end; }; #define REG_REGION_NOTPOS -1 typedef struct re_registers RegRegion; typedef struct { UChar* par; UChar* par_end; } RegErrorInfo; typedef struct { int lower; int upper; } RegRepeatRange; /* regex_t state */ #define REG_STATE_NORMAL 0 #define REG_STATE_SEARCHING 1 #define REG_STATE_COMPILING -1 #define REG_STATE_MODIFY -2 #define REG_STATE(regex) \ ((regex)->state > 0 ? REG_STATE_SEARCHING : (regex)->state) typedef struct re_pattern_buffer { /* common members in BBuf(bytes-buffer) type */ unsigned char* p; /* compiled pattern */ unsigned int used; /* used space for p */ unsigned int alloc; /* allocated space for p */ int state; /* normal, searching, compiling */ int num_mem; /* used memory(...) 
num counted from 1 */ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int num_null_check; /* OP_NULL_CHECK_START/END id counter */ int num_call; /* number of subexp call */ unsigned int backtrack_mem; int stack_pop_level; int repeat_range_alloc; RegRepeatRange* repeat_range; RegCharEncoding enc; RegOptionType options; RegSyntaxType* syntax; void* name_table; /* optimize info (string search and char-map and anchor) */ int optimize; /* optimize flag */ int threshold_len; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ RegDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ RegDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; unsigned char map[REG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ int *int_map; /* BM skip for exact_len > 255 */ int *int_map_backward; /* BM skip for backward search */ RegDistance dmin; /* min-distance of exact or map */ RegDistance dmax; /* max-distance of exact or map */ /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict on multi-thread */ } regex_t; #ifdef RUBY_PLATFORM #define re_mbcinit ruby_re_mbcinit #define re_compile_pattern ruby_re_compile_pattern #define re_recompile_pattern ruby_re_recompile_pattern #define re_free_pattern ruby_re_free_pattern #define re_adjust_startpos ruby_re_adjust_startpos #define re_search ruby_re_search #define re_match ruby_re_match #define re_set_casetable ruby_re_set_casetable #define re_copy_registers ruby_re_copy_registers #define re_free_registers ruby_re_free_registers #define register_info_type ruby_register_info_type #define re_error_code_to_str ruby_error_code_to_str #define ruby_error_code_to_str regex_error_code_to_str #define ruby_re_copy_registers regex_region_copy #else #define re_error_code_to_str regex_error_code_to_str #define re_copy_registers regex_region_copy #endif /* 
Oniguruma Native API */ REG_EXTERN int regex_init P_((void)); REG_EXTERN int regex_error_code_to_str PV_((UChar* s, int err_code, ...)); REG_EXTERN int regex_new P_((regex_t**, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); REG_EXTERN void regex_free P_((regex_t*)); REG_EXTERN int regex_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); REG_EXTERN int regex_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, RegRegion* region, RegOptionType option)); REG_EXTERN int regex_match P_((regex_t*, UChar* str, UChar* end, UChar* at, RegRegion* region, RegOptionType option)); REG_EXTERN RegRegion* regex_region_new P_((void)); REG_EXTERN void regex_region_free P_((RegRegion* region, int free_self)); REG_EXTERN void regex_region_copy P_((RegRegion* to, RegRegion* from)); REG_EXTERN void regex_region_clear P_((RegRegion* region)); REG_EXTERN int regex_region_resize P_((RegRegion* region, int n)); REG_EXTERN int regex_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, int** nums)); REG_EXTERN int regex_foreach_name P_((regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)); REG_EXTERN UChar* regex_get_prev_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); REG_EXTERN UChar* regex_get_left_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); REG_EXTERN UChar* regex_get_right_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); REG_EXTERN void regex_set_default_trans_table P_((UChar* table)); REG_EXTERN int regex_set_default_syntax P_((RegSyntaxType* syntax)); REG_EXTERN int regex_end P_((void)); REG_EXTERN const char* regex_version P_((void)); /* GNU regex API */ #ifdef REG_RUBY_M17N REG_EXTERN void re_mbcinit P_((RegCharEncoding)); #else REG_EXTERN void re_mbcinit P_((int)); #endif REG_EXTERN int re_compile_pattern 
P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); REG_EXTERN int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); REG_EXTERN void re_free_pattern P_((struct re_pattern_buffer*)); REG_EXTERN int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); REG_EXTERN int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); REG_EXTERN int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); REG_EXTERN void re_set_casetable P_((const char*)); REG_EXTERN void re_free_registers P_((struct re_registers*)); REG_EXTERN int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ #endif /* ONIGURUMA_H */