diff --git a/ext/standard/Makefile.in b/ext/standard/Makefile.in
index 8af72fa3175..38501f0d5f6 100644
--- a/ext/standard/Makefile.in
+++ b/ext/standard/Makefile.in
@@ -13,5 +13,3 @@ include $(top_srcdir)/build/dynlib.mk
 
 parsedate.c: $(srcdir)/parsedate.y
 
-$(srcdir)/url_scanner.c: $(srcdir)/url_scanner.re
-	-re2c $< > $@.new && mv $@.new $@
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index 53acc21c977..c5d1ba0799c 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -729,6 +729,10 @@ PHP_RINIT_FUNCTION(basic)
 	PHP_RINIT(assert)(INIT_FUNC_ARGS_PASSTHRU);
 	PHP_RINIT(dir)(INIT_FUNC_ARGS_PASSTHRU);
 
+#ifdef TRANS_SID
+	PHP_RINIT(url_scanner)(INIT_FUNC_ARGS_PASSTHRU);
+#endif
+
 	return SUCCESS;
 }
 
@@ -754,6 +758,10 @@ PHP_RSHUTDOWN_FUNCTION(basic)
 	PHP_RSHUTDOWN(syslog)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
 	PHP_RSHUTDOWN(assert)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
 
+#ifdef TRANS_SID
+	PHP_RSHUTDOWN(url_scanner)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
+#endif
+
 	return SUCCESS;
 }
 
diff --git a/ext/standard/basic_functions.h b/ext/standard/basic_functions.h
index 5f41801439d..32d90c9aaf4 100644
--- a/ext/standard/basic_functions.h
+++ b/ext/standard/basic_functions.h
@@ -26,6 +26,10 @@
 
 #include "zend_highlight.h"
 
+#ifdef TRANS_SID
+# include "url_scanner.h"
+#endif
+
 extern zend_module_entry basic_functions_module;
 #define basic_functions_module_ptr &basic_functions_module
 
@@ -161,6 +165,11 @@ typedef struct {
 
 	/* var.c */
 	zend_class_entry *incomplete_class;
+
+#ifdef TRANS_SID
+	/* url_scanner.c */
+	url_adapt_state_t url_adapt_state;
+#endif
 } php_basic_globals;
 
 #ifdef ZTS
diff --git a/ext/standard/url_scanner.c b/ext/standard/url_scanner.c
index eed19f4db9f..8a1c72c6726 100644
--- a/ext/standard/url_scanner.c
+++ b/ext/standard/url_scanner.c
@@ -1,5 +1,3 @@
-/* Generated by re2c 0.5 on Sat Nov 27 16:22:34 1999 */
-#line 1 "../../../php4/ext/standard/url_scanner.re"
 /*
    +----------------------------------------------------------------------+
    |
PHP version 4.0 | @@ -14,7 +12,8 @@ | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ - | Author: Sascha Schumann | + | Author: Sascha Schumann | + | Hartmut Holzgraefe | +----------------------------------------------------------------------+ */ /* $Id$ */ @@ -27,518 +26,270 @@ #include #include #include +#include "php.h" +#include "basic_functions.h" +#include "url_scanner.h" -#undef MIN -#define MIN(a,b) (a)<(b)?(a):(b) +#define BUFSIZE 256 -#define YYCTYPE char -#define YYCURSOR state->crs -#define YYLIMIT state->end -#define YYMARKER state->ptr -#define YYFILL(n) - -typedef enum { - INITIAL, - REF -} state; - -typedef struct { - state state; - const char *crs; - const char *end; - const char *ptr; - const char *start; - char *target; - size_t targetsize; - const char *data; -} lexdata; - -#define FINISH { catchup(state); goto finish; } - -#define BEGIN(x) \ - switch(state->state) { \ - case INITIAL: \ - catchup(state); \ - break; \ - case REF: \ - screw_url(state); \ - break; \ - } \ - state->state = x; \ - state->start = state->crs; \ - goto nextiter - -#define ATTACH(s, n) \ -{ \ - size_t _newlen = state->targetsize + n; \ - state->target = realloc(state->target, _newlen + 1); \ - memcpy(state->target + state->targetsize, s, n); \ - state->targetsize = _newlen; \ - state->target[_newlen] = '\0'; \ -} - -#define URLLEN 512 - -static void screw_url(lexdata *state) -{ - int len; - char buf[URLLEN]; - char url[URLLEN]; - const char *p, *q; - char c; - - - /* search outer limits for URI */ - for(p = state->start; p < state->crs && (c = *p); p++) - if(!isspace(c)) break; - if(c=='"') p++; - for(; p < state->crs && (c = *p); p++) - if(!isspace(c)) break; - - /* - * we look at q-1, because q points to the character behind the last - * character we are going to copy and the decision is based on that last - * character - */ 
-
-	for(q = state->crs; q > state->start && (c = *(q-1)); q--)
-		if(!isspace(c)) break;
-	if(c=='"') q--;
-	for(; q > state->start && (c = *(q-1)); q--)
-		if(!isspace(c)) break;
-
-	if(qstart; q=state->crs; }
-
-	/* attach beginning */
-	ATTACH(state->start, p-state->start);
-
-	/* copy old URI */
-	len = MIN(q - p, sizeof(buf) - 1);
-
-	memcpy(url, p, len);
-	url[len] = '\0';
-
-	/* construct new URI */
-	len = snprintf(buf, sizeof(buf), "%s%c%s", url,
-			memchr(state->start, '?', len) ? '&' : '?',
-			state->data);
-
-	/* attach new URI */
-	ATTACH(buf, len);
-
-	/* attach rest */
-	ATTACH(q, state->crs - q);
+/* Request startup hook: a NULL source makes url_adapt() reset its state. */
+PHP_RINIT_FUNCTION(url_scanner) {
+	url_adapt(NULL,0,NULL,NULL);
+	return SUCCESS;
 }
 
-static void catchup(lexdata *state)
-{
-	ATTACH(state->start, (state->crs - state->start));
+/* Request shutdown hook: releases the token buffers held by url_adapt(). */
+PHP_RSHUTDOWN_FUNCTION(url_scanner) {
+	url_adapt(NULL,0,NULL,NULL);
+	return SUCCESS;
 }
 
-#line 144
+/*
+ * Decide whether the value of attribute `attr` inside element `tag` is a
+ * URL that has to carry `buf`, and build the text to append to it.
+ *
+ * tag, attr  element and attribute name, compared case-insensitively
+ * val        attribute value as found in the document (without quotes)
+ * buf        data to transport, appended verbatim
+ *
+ * Returns an emalloc()ed string consisting of '?' (or '&' when val already
+ * contains a '?') followed by buf; the caller releases it with efree().
+ * Returns NULL when nothing has to be appended.
+ */
+static char *url_attr_addon(const char *tag,const char *attr,const char *val,const char *buf) {
+	/* element/attribute pairs whose value is a URL we rewrite */
+	static const struct {
+		const char *tag;
+		const char *attr;
+	} url_attrs[] = {
+		{ "a",     "href"   },
+		{ "area",  "href"   },
+		{ "form",  "action" },
+		{ "frame", "src"    },
+		{ "img",   "src"    }
+	};
+	const size_t n_attrs = sizeof(url_attrs) / sizeof(url_attrs[0]);
+	size_t i;
+	char *addon;
 
-static void url_scanner(lexdata *state)
-{
-	while(state->crs < state->end) {
-	
-	switch(state->state) {
-		case INITIAL: 
-{
-	YYCTYPE yych;
-	unsigned int yyaccept;
-	goto yy0;
-yy1:	++YYCURSOR;
-yy0:
-	if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
-	yych = *YYCURSOR;
-	switch(yych){
-	case '\000':	goto yy7;
-	case '<':	goto yy2;
-	default:	goto yy4;
+	for (i = 0; i < n_attrs; i++) {
+		if (!strcasecmp(tag, url_attrs[i].tag) && !strcasecmp(attr, url_attrs[i].attr))
+			break;
 	}
-yy2:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'A':	case 'a':	goto yy9;
-	case 'F':	case 'f':	goto yy10;
-	default:	goto yy3;
-	}
-yy3:yy4:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy5:	switch(yych){
-	case '\000':	case '<':	goto yy6;
-	default:	goto yy4;
-	}
-yy6:
-#line 157
-	{ BEGIN(INITIAL); }
-yy7:	yych = *++YYCURSOR;
-yy8:
-#line 158
-	{ FINISH; }
-yy9:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'H':	case 'h':	goto yy3;
-	case 'R':	case 'r':	goto yy41;
-	default:	goto yy40;
-	}
-yy10:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'O':	case 'o':	goto yy12;
-	case 'R':	case 'r':	goto yy11;
-	default:	goto yy3;
-	}
-yy11:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'A':	case 'a':	goto yy27;
-	default:	goto yy3;
-	}
-yy12:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'R':	case 'r':	goto yy13;
-	default:	goto yy3;
-	}
-yy13:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'M':	case 'm':	goto yy14;
-	default:	goto yy3;
-	}
-yy14:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'A':	case 'a':	goto yy3;
-	default:	goto yy16;
-	}
-yy15:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy16:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy15;
-	case 'A':	case 'a':	goto yy17;
-	default:	goto yy3;
-	}
-yy17:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'C':	case 'c':	goto yy18;
-	default:	goto yy3;
-	}
-yy18:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'T':	case 't':	goto yy19;
-	default:	goto yy3;
-	}
-yy19:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'I':	case 'i':	goto yy20;
-	default:	goto yy3;
-	}
-yy20:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'O':	case 'o':	goto yy21;
-	default:	goto yy3;
-	}
-yy21:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'N':	case 'n':	goto yy22;
-	default:	goto yy3;
-	}
-yy22:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy23:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy22;
-	case '=':	goto yy24;
-	default:	goto yy3;
-	}
-yy24:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy25:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy24;
-	default:	goto yy26;
-	}
-yy26:
-#line 155
-	{ BEGIN(REF); }
-yy27:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'M':	case 'm':	goto yy28;
-	default:	goto yy3;
-	}
-yy28:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'E':	case 'e':	goto yy29;
-	default:	goto yy3;
-	}
-yy29:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'S':	case 's':	goto yy3;
-	default:	goto yy31;
-	}
-yy30:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy31:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy30;
-	case 'S':	case 's':	goto yy32;
-	default:	goto yy3;
-	}
-yy32:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'R':	case 'r':	goto yy33;
-	default:	goto yy3;
-	}
-yy33:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'C':	case 'c':	goto yy34;
-	default:	goto yy3;
-	}
-yy34:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy35:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy34;
-	case '=':	goto yy36;
-	default:	goto yy3;
-	}
-yy36:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy37:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy36;
-	default:	goto yy38;
-	}
-yy38:
-#line 153
-	{ BEGIN(REF); }
-yy39:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy40:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy39;
-	case 'H':	case 'h':	goto yy54;
-	default:	goto yy3;
-	}
-yy41:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'E':	case 'e':	goto yy42;
-	default:	goto yy3;
-	}
-yy42:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'A':	case 'a':	goto yy43;
-	default:	goto yy3;
-	}
-yy43:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'H':	case 'h':	goto yy3;
-	default:	goto yy45;
-	}
-yy44:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy45:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy44;
-	case 'H':	case 'h':	goto yy46;
-	default:	goto yy3;
-	}
-yy46:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'R':	case 'r':	goto yy47;
-	default:	goto yy3;
-	}
-yy47:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'E':	case 'e':	goto yy48;
-	default:	goto yy3;
-	}
-yy48:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'F':	case 'f':	goto yy49;
-	default:	goto yy3;
-	}
-yy49:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy50:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy49;
-	case '=':	goto yy51;
-	default:	goto yy3;
-	}
-yy51:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy52:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy51;
-	default:	goto yy53;
-	}
-yy53:
-#line 156
-	{ BEGIN(REF); }
-yy54:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'R':	case 'r':	goto yy55;
-	default:	goto yy3;
-	}
-yy55:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'E':	case 'e':	goto yy56;
-	default:	goto yy3;
-	}
-yy56:	yych = *++YYCURSOR;
-	switch(yych){
-	case 'F':	case 'f':	goto yy57;
-	default:	goto yy3;
-	}
-yy57:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy58:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy57;
-	case '=':	goto yy59;
-	default:	goto yy3;
-	}
-yy59:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy60:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy59;
-	default:	goto yy61;
-	}
-yy61:
-#line 154
-	{ BEGIN(REF); }
+	if (i == n_attrs)
+		return NULL;	/* this attribute does not hold a URL we rewrite */
+
+	/*
+	 * A ':' in front of the first '/', '?' or '#' is a scheme separator,
+	 * i.e. the link is absolute (http:, mailto:, javascript:, ...).  The
+	 * re2c scanner this code replaces did not modify absolute links;
+	 * keep it that way so the data is not handed to other sites.
+	 */
+	if (val[strcspn(val, ":/?#")] == ':')
+		return NULL;
+
+	if (strstr(val, buf))
+		return NULL;	/* the data is already part of the URL */
+
+	/* separator + buf + terminating NUL */
+	addon = (char *)emalloc(strlen(buf)+2);
+	addon[0] = strchr(val,'?') ? '&' : '?';
+	strcpy(addon+1, buf);
+	return addon;
 }
-#line 159
-
-			break;
-		case REF: 
-{
-	YYCTYPE yych;
-	unsigned int yyaccept;
-	goto yy62;
-yy63:	++YYCURSOR;
-yy62:
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-	switch(yych){
-	case '\000':	case '>':	goto yy64;
-	case '\t':	case '\v':
-	case '\f':	case ' ':	case '"':	goto yy65;
-	case '#':	goto yy69;
-	case ':':	goto yy71;
-	default:	goto yy67;
-	}
-yy64:
-#line 163
-	{ BEGIN(INITIAL); }
-yy65:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy66:	switch(yych){
-	case '\000':	case '>':	goto yy64;
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy65;
-	
case '"':	goto yy79;
-	case '#':	goto yy69;
-	case ':':	goto yy71;
-	default:	goto yy67;
-	}
-yy67:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy68:	switch(yych){
-	case '\000':	case '>':	goto yy64;
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy77;
-	case '"':	goto yy79;
-	case '#':	goto yy69;
-	case ':':	goto yy71;
-	default:	goto yy67;
-	}
-yy69:	yych = *++YYCURSOR;
-yy70:	YYCURSOR -= 1;
-#line 164
-	{ BEGIN(INITIAL); }
-yy71:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy72:	switch(yych){
-	case '\000':	case '#':	case '>':	goto yy73;
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy74;
-	case '"':	goto yy76;
-	default:	goto yy71;
-	}
-yy73:
-#line 165
-	{ 
-					/* don't modify absolute links */
-					state->state = INITIAL; BEGIN(INITIAL); 
-				}
-yy74:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy75:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy74;
-	case '"':	goto yy76;
-	default:	goto yy73;
-	}
-yy76:	yych = *++YYCURSOR;
-	goto yy73;
-yy77:	++YYCURSOR;
-	if(YYLIMIT == YYCURSOR) YYFILL(1);
-	yych = *YYCURSOR;
-yy78:	switch(yych){
-	case '\t':	case '\v':
-	case '\f':	case ' ':	goto yy77;
-	case '"':	goto yy79;
-	default:	goto yy64;
-	}
-yy79:	yych = *++YYCURSOR;
-	goto yy64;
-}
-#line 169
-
-			break;
-	}
-nextiter:
-	;
-	}
-finish:
-	;
-}
+#define US BG(url_adapt_state)
+
+/*
+ * Start collecting a new token (tag name, attribute name or attribute
+ * value) in *bufp.  The previous buffer is handed to erealloc() and the
+ * write position is rewound to its beginning.
+ */
+static void url_token_start(url_adapt_state_t *st, char **bufp)
+{
+	st->ml = BUFSIZE;
+	st->p = *bufp = erealloc(*bufp, st->ml);
+	st->l = 0;
+}
+
+/*
+ * Append c to the token collected in *bufp.  The buffer is enlarged by
+ * BUFSIZE as soon as it is full, so there is always one spare byte left
+ * for the NUL that terminates the token later on.
+ */
+static void url_token_append(url_adapt_state_t *st, char **bufp, char c)
+{
+	*st->p++ = c;
+	if (++st->l == st->ml) {
+		st->ml += BUFSIZE;
+		*bufp = erealloc(*bufp, st->ml);
+		st->p = *bufp + st->l;
+	}
+}
+
+/*
+ * Terminate the attribute value collected so far and ask url_attr_addon()
+ * whether it needs `data`.  If it does, the addon is appended to the output
+ * buffer *out, whose capacity *cap and fill level *len are updated.
+ *
+ * Returns 0 on success, -1 if the output buffer could not be enlarged
+ * (*out is still valid in that case).
+ */
+static int url_emit_addon(url_adapt_state_t *st, const char *data, char **out, size_t *cap, size_t *len)
+{
+	char *addon, *grown;
+	size_t addlen;
+
+	*st->p = '\0';
+	addon = url_attr_addon(st->tag, st->attr, st->val, data);
+	if (!addon)
+		return 0;
+
+	addlen = strlen(addon);
+	/* "+ 1": the buffer holds *cap characters plus the final NUL */
+	grown = realloc(*out, *cap + addlen + 1);
+	if (!grown) {
+		efree(addon);
+		return -1;
+	}
+	*out = grown;
+	*cap += addlen;
+	memcpy(*out + *len, addon, addlen);
+	*len += addlen;
+	efree(addon);
+	return 0;
+}
+
+/*
+ * url_adapt - copy a piece of HTML and append `data` to the URLs in it
+ *
+ * src     text to scan; NULL resets the scanner (state back to
+ *         STATE_NORMAL, token buffers released) and returns NULL
+ * srclen  number of bytes in src; 0 means "use strlen(src)"
+ * data    text handed to url_attr_addon() for every attribute value
+ * newlen  receives the length of the result (without the NUL); may be NULL
+ *
+ * The scanner position is kept in BG(url_adapt_state) and is not reset
+ * between calls, so a tag may be split across consecutive calls.
+ *
+ * Returns a NUL-terminated buffer obtained from malloc()/realloc(), or
+ * NULL if memory ran out (*newlen is 0 then).
+ *
+ * NOTE(review): a URL fragment ("page#top") gets the data appended behind
+ * the fragment; the re2c scanner inserted it in front of the '#'.
+ */
 
 char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen)
 {
-	lexdata state;
+	url_adapt_state_t *st = &US;
+	const char *end;
+	char *out;
+	size_t cap, len = 0;
 
-	state.state = INITIAL;
-	state.start = state.crs = src;
-	state.end = src + srclen;
-	state.ptr = NULL;
-	state.target = NULL;
-	state.targetsize = 0;
-	state.data = data;
+	if (src == NULL) {
+		/* reset request: forget the position and drop all tokens */
+		st->state = STATE_NORMAL;
+		if (st->tag)  { efree(st->tag);  st->tag  = NULL; }
+		if (st->attr) { efree(st->attr); st->attr = NULL; }
+		if (st->val)  { efree(st->val);  st->val  = NULL; }
+		return NULL;
+	}
 
-	url_scanner(&state);
+	if (srclen == 0)
+		srclen = strlen(src);
 
-	if(newlen) *newlen = state.targetsize;
+	/*
+	 * Every input character yields at most one output character; room
+	 * for addons is added on demand by url_emit_addon().
+	 */
+	cap = srclen;
+	out = malloc(cap + 1);
+	if (!out)
+		goto fail;
+	end = src + srclen;
 
-	return state.target;
+	for (; src < end; src++) {
+		/* <ctype.h> functions need unsigned char values */
+		const unsigned char c = (unsigned char)*src;
+		int no_output = 0;
+
+		switch (st->state) {
+			case STATE_NORMAL:
+				if (c == '<')
+					st->state = STATE_TAG_START;
+				break;
+
+			case STATE_TAG_START:
+				if (isalnum(c)) {
+					st->state = STATE_TAG;
+					url_token_start(st, &st->tag);
+					url_token_append(st, &st->tag, *src);
+				} else if (c != '<') {
+					/* "</a>", "<!--", "< " ...: not an opening tag */
+					st->state = STATE_NORMAL;
+				}
+				break;
+
+			case STATE_TAG:
+				if (isalnum(c)) {
+					url_token_append(st, &st->tag, *src);
+				} else if (isspace(c)) {
+					*st->p = '\0';
+					st->state = STATE_IN_TAG;
+				} else {
+					st->state = STATE_NORMAL;
+					efree(st->tag);
+					st->tag = NULL;
+				}
+				break;
+
+			case STATE_IN_TAG:
+				if (isalnum(c)) {
+					st->state = STATE_TAG_ATTR;
+					url_token_start(st, &st->attr);
+					url_token_append(st, &st->attr, *src);
+				} else if (!isspace(c)) {
+					st->state = STATE_NORMAL;
+					efree(st->tag);
+					st->tag = NULL;
+				}
+				break;
+
+			case STATE_TAG_ATTR:
+				if (isalnum(c)) {
+					url_token_append(st, &st->attr, *src);
+				} else if (isspace(c) || c == '=') {
+					*st->p = '\0';
+					st->state = STATE_TAG_IS;
+				} else if (c == '>') {
+					st->state = STATE_NORMAL;
+				} else {
+					efree(st->attr);
+					st->attr = NULL;
+					st->state = STATE_IN_TAG;
+				}
+				break;
+
+			case STATE_TAG_IS:
+			case STATE_TAG_IS2:
+				/* skip blanks and the '=' itself ("href = x") */
+				if (isspace(c) || c == '=')
+					break;
+				if (c == '>') {
+					st->state = STATE_NORMAL;
+					break;
+				}
+				url_token_start(st, &st->val);
+				if (c == '"' || c == '\'') {
+					st->state = STATE_TAG_QVAL2;
+					st->delim = *src;
+				} else {
+					st->state = STATE_TAG_VAL;
+					url_token_append(st, &st->val, *src);
+				}
+				break;
+
+			case STATE_TAG_QVAL2:
+				if (*src == st->delim) {
+					/*
+					 * The closing quote is copied below, after
+					 * the switch, so the addon ends up inside
+					 * the quotes.
+					 */
+					st->state = STATE_IN_TAG;
+					if (url_emit_addon(st, data, &out, &cap, &len) < 0)
+						goto fail;
+					break;
+				}
+				if (c == '\\') {
+					/*
+					 * NOTE(review): the backslash is stored in
+					 * the value but dropped from the output, as
+					 * before - confirm that this is intended.
+					 */
+					no_output = 1;
+					st->state = STATE_TAG_QVAL2b;
+				} else if (c == '>') {
+					st->state = STATE_NORMAL;
+				}
+				url_token_append(st, &st->val, *src);
+				break;
+
+			case STATE_TAG_QVAL2b:
+				/* character behind a backslash: taken as is */
+				st->state = STATE_TAG_QVAL2;
+				url_token_append(st, &st->val, *src);
+				break;
+
+			case STATE_TAG_VAL:
+			case STATE_TAG_VAL2:
+				if (isspace(c) || c == '>') {
+					st->state = (c == '>') ? STATE_NORMAL : STATE_IN_TAG;
+					if (url_emit_addon(st, data, &out, &cap, &len) < 0)
+						goto fail;
+				} else {
+					url_token_append(st, &st->val, *src);
+				}
+				break;
+
+			default:
+				/* STATE_TAG_QVAL1 is never entered */
+				break;
+		}
+
+		if (!no_output)
+			out[len++] = *src;
+	}
+
+	out[len] = '\0';
+	if (newlen)
+		*newlen = len;
+	return out;
+
+fail:
+	free(out);
+	if (newlen)
+		*newlen = 0;
+	return NULL;
 }
 
 #endif
diff --git a/ext/standard/url_scanner.h b/ext/standard/url_scanner.h
index 3c0e7b29a59..b7e3af38b25 100644
--- a/ext/standard/url_scanner.h
+++ b/ext/standard/url_scanner.h
@@ -22,4 +22,31 @@
 
 char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen);
 
+/* positions of the url_adapt() scanner inside an HTML document */
+enum url_state {
+	STATE_NORMAL,		/* outside of a tag */
+	STATE_TAG_START,	/* just behind '<' */
+	STATE_TAG,		/* inside the tag name */
+	STATE_IN_TAG,		/* inside a tag, between attributes */
+	STATE_TAG_ATTR,		/* inside an attribute name */
+	STATE_TAG_IS,		/* behind an attribute name, value not begun */
+	STATE_TAG_IS2,		/* handled like STATE_TAG_IS */
+	STATE_TAG_VAL,		/* inside an unquoted value */
+	STATE_TAG_VAL2,		/* handled like STATE_TAG_VAL */
+	STATE_TAG_QVAL1,	/* not used by url_adapt() */
+	STATE_TAG_QVAL2,	/* inside a quoted value */
+	STATE_TAG_QVAL2b	/* inside a quoted value, behind a backslash */
+};
+
+/* scanner state that url_adapt() keeps from one call to the next */
+typedef struct url_adapt_struct {
+	enum url_state state;	/* current position */
+	char *tag;		/* name of the current tag */
+	char *attr;		/* name of the current attribute */
+	char *val;		/* value of the current attribute */
+	char delim;		/* quote character that opened val */
+	char *p;		/* write position in the token being collected */
+	int l,ml;		/* used and allocated size of that token */
+} url_adapt_state_t;
+
 #endif