diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 15d1cf335d..b1378af4c8 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -548,6 +548,7 @@ struct Io char* infile; Biobuf* bin; int32 ilineno; + int nlsemi; int peekc; int peekc1; // second peekc for ... char* cp; // used for content when bin==nil @@ -725,7 +726,6 @@ void importfile(Val*, int line); void cannedimports(char*, char*); void unimportfile(); int32 yylex(void); -void yyoptsemi(int); void typeinit(void); void lexinit(void); char* lexname(int); @@ -1128,10 +1128,6 @@ EXTERN Prog* breakpc; EXTERN Prog* pc; EXTERN Prog* firstpc; -EXTERN int yylast; -EXTERN int yynext; -EXTERN int yysemi; - void allocparams(void); void cgen_as(Node *nl, Node *nr); void cgen_callmeth(Node *n, int proc); diff --git a/src/cmd/gc/go.y b/src/cmd/gc/go.y index 493dbeecc0..6e100d1f41 100644 --- a/src/cmd/gc/go.y +++ b/src/cmd/gc/go.y @@ -8,13 +8,14 @@ * The Go semicolon rules are: * * 1. all statements and declarations are terminated by semicolons - * 2. semicolons can be omitted at top level. - * 3. semicolons can be omitted before and after the closing ) or } + * 2. semicolons can be omitted before and after the closing ) or } * on a list of statements or declarations. + * 3. semicolons are inserted by the lexer before a newline + * following a specific list of tokens. * - * This is accomplished by calling yyoptsemi() to mark the places - * where semicolons are optional. That tells the lexer that if a - * semicolon isn't the next token, it should insert one for us. + * Rules #1 and #2 are accomplished by writing the lists as + * semicolon-separated lists with an optional trailing semicolon. + * Rule #3 is implemented in yylex. */ %{ @@ -67,7 +68,7 @@ %type xdcl fnbody fnres switch_body loop_body dcl_name_list %type new_name_list expr_list keyval_list braced_keyval_list expr_or_type_list xdcl_list -%type oexpr_list oexpr_or_type_list caseblock_list stmt_list oarg_type_list arg_type_list +%type oexpr_list oexpr_or_type_list_ocomma caseblock_list stmt_list oarg_type_list_ocomma arg_type_list %type interfacedcl_list vardcl vardcl_list structdcl structdcl_list %type common_dcl constdcl constdcl1 constdcl_list typedcl_list @@ -112,13 +113,6 @@ %left ')' %left PreferToRightParen -%left '.' - -%left '{' - -%left NotSemi -%left ';' - %% file: loadsys @@ -134,9 +128,10 @@ package: { prevlineno = lineno; yyerror("package statement must be first"); + flusherrors(); mkpackage("main"); } -| LPACKAGE sym +| LPACKAGE sym ';' { mkpackage($2->name); } @@ -157,12 +152,12 @@ loadsys: } imports: -| imports import +| imports import ';' import: - LIMPORT import_stmt osemi -| LIMPORT '(' import_stmt_list osemi ')' osemi -| LIMPORT '(' ')' osemi + LIMPORT import_stmt +| LIMPORT '(' import_stmt_list osemi ')' +| LIMPORT '(' ')' import_stmt: import_here import_package import_there @@ -235,7 +230,7 @@ import_here: } import_package: - LPACKAGE sym + LPACKAGE sym ';' { pkgimportname = $2; if(strcmp($2->name, "main") == 0) @@ -265,43 +260,39 @@ import_there: { resumecheckwidth(); checkimports(); + unimportfile(); } /* * declarations */ xdcl: - common_dcl osemi -| xfndcl osemi + { + yyerror("empty top-level declaration"); + $$ = nil; + } +| common_dcl +| xfndcl { $$ = list1($1); } -| error osemi +| error { $$ = nil; } -| ';' - { - yyerror("empty top-level declaration"); - $$ = nil; - } common_dcl: LVAR vardcl { $$ = $2; - if(yylast == LSEMIBRACE) - yyoptsemi(0); } | LVAR '(' vardcl_list osemi ')' { $$ = $3; - yyoptsemi(0); } | LVAR '(' ')' { $$ = nil; - yyoptsemi(0); } | LCONST constdcl { @@ -314,51 +305,38 @@ common_dcl: $$ = $3; iota = 0; lastconst = nil; - yyoptsemi(0); } | LCONST '(' constdcl ';' constdcl_list osemi ')' { $$ = concat($3, $5); iota = 0; lastconst = nil; - yyoptsemi(0); } | LCONST '(' ')' { $$ = nil; - yyoptsemi(0); } | LTYPE typedcl { $$ = list1($2); - if(yylast == LSEMIBRACE) - yyoptsemi(0); } | LTYPE '(' typedcl_list osemi ')' { $$ = $3; - yyoptsemi(0); } | LTYPE '(' ')' { $$ = nil; - yyoptsemi(0); - } - -varoptsemi: - { - if(yylast == LSEMIBRACE) - yyoptsemi('='); } vardcl: - dcl_name_list ntype varoptsemi + dcl_name_list ntype { $$ = variter($1, $2, nil); } -| dcl_name_list ntype varoptsemi '=' expr_list +| dcl_name_list ntype '=' expr_list { - $$ = variter($1, $2, $5); + $$ = variter($1, $2, $4); } | dcl_name_list '=' expr_list { @@ -508,7 +486,6 @@ compound_stmt: { $$ = liststmt($3); popdcl(); - yyoptsemi(0); } switch_body: @@ -520,7 +497,6 @@ switch_body: { $$ = $3; popdcl(); - yyoptsemi(0); } caseblock: @@ -590,7 +566,6 @@ for_body: { $$ = $1; $$->nbody = concat($$->nbody, $2); - yyoptsemi(0); } for_stmt: @@ -630,7 +605,6 @@ if_stmt: $$ = $3; $$->nbody = $4; // no popdcl; maybe there's an LELSE - yyoptsemi(LELSE); } switch_stmt: @@ -794,7 +768,7 @@ uexpr: * can be preceded by 'defer' and 'go' */ pseudocall: - pexpr '(' oexpr_or_type_list ')' + pexpr '(' oexpr_or_type_list_ocomma ')' { $$ = nod(OCALL, $1, N); $$->list = $3; @@ -1054,15 +1028,10 @@ structtype: { $$ = nod(OTSTRUCT, N, N); $$->list = $3; - // Distinguish closing brace in struct from - // other closing braces by explicitly marking it. - // Used above (yylast == LSEMIBRACE). - yylast = LSEMIBRACE; } | LSTRUCT '{' '}' { $$ = nod(OTSTRUCT, N, N); - yylast = LSEMIBRACE; } interfacetype: @@ -1070,12 +1039,10 @@ interfacetype: { $$ = nod(OTINTER, N, N); $$->list = $3; - yylast = LSEMIBRACE; } | LINTERFACE '{' '}' { $$ = nod(OTINTER, N, N); - yylast = LSEMIBRACE; } keyval: @@ -1100,7 +1067,7 @@ xfndcl: } fndcl: - dcl_name '(' oarg_type_list ')' fnres + dcl_name '(' oarg_type_list_ocomma ')' fnres { Node *n; @@ -1115,7 +1082,7 @@ fndcl: $$->nname->ntype = n; funchdr($$); } -| '(' oarg_type_list ')' new_name '(' oarg_type_list ')' fnres +| '(' oarg_type_list_ocomma ')' new_name '(' oarg_type_list_ocomma ')' fnres { Node *rcvr, *t; @@ -1145,7 +1112,7 @@ fndcl: } fntype: - LFUNC '(' oarg_type_list ')' fnres + LFUNC '(' oarg_type_list_ocomma ')' fnres { $$ = nod(OTFUNC, N, N); $$->list = $3; @@ -1161,7 +1128,6 @@ fnbody: $$ = $2; if($$ == nil) $$ = list1(nod(OEMPTY, N, N)); - yyoptsemi(0); } fnres: @@ -1173,7 +1139,7 @@ fnres: { $$ = list1(nod(ODCLFIELD, N, $1)); } -| '(' oarg_type_list ')' +| '(' oarg_type_list_ocomma ')' { $$ = $2; } @@ -1201,7 +1167,7 @@ xdcl_list: { $$ = nil; } -| xdcl_list xdcl +| xdcl_list xdcl ';' { $$ = concat($1, $2); if(nsyntaxerrors == 0) @@ -1312,7 +1278,7 @@ interfacedcl: } indcl: - '(' oarg_type_list ')' fnres + '(' oarg_type_list_ocomma ')' fnres { // without func keyword $$ = nod(OTFUNC, fakethis(), N); @@ -1349,11 +1315,11 @@ arg_type_list: $$ = list($1, $3); } -oarg_type_list: +oarg_type_list_ocomma: { $$ = nil; } -| arg_type_list +| arg_type_list ocomma { $$ = checkarglist($1); } @@ -1517,7 +1483,6 @@ braced_keyval_list: * optional things */ osemi: - %prec NotSemi | ';' ocomma: @@ -1535,11 +1500,11 @@ oexpr_list: } | expr_list -oexpr_or_type_list: +oexpr_or_type_list_ocomma: { $$ = nil; } -| expr_or_type_list +| expr_or_type_list ocomma osimple_stmt: { @@ -1576,29 +1541,29 @@ oliteral: * an output package */ hidden_import: - LPACKAGE sym + LPACKAGE sym ';' /* variables */ -| LVAR hidden_pkg_importsym hidden_type +| LVAR hidden_pkg_importsym hidden_type ';' { importvar($2, $3, PEXTERN); } -| LCONST hidden_pkg_importsym '=' hidden_constant +| LCONST hidden_pkg_importsym '=' hidden_constant ';' { importconst($2, types[TIDEAL], $4); } -| LCONST hidden_pkg_importsym hidden_type '=' hidden_constant +| LCONST hidden_pkg_importsym hidden_type '=' hidden_constant ';' { importconst($2, $3, $5); } -| LTYPE hidden_pkgtype hidden_type +| LTYPE hidden_pkgtype hidden_type ';' { importtype($2, $3); } -| LFUNC hidden_pkg_importsym '(' ohidden_funarg_list ')' ohidden_funres +| LFUNC hidden_pkg_importsym '(' ohidden_funarg_list ')' ohidden_funres ';' { importvar($2, functype(N, $4, $6), PFUNC); } -| LFUNC '(' hidden_funarg_list ')' sym '(' ohidden_funarg_list ')' ohidden_funres +| LFUNC '(' hidden_funarg_list ')' sym '(' ohidden_funarg_list ')' ohidden_funres ';' { if($3->next != nil || $3->n->op != ODCLFIELD) { yyerror("bad receiver in method"); diff --git a/src/cmd/gc/lex.c b/src/cmd/gc/lex.c index c202f3ecdc..43b676d6fc 100644 --- a/src/cmd/gc/lex.c +++ b/src/cmd/gc/lex.c @@ -85,6 +85,7 @@ main(int argc, char *argv[]) fatal("open %s: %r", infile); curio.peekc = 0; curio.peekc1 = 0; + curio.nlsemi = 0; block = 1; @@ -310,6 +311,7 @@ importfile(Val *f, int line) curio.peekc = 0; curio.peekc1 = 0; curio.infile = file; + curio.nlsemi = 0; typecheckok = 1; for(;;) { c = getc(); @@ -354,6 +356,7 @@ cannedimports(char *file, char *cp) curio.peekc1 = 0; curio.infile = file; curio.cp = cp; + curio.nlsemi = 0; pkgmyname = S; typecheckok = 1; @@ -389,8 +392,14 @@ _yylex(void) l0: c = getc(); - if(isspace(c)) + if(isspace(c)) { + if(c == '\n' && curio.nlsemi) { + ungetc(c); + DBG("lex: implicit semi\n"); + return ';'; + } goto l0; + } lineno = lexlineno; /* start of token */ @@ -444,7 +453,6 @@ l0: cp = mal(sizeof(int32)); clen = sizeof(int32); - caseq: for(;;) { if(escchar('"', &escflag, &v)) break; @@ -460,15 +468,14 @@ l0: clen += c; } } - goto catem; - + goto strlit; + case '`': /* `...` */ strcpy(lexbuf, "``"); cp = mal(sizeof(int32)); clen = sizeof(int32); - casebq: for(;;) { c = getc(); if(c == EOF) { @@ -480,51 +487,8 @@ l0: cp = remal(cp, clen, 1); cp[clen++] = c; } - goto catem; - - catem: - c = getc(); - if(isspace(c)) - goto catem; - - // skip comments - if(c == '/') { - c1 = getc(); - if(c1 == '*') { - for(;;) { - c = getr(); - while(c == '*') { - c = getr(); - if(c == '/') - goto catem; - } - if(c == EOF) { - yyerror("eof in comment"); - errorexit(); - } - } - } - if(c1 == '/') { - for(;;) { - c = getr(); - if(c == '\n') - goto catem; - if(c == EOF) { - yyerror("eof in comment"); - errorexit(); - } - } - } - ungetc(c1); - } - - // cat adjacent strings - if(c == '"') - goto caseq; - if(c == '`') - goto casebq; - ungetc(c); + strlit: *(int32*)cp = clen-sizeof(int32); // length do { cp = remal(cp, clen, 1); @@ -554,12 +518,22 @@ l0: case '/': c1 = getc(); if(c1 == '*') { + int nl; + + nl = 0; for(;;) { c = getr(); + if(c == '\n') + nl = 1; while(c == '*') { c = getr(); - if(c == '/') + if(c == '/') { + if(nl) + ungetc('\n'); goto l0; + } + if(c == '\n') + nl = 1; } if(c == EOF) { yyerror("eof in comment"); @@ -570,8 +544,10 @@ l0: if(c1 == '/') { for(;;) { c = getr(); - if(c == '\n') + if(c == '\n') { + ungetc(c); goto l0; + } if(c == EOF) { yyerror("eof in comment"); errorexit(); @@ -962,42 +938,43 @@ caseout: return LLITERAL; } -/* - * help the parser. if the next token is not c and not ';', - * insert a ';' before it. - */ -void -yyoptsemi(int c) -{ - if(c == 0) - c = -1; - if(yychar <= 0) - yysemi = c; -} - int32 yylex(void) { - // if we delayed a token, return that one. - if(yynext) { - yylast = yynext; - yynext = 0; - return yylast; + int lx; + + lx = _yylex(); + + if(curio.nlsemi && lx == EOF) { + // if the nlsemi bit is set, we'd be willing to + // insert a ; if we saw a \n, but we didn't. + // that means the final \n is missing. + // complain here, because we can give a + // good message. the syntax error we'd get + // otherwise is inscrutable. + yyerror("missing newline at end of file"); + lx = ';'; } - yylast = _yylex(); - - // if there's an optional semicolon needed, - // delay the token we just read. - if(yysemi) { - if(yylast != ';' && yylast != yysemi) { - yynext = yylast; - yylast = ';'; - } - yysemi = 0; + switch(lx) { + case LNAME: + case LLITERAL: + case LBREAK: + case LCONTINUE: + case LFALL: + case LRETURN: + case LINC: + case LDEC: + case ')': + case '}': + case ']': + curio.nlsemi = 1; + break; + default: + curio.nlsemi = 0; + break; } - - return yylast; + return lx; } int