- snapshot of state before trying yet another, hopefully better working

way to integrate comments into the generated output - various simplificatins and cleanups throughout R=r OCL=20062 CL=20062
2024-09-23 03:19:12 +00:00 · 2008-11-26 13:23:26 -08:00 · 2008-11-26 13:23:26 -08:00 · 732b53a1fe
commit 732b53a1fe
parent d040d26863
6 changed files with 179 additions and 155 deletions
--- a/usr/gri/pretty/ast.go
+++ b/usr/gri/pretty/ast.go
@ -180,14 +180,14 @@ export var BadDecl = NewDecl(0, Scanner.ILLEGAL, false);
 // Program

 export type Comment struct {
-	pos, tok int;
+	pos int;
 	text string;
 }


-export func NewComment(pos, tok int, text string) *Comment {
+export func NewComment(pos int, text string) *Comment {
 	c := new(Comment);
-	c.pos, c.tok, c.text = pos, tok, text;
+	c.pos, c.text = pos, text;
 	return c;
 }

--- a/usr/gri/pretty/compilation.go
+++ b/usr/gri/pretty/compilation.go
@ -122,7 +122,7 @@ export func Compile(src_file string, flags *Flags) (*AST.Program, int) {
 	err.Init(src_file, src, flags.columns);

 	var scanner Scanner.Scanner;
-	scanner.Init(&err, src, flags.testmode);
+	scanner.Init(&err, src, true, flags.testmode);

 	var tstream *<-chan *Scanner.Token;
 	if flags.tokenchan {
--- a/usr/gri/pretty/parser.go
+++ b/usr/gri/pretty/parser.go
@ -78,15 +78,8 @@ func (P *Parser) Next0() {


 func (P *Parser) Next() {
-	// TODO This is too expensive for every token - fix
-	for P.Next0();
-		P.tok == Scanner.COMMENT_WW ||
-		P.tok == Scanner.COMMENT_WB ||
-		P.tok == Scanner.COMMENT_BW ||
-		P.tok == Scanner.COMMENT_BB ;
-		P.Next0() 
-	{
-		P.comments.Push(AST.NewComment(P.pos, P.tok, P.val));
+	for P.Next0(); P.tok == Scanner.COMMENT; P.Next0() {
+		P.comments.Push(AST.NewComment(P.pos, P.val));
 	}
 }

--- a/usr/gri/pretty/pretty.go
+++ b/usr/gri/pretty/pretty.go
@ -49,8 +49,7 @@ func main() {
 				return;
 			}
 			if !silent.BVal() && !flags.testmode {
-				var P Printer.Printer;
-				(&P).Program(prog);
+				Printer.Print(prog);
 			}
 		}
 	}
--- a/usr/gri/pretty/printer.go
+++ b/usr/gri/pretty/printer.go
@ -4,20 +4,20 @@

 package Printer

-import "array"
-import Strings "strings"
-import Scanner "scanner"
-import AST "ast"
-import Flag "flag"
-import Fmt "fmt"
-import IO "io"
-import OS "os"
-import TabWriter "tabwriter"
+import (
+	"os";
+	"array";
+	"tabwriter";
+	"flag";
+	"fmt";
+	Scanner "scanner";
+	AST "ast";
+)

 var (
-	tabwidth = Flag.Int("tabwidth", 4, nil, "tab width");
-	usetabs = Flag.Bool("usetabs", false, nil, "align with tabs instead of blanks");
-	comments = Flag.Bool("comments", false, nil, "enable printing of comments");
+	tabwidth = flag.Int("tabwidth", 4, nil, "tab width");
+	usetabs = flag.Bool("usetabs", true, nil, "align with tabs instead of blanks");
+	comments = flag.Bool("comments", false, nil, "enable printing of comments");
 )


@ -34,25 +34,60 @@ func assert(p bool) {
 // ----------------------------------------------------------------------------
 // Printer

-export type Printer struct {
-	writer IO.Write;
+type Printer struct {
+	// output
+	writer *tabwriter.Writer;
 	
+	// comments
+	comments *array.Array;
+	cindex int;
+	cpos int;
+
 	// formatting control
 	lastpos int;  // pos after last string
 	level int;  // true scope level
 	indent int;  // indentation level
 	semi bool;  // pending ";"
 	newl int;  // pending "\n"'s
-
-	// comments
-	clist *array.Array;
-	cindex int;
-	cpos int;
 }


-func (P *Printer) Printf(fmt string, s ...) {
-	Fmt.fprintf(P.writer, fmt, s);
+func (P *Printer) NextComment() {
+	P.cindex++;
+	if P.comments != nil && P.cindex < P.comments.Len() {
+		P.cpos = P.comments.At(P.cindex).(*AST.Comment).pos;
+	} else {
+		P.cpos = 1<<30;  // infinite
+	}
+}
+
+
+func (P *Printer) Init(writer *tabwriter.Writer, comments *array.Array) {
+	// writer
+	padchar := byte(' ');
+	if usetabs.BVal() {
+		padchar = '\t';
+	}
+	P.writer = tabwriter.New(os.Stdout, int(tabwidth.IVal()), 1, padchar, true);
+
+	// comments
+	P.comments = comments;
+	P.cindex = -1;
+	P.NextComment();
+	
+	// formatting control initialized correctly by default
+}
+
+
+// ----------------------------------------------------------------------------
+// Printing support
+
+func (P *Printer) Printf(format string, s ...) {
+	n, err := fmt.fprintf(P.writer, format, s);
+	if err != nil {
+		panic("print error - exiting");
+	}
+	P.lastpos += n;
 }


@ -60,6 +95,7 @@ func (P *Printer) String(pos int, s string) {
 	if pos == 0 {
 		pos = P.lastpos;  // estimate
 	}
+	P.lastpos = pos;

 	if P.semi && P.level > 0 {  // no semicolons at level 0
 		P.Printf(";");
@ -67,66 +103,78 @@ func (P *Printer) String(pos int, s string) {

 	//print("--", pos, "[", s, "]\n");
 	
+	src_nl := 0;
 	at_line_begin := false;
 	for comments.BVal() && P.cpos < pos {
 		//print("cc", P.cpos, "\n");
 		
-		// we have a comment that comes before s
-		comment := P.clist.At(P.cindex).(*AST.Comment);
-		text := comment.text;
-		assert(len(text) >= 3);  // classification char + "//" or "/*"
+		// we have a comment/newline that comes before s
+		comment := P.comments.At(P.cindex).(*AST.Comment);
+		ctext := comment.text;
 		
-		// classify comment
-		switch comment.tok {
-		case Scanner.COMMENT_BB:
-			// black space before and after comment on the same line
-			// - print surrounded by blanks
-			P.Printf(" %s ", text);
+		if ctext == "\n" {
+			// found a newline in src
+			src_nl++;

-		case Scanner.COMMENT_BW:
-			// only white space after comment on the same line
-			// - put into next cell
-			P.Printf("\t%s", text);
-			
-		case Scanner.COMMENT_WW, Scanner.COMMENT_WB:
-			// only white space before comment on the same line
-			// - indent
-			/*
-			if !P.buf.EmptyLine() {
-				P.buf.Newline();
-			}
-			*/
-			for i := P.indent; i > 0; i-- {
-				P.Printf("\t");
-			}
-			P.Printf("%s", text);
-
-		default:
-			panic("UNREACHABLE");
-		}
-		
-		if text[1] == '/' {
-			// line comments must end in newline
-			// TODO should we set P.newl instead?
-			P.Printf("\n");
-			for i := P.indent; i > 0; i-- {
-				P.Printf("\t");
-			}
-			at_line_begin = true;
-		}
-
-		P.cindex++;
-		if P.cindex < P.clist.Len() {
-			P.cpos = P.clist.At(P.cindex).(*AST.Comment).pos;
 		} else {
-			P.cpos = 1000000000;  // infinite
+			// classify comment
+			assert(len(ctext) >= 3);  // classification char + "//" or "/*"
+			//-style comment
+			if src_nl > 0 || P.cpos == 0 {
+				// only white space before comment on this line
+				// or file starts with comment
+				// - indent
+				P.Printf("\n");
+				for i := P.indent; i > 0; i-- {
+					P.Printf("\t");
+				}
+				P.Printf("%s", ctext);
+			} else {
+				// black space before comment on this line
+				if ctext[1] == '/' {
+					//-style comment
+					// - put in next cell
+					P.Printf("\t%s", ctext);
+				} else {
+					/*-style comment */
+					// - print surrounded by blanks
+					P.Printf(" %s ", ctext);
+				}
+			}
+
+			if ctext[1] == '/' {
+				//-style comments must end in newline
+				if P.newl == 0 {
+					P.newl = 1;
+				}
+				/*
+				// TODO should we set P.newl instead?
+				P.Printf("\n");
+				for i := P.indent; i > 0; i-- {
+					P.Printf("\t");
+				}
+				at_line_begin = true;
+				*/
+			}
+			
+			src_nl = 0;
 		}
+
+		P.NextComment();
 	}

 	if at_line_begin && P.newl > 0 {
 		P.newl--;
 	}
 	
+	if src_nl > P.newl {
+		P.newl = src_nl;
+	}
+
+	if P.newl > 2 {
+		P.newl = 2;
+	}
+
 	if P.newl > 0 {
 		P.Printf("\n");
 		if P.newl > 1 {
@ -141,7 +189,6 @@ func (P *Printer) String(pos int, s string) {

 	P.Printf("%s", s);

-	P.lastpos = pos + len(s);
 	P.semi, P.newl = false, 0;
 }

@ -151,11 +198,6 @@ func (P *Printer) Blank() {
 }


-func (P *Printer) Tab() {
-	P.String(0, "\t");
-}
-
-
 func (P *Printer) Token(pos int, tok int) {
 	P.String(pos, Scanner.TokenString(tok));
 }
@ -225,7 +267,7 @@ func (P *Printer) Fields(list *array.Array) {
 				} else if prev == x.tok {
 					P.String(0, ", ");
 				} else {
-					P.Tab();
+					P.String(0, "\t");
 				}
 			}
 			P.Expr(x);
@ -565,7 +607,7 @@ func (P *Printer) Declaration(d *AST.Decl, parenthesized bool) {
 		}

 		if d.val != nil {
-			P.Tab();
+			P.String(0, "\t");
 			if d.tok != Scanner.IMPORT {
 				P.String(0, "= ");
 			}
@ -603,30 +645,37 @@ func (P *Printer) Declaration(d *AST.Decl, parenthesized bool) {
 // Program

 func (P *Printer) Program(p *AST.Program) {
-	// TODO should initialize all fields?
-	padchar := byte(' ');
-	if usetabs.BVal() {
-		padchar = '\t';
-	}
-	P.writer = TabWriter.New(OS.Stdout, int(tabwidth.IVal()), 1, padchar, true);
-	
-	P.clist = p.comments;
-	P.cindex = 0;
-	if p.comments.Len() > 0 {
-		P.cpos = p.comments.At(0).(*AST.Comment).pos;
-	} else {
-		P.cpos = 1000000000;  // infinite
-	}
-
-	// Print package
 	P.String(p.pos, "package ");
 	P.Expr(p.ident);
 	P.newl = 2;
 	for i := 0; i < p.decls.Len(); i++ {
 		P.Declaration(p.decls.At(i), false);
 	}
-	P.newl = 2;	// TODO we should be able to do this with 1 instead of 2
-				// but we are loosing the last buffer flush in that case
-
-	P.String(0, "");  // flush buffer
+	
+	// end program with '\n'
+	P.newl = 1;
+}
+
+
+// ----------------------------------------------------------------------------
+// External interface
+
+export func Print(prog *AST.Program) {
+	// setup
+	padchar := byte(' ');
+	if usetabs.BVal() {
+		padchar = '\t';
+	}
+	writer := tabwriter.New(os.Stdout, int(tabwidth.IVal()), 1, padchar, true);
+	var P Printer;
+	P.Init(writer, prog.comments);
+
+	P.Program(prog);
+	
+	// flush
+	P.String(0, "");
+	err := P.writer.Flush();
+	if err != nil {
+		panic("print error - exiting");
+	}
 }
--- a/usr/gri/pretty/scanner.go
+++ b/usr/gri/pretty/scanner.go
@ -17,10 +17,7 @@ export const (
 	STRING;
 	EOF;

-	COMMENT_BB;
-	COMMENT_BW;
-	COMMENT_WB;
-	COMMENT_WW;
+	COMMENT;

 	ADD;
 	SUB;
@ -124,10 +121,7 @@ export func TokenString(tok int) string {
 	case STRING: return "STRING";
 	case EOF: return "EOF";

-	case COMMENT_BB: return "COMMENT_BB";
-	case COMMENT_BW: return "COMMENT_BW";
-	case COMMENT_WB: return "COMMENT_WB";
-	case COMMENT_WW: return "COMMENT_WW";
+	case COMMENT: return "COMMENT";

 	case ADD: return "+";
 	case SUB: return "-";
@ -285,10 +279,12 @@ export type ErrorHandler interface {


 export type Scanner struct {
+	// setup
 	err ErrorHandler;
+	src string;  // source
+	scan_comments bool;

 	// scanning
-	src string;  // source
 	pos int;  // current reading position
 	ch int;  // one char look-ahead
 	chpos int;  // position of ch
@ -341,10 +337,11 @@ func (S *Scanner) ExpectNoErrors() {
 }


-func (S *Scanner) Init(err ErrorHandler, src string, testmode bool) {
+func (S *Scanner) Init(err ErrorHandler, src string, scan_comments, testmode bool) {
 	S.err = err;
-	
 	S.src = src;
+	S.scan_comments = scan_comments;
+
 	S.pos = 0;
 	S.linepos = 0;

@ -379,41 +376,43 @@ func (S *Scanner) Expect(ch int) {
 }


-// Returns true if a newline was seen, returns false otherwise.
-func (S *Scanner) SkipWhitespace() bool {
-	sawnl := S.chpos == 0;  // file beginning is always start of a new line
+func (S *Scanner) SkipWhitespace() {
 	for {
 		switch S.ch {
-		case '\t', '\r', ' ':  // nothing to do
-		case '\n': sawnl = true;
-		default: return sawnl;
+		case '\t', '\r', ' ':
+			// nothing to do
+		case '\n':
+			if S.scan_comments {
+				return;
+			}
+		default:
+			return;
 		}
 		S.Next();
 	}
 	panic("UNREACHABLE");
-	return false;
 }


-func (S *Scanner) ScanComment(leading_ws bool) (tok int, val string) {
+func (S *Scanner) ScanComment() string {
 	// first '/' already consumed
 	pos := S.chpos - 1;
 	
 	if S.ch == '/' {
-		// comment
+		//-style comment
 		S.Next();
 		for S.ch >= 0 {
 			S.Next();
 			if S.ch == '\n' {
 				// '\n' terminates comment but we do not include
-				// it in the comment (otherwise we cannot see the
+				// it in the comment (otherwise we don't see the
 				// start of a newline in SkipWhitespace()).
 				goto exit;
 			}
 		}
 		
 	} else {
-		/* comment */
+		/*-style comment */
 		S.Expect('*');
 		for S.ch >= 0 {
 			ch := S.ch;
@ -430,12 +429,6 @@ func (S *Scanner) ScanComment(leading_ws bool) (tok int, val string) {
 exit:
 	comment := S.src[pos : S.chpos];

-	// skip whitespace but stop at line end
-	for S.ch == '\t' || S.ch == '\r' || S.ch == ' ' {
-		S.Next();
-	}
-	trailing_ws := S.ch == '\n';
-
 	if S.testmode {
 		// interpret ERROR and SYNC comments
 		oldpos := -1;
@ -457,21 +450,7 @@ exit:
 		}
 	}

-	if leading_ws {
-		if trailing_ws {
-			tok = COMMENT_WW;
-		} else {
-			tok = COMMENT_WB;
-		}
-	} else {
-		if trailing_ws {
-			tok = COMMENT_BW;
-		} else {
-			tok = COMMENT_BB;
-		}
-	}
-
-	return tok, comment;
+	return comment;
 }


@ -700,7 +679,7 @@ func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {


 func (S *Scanner) Scan() (pos, tok int, val string) {
-	sawnl := S.SkipWhitespace();
+L:	S.SkipWhitespace();
 	
 	pos, tok = S.chpos, ILLEGAL;
 	
@ -711,6 +690,7 @@ func (S *Scanner) Scan() (pos, tok int, val string) {
 		S.Next();  // always make progress
 		switch ch {
 		case -1: tok = EOF;
+		case '\n': tok, val = COMMENT, "\n";
 		case '"': tok, val = STRING, S.ScanString();
 		case '\'': tok, val = INT, S.ScanChar();
 		case '`': tok, val = STRING, S.ScanRawString();
@ -740,7 +720,10 @@ func (S *Scanner) Scan() (pos, tok int, val string) {
 		case '*': tok = S.Select2(MUL, MUL_ASSIGN);
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
-				tok, val = S.ScanComment(sawnl);
+				tok, val = COMMENT, S.ScanComment();
+				if !S.scan_comments {
+					goto L;
+				}
 			} else {
 				tok = S.Select2(QUO, QUO_ASSIGN);
 			}