First cut at a Go pretty printer:

- code scavenged from Go-in-Go front-end (will merge back)
- using "symbol-table" free parsing to build AST
- no printing yet

R=r
OCL=15504
CL=15504
This commit is contained in:
Robert Griesemer 2008-09-18 16:58:37 -07:00
parent a67258f380
commit 81d7c51837
8 changed files with 2379 additions and 0 deletions

39
usr/gri/pretty/Makefile Normal file
View File

@ -0,0 +1,39 @@
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
G=6g
L=6l
pretty: pretty.6
$(L) -o pretty pretty.6
test: all
pretty *.go
# pretty $(GOROOT)/test/fixedbugs/*.go # some files legally don't compile
pretty $(GOROOT)/test/sieve.go
pretty $(GOROOT)/src/pkg/*.go
pretty $(GOROOT)/src/lib/flag.go
pretty $(GOROOT)/src/lib/fmt.go
pretty $(GOROOT)/src/lib/rand.go
pretty $(GOROOT)/src/lib/math/*.go
pretty $(GOROOT)/src/lib/container/*.go
pretty $(GOROOT)/src/syscall/*.go
pretty base.go decls.go
pretty -token_chan base.go decls.go
echo "PASSED"
install: pretty
cp pretty $(HOME)/bin/pretty
clean:
rm -f pretty *.6 *~
pretty.6: parser.6 printer.6 platform.6 scanner.6
parser.6: ast.6 scanner.6 utils.6
scanner.6: utils.6 platform.6
%.6: %.go
$(G) $(F) $<

39
usr/gri/pretty/ast.go Normal file
View File

@ -0,0 +1,39 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package AST;
export type Expr interface {
pos() int;
print();
}
export type Stat interface {
pos() int;
print();
}
// ---------------------------------------------------------------------
// Concrete nodes
export type Ident struct {
pos_ int;
val_ string;
}
func (p *Ident) pos() int {
return p.pos_;
}
func (p *Ident) print() {
print("x"); // TODO fix this
}
// TODO: complete this

1320
usr/gri/pretty/parser.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package Platform
import Utils "utils"
// ----------------------------------------------------------------------------
// Environment
export var
GOARCH,
GOOS,
GOROOT,
USER string;
func GetEnv(key string) string {
n := len(key);
for i := 0; i < sys.envc(); i++ {
v := sys.envv(i);
if v[0 : n] == key {
return v[n + 1 : len(v)]; // +1: trim "="
}
}
return "";
}
func init() {
GOARCH = GetEnv("GOARCH");
GOOS = GetEnv("GOOS");
GOROOT = GetEnv("GOROOT");
USER = GetEnv("USER");
}
// ----------------------------------------------------------------------------
// I/O
export const (
MAGIC_obj_file = "@gri-go.7@v0"; // make it clear thar it cannot be a source file
src_file_ext = ".go";
obj_file_ext = ".7";
)
export func ReadObjectFile(filename string) (data string, ok bool) {
data, ok = sys.readfile(filename + obj_file_ext);
magic := MAGIC_obj_file; // TODO remove once len(constant) works
if ok && len(data) >= len(magic) && data[0 : len(magic)] == magic {
return data, ok;
}
return "", false;
}
export func ReadSourceFile(name string) (data string, ok bool) {
name = Utils.TrimExt(name, src_file_ext) + src_file_ext;
data, ok = sys.readfile(name);
return data, ok;
}
export func WriteObjectFile(name string, data string) bool {
name = Utils.TrimExt(Utils.BaseName(name), src_file_ext) + obj_file_ext;
return sys.writefile(name, data);
}

48
usr/gri/pretty/pretty.go Normal file
View File

@ -0,0 +1,48 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import Flag "flag"
import Platform "platform"
import Scanner "scanner"
import AST "ast" // should not be needed
import Parser "parser"
import Printer "printer"
var (
verbose = Flag.Bool("v", false, nil, "verbose mode");
sixg = Flag.Bool("6g", false, nil, "6g compatibility mode");
tokenchan = Flag.Bool("token_chan", false, nil, "use token channel for scanner-parser connection");
)
func main() {
Flag.Parse();
// process files
for i := 0; i < Flag.NArg(); i++ {
src_file := Flag.Arg(i);
src, ok := Platform.ReadSourceFile(src_file);
if !ok {
print("cannot open ", src_file, "\n");
return;
}
scanner := new(Scanner.Scanner);
scanner.Open(src_file, src);
var tstream *<-chan *Scanner.Token;
if tokenchan.BVal() {
tstream = scanner.TokenStream();
}
parser := new(Parser.Parser);
parser.Open(verbose.BVal(), scanner, tstream);
parser.ParseProgram();
}
}

View File

@ -0,0 +1,8 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package Printer;
// TODO Fill in the code to print the AST

792
usr/gri/pretty/scanner.go Normal file
View File

@ -0,0 +1,792 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package Scanner
import Platform "platform"
import Utils "utils"
export const (
ILLEGAL = iota;
EOF;
INT;
FLOAT;
STRING;
COMMA;
COLON;
SEMICOLON;
PERIOD;
LPAREN;
RPAREN;
LBRACK;
RBRACK;
LBRACE;
RBRACE;
ASSIGN;
DEFINE;
INC;
DEC;
NOT;
AND;
OR;
XOR;
ADD;
SUB;
MUL;
QUO;
REM;
EQL;
NEQ;
LSS;
LEQ;
GTR;
GEQ;
SHL;
SHR;
ARROW;
HASH;
ADD_ASSIGN;
SUB_ASSIGN;
MUL_ASSIGN;
QUO_ASSIGN;
REM_ASSIGN;
AND_ASSIGN;
OR_ASSIGN;
XOR_ASSIGN;
SHL_ASSIGN;
SHR_ASSIGN;
LAND;
LOR;
// IDENT must be immediately before keywords
IDENT;
// keywords
KEYWORDS_BEG;
BREAK;
CASE;
CHAN;
CONST;
CONTINUE;
DEFAULT;
ELSE;
EXPORT;
FALLTHROUGH;
FOR;
FUNC;
GO;
GOTO;
IF;
IMPORT;
INTERFACE;
MAP;
PACKAGE;
RANGE;
RETURN;
SELECT;
STRUCT;
SWITCH;
TYPE;
VAR;
KEYWORDS_END;
)
var Keywords *map [string] int;
var VerboseMsgs bool; // error message customization
export func TokenName(tok int) string {
switch (tok) {
case ILLEGAL: return "illegal";
case EOF: return "eof";
case INT: return "int";
case FLOAT: return "float";
case STRING: return "string";
case COMMA: return ",";
case COLON: return ":";
case SEMICOLON: return ";";
case PERIOD: return ".";
case LPAREN: return "(";
case RPAREN: return ")";
case LBRACK: return "[";
case RBRACK: return "]";
case LBRACE: return "LBRACE";
case RBRACE: return "RBRACE";
case ASSIGN: return "=";
case DEFINE: return ":=";
case INC: return "++";
case DEC: return "--";
case NOT: return "!";
case AND: return "&";
case OR: return "|";
case XOR: return "^";
case ADD: return "+";
case SUB: return "-";
case MUL: return "*";
case QUO: return "/";
case REM: return "%";
case EQL: return "==";
case NEQ: return "!=";
case LSS: return "<";
case LEQ: return "<=";
case GTR: return ">";
case GEQ: return ">=";
case SHL: return "<<";
case SHR: return ">>";
case ARROW: return "<-";
case HASH: return "#";
case ADD_ASSIGN: return "+=";
case SUB_ASSIGN: return "-=";
case MUL_ASSIGN: return "+=";
case QUO_ASSIGN: return "/=";
case REM_ASSIGN: return "%=";
case AND_ASSIGN: return "&=";
case OR_ASSIGN: return "|=";
case XOR_ASSIGN: return "^=";
case SHL_ASSIGN: return "<<=";
case SHR_ASSIGN: return ">>=";
case LAND: return "&&";
case LOR: return "||";
case IDENT: return "ident";
case BREAK: return "break";
case CASE: return "case";
case CHAN: return "chan";
case CONST: return "const";
case CONTINUE: return "continue";
case DEFAULT: return "default";
case ELSE: return "else";
case EXPORT: return "export";
case FALLTHROUGH: return "fallthrough";
case FOR: return "for";
case FUNC: return "func";
case GO: return "go";
case GOTO: return "goto";
case IF: return "if";
case IMPORT: return "import";
case INTERFACE: return "interface";
case MAP: return "map";
case PACKAGE: return "package";
case RANGE: return "range";
case RETURN: return "return";
case SELECT: return "select";
case STRUCT: return "struct";
case SWITCH: return "switch";
case TYPE: return "type";
case VAR: return "var";
}
return "???";
}
func init() {
Keywords = new(map [string] int);
for i := KEYWORDS_BEG; i <= KEYWORDS_END; i++ {
Keywords[TokenName(i)] = i;
}
// Provide column information in error messages for gri only...
VerboseMsgs = Platform.USER == "gri";
}
func is_whitespace(ch int) bool {
return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t';
}
func is_letter(ch int) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 128 ;
}
func digit_val(ch int) int {
if '0' <= ch && ch <= '9' {
return ch - '0';
}
if 'a' <= ch && ch <= 'f' {
return ch - 'a' + 10;
}
if 'A' <= ch && ch <= 'F' {
return ch - 'A' + 10;
}
return 16; // larger than any legal digit val
}
export type Scanner struct {
filename string; // error reporting only
nerrors int; // number of errors
errpos int; // last error position
src string; // scanned source
pos int; // current reading position
ch int; // one char look-ahead
chpos int; // position of ch
}
// Read the next Unicode char into S.ch.
// S.ch < 0 means end-of-file.
//
func (S *Scanner) Next() {
const (
Bit1 = 7;
Bitx = 6;
Bit2 = 5;
Bit3 = 4;
Bit4 = 3;
T1 = (1 << (Bit1 + 1) - 1) ^ 0xFF; // 0000 0000
Tx = (1 << (Bitx + 1) - 1) ^ 0xFF; // 1000 0000
T2 = (1 << (Bit2 + 1) - 1) ^ 0xFF; // 1100 0000
T3 = (1 << (Bit3 + 1) - 1) ^ 0xFF; // 1110 0000
T4 = (1 << (Bit4 + 1) - 1) ^ 0xFF; // 1111 0000
Rune1 = 1 << (Bit1 + 0*Bitx) - 1; // 0000 0000 0111 1111
Rune2 = 1 << (Bit2 + 1*Bitx) - 1; // 0000 0111 1111 1111
Rune3 = 1 << (Bit3 + 2*Bitx) - 1; // 1111 1111 1111 1111
Maskx = 0x3F; // 1 << Bitx - 1; // 0011 1111
Testx = 0xC0; // Maskx ^ 0xFF; // 1100 0000
Bad = 0xFFFD; // Runeerror
);
src := S.src;
lim := len(src);
pos := S.pos;
// 1-byte sequence
// 0000-007F => T1
if pos >= lim {
S.ch = -1; // end of file
S.chpos = lim;
return;
}
c0 := int(src[pos]);
pos++;
if c0 < Tx {
S.ch = c0;
S.chpos = S.pos;
S.pos = pos;
return;
}
// 2-byte sequence
// 0080-07FF => T2 Tx
if pos >= lim {
goto bad;
}
c1 := int(src[pos]) ^ Tx;
pos++;
if c1 & Testx != 0 {
goto bad;
}
if c0 < T3 {
if c0 < T2 {
goto bad;
}
r := (c0 << Bitx | c1) & Rune2;
if r <= Rune1 {
goto bad;
}
S.ch = r;
S.chpos = S.pos;
S.pos = pos;
return;
}
// 3-byte sequence
// 0800-FFFF => T3 Tx Tx
if pos >= lim {
goto bad;
}
c2 := int(src[pos]) ^ Tx;
pos++;
if c2 & Testx != 0 {
goto bad;
}
if c0 < T4 {
r := (((c0 << Bitx | c1) << Bitx) | c2) & Rune3;
if r <= Rune2 {
goto bad;
}
S.ch = r;
S.chpos = S.pos;
S.pos = pos;
return;
}
// bad encoding
bad:
S.ch = Bad;
S.chpos = S.pos;
S.pos += 1;
return;
}
// Compute (line, column) information for a given source position.
func (S *Scanner) LineCol(pos int) (line, col int) {
line = 1;
lpos := 0;
src := S.src;
if pos > len(src) {
pos = len(src);
}
for i := 0; i < pos; i++ {
if src[i] == '\n' {
line++;
lpos = i;
}
}
return line, pos - lpos;
}
func (S *Scanner) Error(pos int, msg string) {
const errdist = 10;
delta := pos - S.errpos; // may be negative!
if delta < 0 {
delta = -delta;
}
if delta > errdist || S.nerrors == 0 /* always report first error */ {
print(S.filename);
if pos >= 0 {
// print position
line, col := S.LineCol(pos);
if VerboseMsgs {
print(":", line, ":", col);
} else {
print(":", line);
}
}
print(": ", msg, "\n");
S.nerrors++;
S.errpos = pos;
}
if S.nerrors >= 10 {
sys.exit(1);
}
}
func (S *Scanner) Open(filename, src string) {
S.filename = filename;
S.nerrors = 0;
S.errpos = 0;
S.src = src;
S.pos = 0;
S.Next();
}
func CharString(ch int) string {
s := string(ch);
switch ch {
case '\a': s = `\a`;
case '\b': s = `\b`;
case '\f': s = `\f`;
case '\n': s = `\n`;
case '\r': s = `\r`;
case '\t': s = `\t`;
case '\v': s = `\v`;
case '\\': s = `\\`;
case '\'': s = `\'`;
}
return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
}
func (S *Scanner) Expect(ch int) {
if S.ch != ch {
S.Error(S.chpos, "expected " + CharString(ch) + ", found " + CharString(S.ch));
}
S.Next(); // make always progress
}
func (S *Scanner) SkipWhitespace() {
for is_whitespace(S.ch) {
S.Next();
}
}
func (S *Scanner) SkipComment() {
// '/' already consumed
if S.ch == '/' {
// comment
S.Next();
for S.ch != '\n' && S.ch >= 0 {
S.Next();
}
} else {
/* comment */
pos := S.chpos - 1;
S.Expect('*');
for S.ch >= 0 {
ch := S.ch;
S.Next();
if ch == '*' && S.ch == '/' {
S.Next();
return;
}
}
S.Error(pos, "comment not terminated");
}
}
func (S *Scanner) ScanIdentifier() (tok int, val string) {
pos := S.chpos;
for is_letter(S.ch) || digit_val(S.ch) < 10 {
S.Next();
}
val = S.src[pos : S.chpos];
var present bool;
tok, present = Keywords[val];
if !present {
tok = IDENT;
}
return tok, val;
}
func (S *Scanner) ScanMantissa(base int) {
for digit_val(S.ch) < base {
S.Next();
}
}
func (S *Scanner) ScanNumber(seen_decimal_point bool) (tok int, val string) {
pos := S.chpos;
tok = INT;
if seen_decimal_point {
tok = FLOAT;
pos--; // '.' is one byte
S.ScanMantissa(10);
goto exponent;
}
if S.ch == '0' {
// int or float
S.Next();
if S.ch == 'x' || S.ch == 'X' {
// hexadecimal int
S.Next();
S.ScanMantissa(16);
} else {
// octal int or float
S.ScanMantissa(8);
if digit_val(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' {
// float
tok = FLOAT;
goto mantissa;
}
// octal int
}
goto exit;
}
mantissa:
// decimal int or float
S.ScanMantissa(10);
if S.ch == '.' {
// float
tok = FLOAT;
S.Next();
S.ScanMantissa(10)
}
exponent:
if S.ch == 'e' || S.ch == 'E' {
// float
tok = FLOAT;
S.Next();
if S.ch == '-' || S.ch == '+' {
S.Next();
}
S.ScanMantissa(10);
}
exit:
return tok, S.src[pos : S.chpos];
}
func (S *Scanner) ScanDigits(n int, base int) {
for digit_val(S.ch) < base {
S.Next();
n--;
}
if n > 0 {
S.Error(S.chpos, "illegal char escape");
}
}
func (S *Scanner) ScanEscape(quote int) string {
// TODO: fix this routine
ch := S.ch;
pos := S.chpos;
S.Next();
switch (ch) {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
return string(ch);
case '0', '1', '2', '3', '4', '5', '6', '7':
S.ScanDigits(3 - 1, 8); // 1 char already read
return ""; // TODO fix this
case 'x':
S.ScanDigits(2, 16);
return ""; // TODO fix this
case 'u':
S.ScanDigits(4, 16);
return ""; // TODO fix this
case 'U':
S.ScanDigits(8, 16);
return ""; // TODO fix this
default:
// check for quote outside the switch for better generated code (eventually)
if ch == quote {
return string(quote);
}
S.Error(pos, "illegal char escape");
}
return ""; // TODO fix this
}
func (S *Scanner) ScanChar() string {
// '\'' already consumed
pos := S.chpos - 1;
ch := S.ch;
S.Next();
if ch == '\\' {
S.ScanEscape('\'');
}
S.Expect('\'');
return S.src[pos : S.chpos];
}
func (S *Scanner) ScanString() string {
// '"' already consumed
pos := S.chpos - 1;
for S.ch != '"' {
ch := S.ch;
S.Next();
if ch == '\n' || ch < 0 {
S.Error(pos, "string not terminated");
break;
}
if ch == '\\' {
S.ScanEscape('"');
}
}
S.Next();
return S.src[pos : S.chpos];
}
func (S *Scanner) ScanRawString() string {
// '`' already consumed
pos := S.chpos - 1;
for S.ch != '`' {
ch := S.ch;
S.Next();
if ch == '\n' || ch < 0 {
S.Error(pos, "string not terminated");
break;
}
}
S.Next();
return S.src[pos : S.chpos];
}
func (S *Scanner) Select2(tok0, tok1 int) int {
if S.ch == '=' {
S.Next();
return tok1;
}
return tok0;
}
func (S *Scanner) Select3(tok0, tok1, ch2, tok2 int) int {
if S.ch == '=' {
S.Next();
return tok1;
}
if S.ch == ch2 {
S.Next();
return tok2;
}
return tok0;
}
func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {
if S.ch == '=' {
S.Next();
return tok1;
}
if S.ch == ch2 {
S.Next();
if S.ch == '=' {
S.Next();
return tok3;
}
return tok2;
}
return tok0;
}
func (S *Scanner) Scan() (tok, pos int, val string) {
S.SkipWhitespace();
ch := S.ch;
tok = ILLEGAL;
pos = S.chpos;
switch {
case is_letter(ch): tok, val = S.ScanIdentifier();
case digit_val(ch) < 10: tok, val = S.ScanNumber(false);
default:
S.Next(); // always make progress
switch ch {
case -1: tok = EOF;
case '"': tok, val = STRING, S.ScanString();
case '\'': tok, val = INT, S.ScanChar();
case '`': tok, val = STRING, S.ScanRawString();
case ':': tok = S.Select2(COLON, DEFINE);
case '.':
if digit_val(S.ch) < 10 {
tok, val = S.ScanNumber(true);
} else {
tok = PERIOD;
}
case ',': tok = COMMA;
case ';': tok = SEMICOLON;
case '(': tok = LPAREN;
case ')': tok = RPAREN;
case '[': tok = LBRACK;
case ']': tok = RBRACK;
case '{': tok = LBRACE;
case '}': tok = RBRACE;
case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
case '*': tok = S.Select2(MUL, MUL_ASSIGN);
case '/':
if S.ch == '/' || S.ch == '*' {
S.SkipComment();
// cannot simply return because of 6g bug
tok, pos, val = S.Scan();
return tok, pos, val;
}
tok = S.Select2(QUO, QUO_ASSIGN);
case '%': tok = S.Select2(REM, REM_ASSIGN);
case '^': tok = S.Select2(XOR, XOR_ASSIGN);
case '<':
if S.ch == '-' {
S.Next();
tok = ARROW;
} else {
tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
}
case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
case '=': tok = S.Select2(ASSIGN, EQL);
case '!': tok = S.Select2(NOT, NEQ);
case '&': tok = S.Select3(AND, AND_ASSIGN, '&', LAND);
case '|': tok = S.Select3(OR, OR_ASSIGN, '|', LOR);
case '#': tok = HASH;
default:
S.Error(pos, "illegal character " + CharString(ch));
tok = ILLEGAL;
}
}
return tok, pos, val;
}
export type Token struct {
pos int;
tok int;
val string;
}
func (S *Scanner) TokenStream() *<-chan *Token {
ch := new(chan *Token);
go func(S *Scanner, ch *chan <- *Token) {
for {
t := new(Token);
t.tok, t.pos, t.val = S.Scan();
ch <- t;
if t.tok == EOF {
break;
}
}
}(S, ch);
return ch;
}

63
usr/gri/pretty/utils.go Normal file
View File

@ -0,0 +1,63 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package Utils
export func BaseName(s string) string {
// TODO this is not correct for non-ASCII strings!
i := len(s) - 1;
for i >= 0 && s[i] != '/' {
if s[i] > 128 {
panic("non-ASCII string");
}
i--;
}
return s[i + 1 : len(s)];
}
export func Contains(s, sub string, pos int) bool {
end := pos + len(sub);
return pos >= 0 && end <= len(s) && s[pos : end] == sub;
}
export func TrimExt(s, ext string) string {
i := len(s) - len(ext);
if i >= 0 && s[i : len(s)] == ext {
s = s[0 : i];
}
return s;
}
export func IntToString(x, base int) string {
x0 := x;
if x < 0 {
x = -x;
if x < 0 {
panic("smallest int not handled");
}
} else if x == 0 {
return "0";
}
// x > 0
hex := "0123456789ABCDEF";
var buf [32] byte;
i := len(buf);
for x > 0 {
i--;
buf[i] = hex[x % base];
x /= base;
}
if x0 < 0 {
i--;
buf[i] = '-';
}
return string(buf)[i : len(buf)];
}