go/usr/gri/pretty/format.go
Robert Griesemer 787dd4d3f3 semi-weekly snapshot:
- format.go implementation complete
- lots of documentation added (not quite complete)

TBR=r
OCL=29130
CL=29130
2009-05-20 17:09:59 -07:00

968 lines
23 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/* The format package implements syntax-directed, type-driven formatting
of arbitrary data structures. Formatting a data structure consists of
two phases: first, a format specification is parsed (once per format)
which results in a "compiled" format. The format can then be used
repeatedly to print arbitrary values to an io.Writer.
A format specification consists of a set of named format rules in EBNF.
The rule names correspond to the type names of the data structure to be
formatted. Each format rule consists of literal values and struct field
names which are combined into sequences, alternatives, grouped, optional,
repeated, or indented sub-expressions. Additionally, format rules may be
specified via Go formatter functions.
When formatting a value, its type name determines the format rule. The
syntax of the rule or the corresponding formatter function determines
if and how the value is formatted. A format rule may refer to a struct
field of the current value. In this case the same mechanism is applied
recursively to that field.
*/
package format
import (
"container/vector";
"flag";
"fmt";
"go/scanner";
"go/token";
"io";
"os";
"reflect";
"runtime";
"strconv";
"strings";
)
// ----------------------------------------------------------------------------
// Format representation
// Custom formatters implement the Formatter function type.
// A formatter is invoked with a writer w, an environment env
// (provided to format.Fprint and simply passed through), the
// value to format, and the rule name under which the formatter
// was installed (the same formatter function may be installed
// under different names). If the formatter returns false, the
// output it wrote to w is discarded and the value is treated
// as empty; if it returns true, its output is emitted.
//
type Formatter func(w io.Writer, env, value interface{}, rule_name string) bool
// A FormatterMap is a set of custom formatters.
// It maps a rule name to a formatter. A rule name may be
// qualified with a package name; Parse resolves that package
// name against the package declarations of the format source.
//
type FormatterMap map [string] Formatter;
// A production expression is built from the following nodes.
// The field order of the struct nodes matters: the parser builds
// them with positional composite literals.
//
type (
// expr is any node of a production expression (or nil for an empty one).
expr interface {};
alternatives []expr; // x | y | z
sequence []expr; // x y z
// a literal is represented as string or []byte
// (string if it starts with a '%' format, []byte otherwise)
field struct {
field_name string; // including "^", "*"
rule_name string; // "" if no rule name specified
};
indentation struct {
indent, body expr; // >> indent body <<
};
option struct {
body expr; // [body]
};
repetition struct {
body, div expr; // {body / div}
};
// custom wraps a user-supplied formatter installed via a FormatterMap.
custom struct {
rule_name string; // name under which the formatter was installed
// the formatter function to invoke
form Formatter
};
)
/* The syntax of a format specification is presented in the same EBNF
notation as used in the Go language spec. The syntax of white space,
comments, identifiers, and string literals is the same as in Go.
A format specification consists of a possibly empty set of package
declarations and format rules:
Format = [ Entry { ";" Entry } ] [ ";" ] .
Entry = PackageDecl | FormatRule .
A package declaration binds a package name (such as 'ast') to a
package import path (such as '"go/ast"'). A package name must be
declared at most once.
PackageDecl = PackageName ImportPath .
PackageName = identifier .
ImportPath = string .
A format rule binds a rule name to a format expression. A rule name
may be a type name or one of the special names 'default' (denoting
the default rule) or '/' (denoting the global "divider" rule - see
below). A type name may be the name of a predeclared type ('int',
'float32', etc.), the name of an anonymous composite type ('array',
'pointer', etc.), or the name of a user-defined type qualified by
the corresponding package name (for instance 'ast.MapType'). The
package name must have been declared already. A rule name must be
declared at most once.
FormatRule = RuleName "=" Expression .
RuleName = TypeName | "default" | "/" .
TypeName = [ PackageName "." ] identifier .
A format expression specifies how a value is to be formatted. In its
most general form, a format expression is a set of alternatives separated
by "|". Each alternative and the entire expression may be empty.
Expression = [ Sequence ] { "|" [ Sequence ] } .
Sequence = Operand { Operand } .
Operand = Literal | Field | Indentation | Group | Option | Repetition .
Literal = string .
Field = FieldName [ ":" RuleName ] .
FieldName = identifier | "^" | "*" .
Indentation = ">>" Operand Expression "<<" .
Group = "(" Expression ")" .
Option = "[" Expression "]" .
Repetition = "{" Expression [ "/" Expression ] "}" .
TODO complete this comment
*/
type Format map [string] expr; // compiled format: RuleName -> Expression
// ----------------------------------------------------------------------------
// Error handling
// Error describes an individual error. The position Pos, if valid,
// indicates the format source position the error relates to. The
// error is specified with the Msg string.
//
type Error struct {
Pos token.Position; // source position; only used in messages if valid
Msg string; // error description, without position prefix
}
// String makes *Error satisfy the os.Error interface.
// The message is prefixed with "line:column: " if the
// error position is valid.
func (e *Error) String() string {
	if !e.Pos.IsValid() {
		return e.Msg;
	}
	return fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column) + e.Msg;
}
// Multiple parser errors are returned as an ErrorList.
// Parse returns a non-empty ErrorList as its os.Error result
// if the format source contained errors.
type ErrorList []*Error
// ErrorList implements the SortInterface;
// errors are ordered by their source offset.
func (p ErrorList) Len() int {
	return len(p);
}
func (p ErrorList) Swap(i, j int) {
	pi, pj := p[i], p[j];
	p[i] = pj;
	p[j] = pi;
}
func (p ErrorList) Less(i, j int) bool {
	a, b := p[i].Pos, p[j].Pos;
	return a.Offset < b.Offset;
}
// String makes ErrorList satisfy the os.Error interface.
// It reports the first error and, if there are more,
// the number of remaining errors.
func (p ErrorList) String() string {
	n := len(p);
	if n == 0 {
		return "unspecified error";
	}
	first := p[0].String();
	if n == 1 {
		return first;
	}
	return fmt.Sprintf("%s (and %d more errors)", first, n - 1);
}
// ----------------------------------------------------------------------------
// Parsing
// parser holds the state of parsing a single format specification.
type parser struct {
errors vector.Vector; // collected *Error values, in order of occurrence
scanner scanner.Scanner;
pos token.Position; // token position
tok token.Token; // one token look-ahead
lit []byte; // token literal
packs map [string] string; // PackageName -> ImportPath
rules Format; // RuleName -> Expression
}
// Error records an error at pos. It is also the scanner's error
// handler (the parser is passed to scanner.Init).
func (p *parser) Error(pos token.Position, msg string) {
	// Drop errors reported on the same line as the previously recorded
	// error: they are likely follow-up errors caused by the parser being
	// out of sync.
	if n := p.errors.Len(); n > 0 {
		if prev := p.errors.Last().(*Error); prev.Pos.Line == pos.Line {
			return;
		}
	}
	p.errors.Push(&Error{pos, msg});
}
// next advances the parser to the next token.
func (p *parser) next() {
	pos, tok, lit := p.scanner.Scan();
	p.pos = pos;
	p.tok = tok;
	p.lit = lit;
}
// error_expected reports that msg was expected at pos. If pos is the
// position of the current token, the message also says what was found.
func (p *parser) error_expected(pos token.Position, msg string) {
	text := "expected " + msg;
	at_current_token := pos.Offset == p.pos.Offset;
	if at_current_token {
		// make the message more specific by naming the offending token
		text += ", found '" + p.tok.String() + "'";
		if p.tok.IsLiteral() {
			text += " " + string(p.lit);
		}
	}
	p.Error(pos, text);
}
// expect reports an error if the current token is not tok.
// In either case it consumes the current token and returns
// the position that token had.
func (p *parser) expect(tok token.Token) token.Position {
	start := p.pos;
	if p.tok != tok {
		want := "'" + tok.String() + "'";
		p.error_expected(start, want);
	}
	// always advance so that the parser makes progress
	p.next();
	return start;
}
// parseIdentifier consumes the current token, which is expected to be
// an identifier, and returns its literal text.
func (p *parser) parseIdentifier() string {
	// the literal must be captured before expect advances the scanner
	ident := string(p.lit);
	p.expect(token.IDENT);
	return ident;
}
// parseTypeName parses a possibly package-qualified type name.
// For a qualified name the package name is replaced by its declared
// import path. The 2nd result is true iff the name is a single
// identifier.
func (p *parser) parseTypeName() (string, bool) {
	start := p.pos;
	name := p.parseIdentifier();
	if p.tok != token.PERIOD {
		return name, true;
	}
	// name is a package name; replace it with its import path
	import_path, declared := p.packs[name];
	if declared {
		name = import_path;
	} else {
		p.Error(start, "package not declared: " + name);
	}
	p.next();
	return name + "." + p.parseIdentifier(), false;
}
// parseRuleName parses a rule name: a type name, 'default', or '/'.
// A package-qualified type name has its package name resolved.
// The 2nd result is true iff the rule name is a single identifier
// (and thus could also be a package name).
//
func (p *parser) parseRuleName() (string, bool) {
	if p.tok == token.IDENT {
		name, is_ident := p.parseTypeName();
		return name, is_ident;
	}
	name := "";
	switch p.tok {
	case token.DEFAULT:
		name = "default";
	case token.QUO:
		name = "/";
	default:
		p.error_expected(p.pos, "rule name");
	}
	// consume the token; in the error case this guarantees progress
	p.next();
	return name, false;
}
// asLiteral converts a literal segment (which must be a string) into
// its expr representation: segments starting with '%' stay strings,
// all others become []byte.
func asLiteral(x interface{}) expr {
	s := x.(string);
	if len(s) == 0 || s[0] != '%' {
		// plain text: []byte can be written without conversion
		return io.StringBytes(s);
	}
	// the segment starts with a format character
	return s;
}
// parseLiteral parses a string literal and returns it either as a
// single literal (string or []byte, see asLiteral) or as a sequence
// of such literals. If the current token is not a string, an error
// is reported and the empty string is returned.
func (p *parser) parseLiteral() expr {
if p.tok != token.STRING {
p.expect(token.STRING);
return "";
}
s, err := strconv.Unquote(string(p.lit));
if err != nil {
// the scanner accepted the literal, so unquoting is expected to succeed
panic("scanner error");
}
p.next();
// A string literal may contain newline characters and %-format specifiers.
// To simplify and speed up printing of the literal, split it into segments
// that start with "\n" or "%" (but not "%%"), possibly followed by a last
// segment that starts with some other character. If there is more than one
// such segment, return a sequence of "simple" literals, otherwise just
// return the string.
// split string
var list vector.Vector;
list.Init(0);
i0 := 0; // start of the current segment
for i := 0; i < len(s); i++ {
switch s[i] {
case '\n':
// next segment starts with '\n'
case '%':
// a '%' at the very end of the string is not split off either
if i+1 >= len(s) || s[i+1] == '%' {
i++;
continue; // "%%" is not a format-%
}
// next segment starts with '%'
default:
// all other cases do not split the string
continue;
}
// split off the current segment
if i0 < i {
list.Push(s[i0 : i]);
i0 = i;
}
}
// the final segment may start with any character
// (it is empty iff the string is empty)
list.Push(s[i0 : len(s)]);
// no need for a sequence if there is only one segment
if list.Len() == 1 {
return asLiteral(list.At(0));
}
// convert list into a sequence
seq := make(sequence, list.Len());
for i := 0; i < list.Len(); i++ {
seq[i] = asLiteral(list.At(i));
}
return seq;
}
// parseField parses a field operand: FieldName [ ":" RuleName ].
// It returns nil (without consuming anything) if the current token
// cannot start a field.
func (p *parser) parseField() expr {
	fname := "";
	switch p.tok {
	case token.IDENT:
		// TODO(gri) could use reflect.ExpandType() to lookup a field
		// at parse-time - would provide "compile-time" errors and
		// faster printing.
		fname = p.parseIdentifier();
	case token.XOR:
		fname = "^";
		p.next();
	case token.MUL:
		fname = "*";
		p.next();
	default:
		return nil;
	}
	// optional field-specific rule name
	rule_name := "";
	if p.tok == token.COLON {
		p.next();
		name, _ := p.parseRuleName();
		rule_name = name;
	}
	return &field{fname, rule_name};
}
// Forward declaration: parseOperand and parseExpression are mutually recursive.
func (p *parser) parseExpression() expr
// parseOperand parses a single operand: a literal, an indentation,
// a group, an option, a repetition, or a field. The result is nil
// if the current token starts none of these (or if a group is empty).
func (p *parser) parseOperand() (x expr) {
	switch p.tok {
	case token.STRING:
		return p.parseLiteral();
	case token.SHR:
		// >> indent body <<
		p.next();
		indent := p.parseOperand();
		body := p.parseExpression();
		p.expect(token.SHL);
		return &indentation{indent, body};
	case token.LPAREN:
		// ( expression )
		p.next();
		group := p.parseExpression();
		p.expect(token.RPAREN);
		return group;
	case token.LBRACK:
		// [ body ]
		p.next();
		body := p.parseExpression();
		p.expect(token.RBRACK);
		return &option{body};
	case token.LBRACE:
		// { body / div }, the divider being optional
		p.next();
		body := p.parseExpression();
		var div expr;
		if p.tok == token.QUO {
			p.next();
			div = p.parseExpression();
		}
		p.expect(token.RBRACE);
		return &repetition{body, div};
	}
	return p.parseField(); // may be nil
}
// parseSequence parses consecutive operands until no further operand
// is found. The result is nil for no operands, the operand itself
// for a single operand, and a sequence otherwise.
func (p *parser) parseSequence() expr {
	var operands vector.Vector;
	operands.Init(0);
	for {
		x := p.parseOperand();
		if x == nil {
			break;
		}
		operands.Push(x);
	}
	n := operands.Len();
	if n == 0 {
		return nil;
	}
	if n == 1 {
		// a single operand needs no sequence wrapper
		return operands.At(0).(expr);
	}
	seq := make(sequence, n);
	for i := 0; i < n; i++ {
		seq[i] = operands.At(i).(expr);
	}
	return seq;
}
// parseExpression parses sequences separated by "|". Empty sequences
// are dropped. The result is nil if nothing remains, the sequence
// itself if exactly one remains, and an alternatives node otherwise.
func (p *parser) parseExpression() expr {
	var choices vector.Vector;
	choices.Init(0);
	for {
		if x := p.parseSequence(); x != nil {
			choices.Push(x);
		}
		if p.tok != token.OR {
			break;
		}
		p.next();
	}
	n := choices.Len();
	if n == 0 {
		return nil;
	}
	if n == 1 {
		// a single alternative needs no alternatives wrapper
		return choices.At(0).(expr);
	}
	alt := make(alternatives, n);
	for i := 0; i < n; i++ {
		alt[i] = choices.At(i).(expr);
	}
	return alt;
}
// parseFormat parses a complete format specification: a list of
// package declarations and format rules separated by ";". Declared
// packages are recorded in p.packs and rules in p.rules.
func (p *parser) parseFormat() {
	for p.tok != token.EOF {
		start := p.pos;
		name, is_ident := p.parseRuleName();
		switch p.tok {
		case token.STRING:
			// package declaration: PackageName ImportPath
			import_path, err := strconv.Unquote(string(p.lit));
			if err != nil {
				panic("scanner error");
			}
			p.next();
			_, declared := p.packs[name];
			switch {
			case !is_ident:
				p.Error(start, "illegal package name: " + name);
			case declared:
				p.Error(start, "package already declared: " + name);
			default:
				p.packs[name] = import_path;
			}
		case token.ASSIGN:
			// format rule: RuleName "=" Expression
			p.next();
			x := p.parseExpression();
			if _, declared := p.rules[name]; declared {
				p.Error(start, "format rule already declared: " + name);
			} else {
				p.rules[name] = x;
			}
		default:
			p.error_expected(p.pos, "package declaration or format rule");
			p.next(); // make progress in any case
		}
		// entries are separated by ";"; anything else ends the list
		if p.tok != token.SEMICOLON {
			break;
		}
		p.next();
	}
	p.expect(token.EOF);
}
// remap resolves the package name of a package-qualified rule name:
// "pack.Type" becomes "importpath.Type", where importpath is the
// import path declared for pack in the format source. This is the same
// form parseTypeName produces for rule names in the format source.
// Names without a "." are returned unchanged. If the package was not
// declared, an error is reported at pos and name is returned unchanged.
func (p *parser) remap(pos token.Position, name string) string {
	i := strings.Index(name, ".");
	if i < 0 {
		// unqualified name
		return name;
	}
	package_name := name[0 : i];
	// skip the "." itself; it is re-inserted below
	type_name := name[i+1 : len(name)];
	// lookup package
	if import_path, found := p.packs[package_name]; found {
		return import_path + "." + type_name;
	}
	p.Error(pos, "package not declared: " + package_name);
	return name;
}
// Parse parses a set of format productions from source src. Custom
// formatters in fmap, if any, are installed as additional rules under
// their (package-resolved) names; a formatter whose name clashes with
// an existing rule is reported as an error. If there are no errors,
// the result is a Format and the error is nil. Otherwise the format
// is nil and a non-empty ErrorList is returned.
//
func Parse(src []byte, fmap FormatterMap) (Format, os.Error) {
	// parse source
	var p parser;
	p.errors.Init(0);
	p.scanner.Init(src, &p, false);
	p.next();
	p.packs = make(map [string] string);
	p.rules = make(Format);
	p.parseFormat();

	// add custom formatters, if any
	// (errors for formatters have no source position)
	var invalidPos token.Position;
	for name, form := range fmap {
		name = p.remap(invalidPos, name);
		if _, found := p.rules[name]; !found {
			p.rules[name] = &custom{name, form};
		} else {
			p.Error(invalidPos, "formatter already declared: " + name);
		}
	}

	// convert errors list, if any
	if n := p.errors.Len(); n > 0 {
		errors := make(ErrorList, n);
		for i := 0; i < n; i++ {
			errors[i] = p.errors.At(i).(*Error);
		}
		return nil, errors;
	}

	return p.rules, nil;
}
// ----------------------------------------------------------------------------
// Formatting
// The current formatting state.
// A state is set up with init and used by a single Fprint call.
type state struct {
f Format; // the format used
env interface{}; // the user-supplied environment, simply passed through
def expr; // the default rule, if any
div expr; // the global divider rule, if any
writediv bool; // true if the divider needs to be written
errors chan os.Error; // not chan *Error: errors <- nil would be wrong!
indent io.ByteBuffer; // the current indentation
}
// init prepares ps for formatting with format f, environment env,
// and the channel on which errors are reported.
func (ps *state) init(f Format, env interface{}, errors chan os.Error) {
	ps.f, ps.env, ps.errors = f, env, errors;
	// cache the default ("default") rule, if present, for fast access
	if x, found := f["default"]; found {
		ps.def = x;
	}
	// cache the divider ("/") rule, if present, for fast access
	if x, found := f["/"]; found {
		ps.div = x;
	}
}
// error sends an error with the given message on the errors channel
// and then terminates the calling goroutine; it does not return.
func (ps *state) error(msg string) {
	err := os.NewError(msg);
	ps.errors <- err;
	runtime.Goexit();
}
// getField looks up the field named fieldname in val. It returns the
// field value and the "embedding level" at which it was found; the
// level is 0 for top-level fields of a struct. The field value is nil
// if val is not a struct or no such field exists. Among fields found
// in anonymous fields, the most shallow one wins.
//
func getField(val reflect.Value, fieldname string) (reflect.Value, int) {
	// only structs have fields
	if val.Kind() != reflect.StructKind {
		return nil, 0;
	}
	sval := val.(reflect.StructValue);
	styp := val.Type().(reflect.StructType);
	n := styp.Len();

	// top level: match by field name or, for an anonymous field,
	// by the unqualified name of its type
	suffix := "." + fieldname;
	for i := 0; i < n; i++ {
		name, typ, _, _ := styp.Field(i);
		switch {
		case name == fieldname:
			return sval.Field(i), 0;
		case name == "" && strings.HasSuffix(typ.Name(), suffix):
			return sval.Field(i), 0;
		}
	}

	// otherwise search inside the anonymous fields
	var best reflect.Value;
	depth := 1000; // infinity (no struct has that many levels)
	for i := 0; i < n; i++ {
		name, _, _, _ := styp.Field(i);
		if name != "" {
			continue;
		}
		// keep the most shallow field
		if f, l := getField(sval.Field(i), fieldname); f != nil && l < depth {
			best, depth = f, l;
		}
	}
	return best, depth + 1;
}
// default_names maps a reflect kind to the rule name used by typename
// for values whose type has no name.
var default_names = map[int]string {
reflect.ArrayKind: "array",
reflect.BoolKind: "bool",
reflect.ChanKind: "chan",
reflect.DotDotDotKind: "ellipsis",
reflect.FloatKind: "float",
reflect.Float32Kind: "float32",
reflect.Float64Kind: "float64",
reflect.FuncKind: "func",
reflect.IntKind: "int",
reflect.Int16Kind: "int16",
reflect.Int32Kind: "int32",
reflect.Int64Kind: "int64",
reflect.Int8Kind: "int8",
reflect.InterfaceKind: "interface",
reflect.MapKind: "map",
reflect.PtrKind: "pointer",
reflect.StringKind: "string",
reflect.StructKind: "struct",
reflect.UintKind: "uint",
reflect.Uint16Kind: "uint16",
reflect.Uint32Kind: "uint32",
reflect.Uint64Kind: "uint64",
reflect.Uint8Kind: "uint8",
reflect.UintptrKind: "uintptr",
}
// typename returns the rule name for value: the name of its type or,
// if the type has no name, the default name for its kind. The result
// is "" if neither exists.
func typename(value reflect.Value) string {
	if name := value.Type().Name(); name != "" {
		return name;
	}
	if kind_name, found := default_names[value.Kind()]; found {
		return kind_name;
	}
	return "";
}
// getFormat returns the expression of the rule with the given name,
// or the default rule if there is no such rule. If there is neither,
// an error is reported via ps.error.
func (ps *state) getFormat(name string) expr {
	fexpr, found := ps.f[name];
	switch {
	case found:
		return fexpr;
	case ps.def != nil:
		return ps.def;
	}
	ps.error(fmt.Sprintf("no production for type: '%s'\n", name));
	return nil;
}
// Forward declaration: printDiv and printf call each other.
func (ps *state) printf(w io.Writer, fexpr expr, value reflect.Value, index int) bool
// printDiv writes the global divider, if there is one and it is due,
// and marks the divider as due for the next call.
func (ps *state) printDiv(w io.Writer, value reflect.Value) {
	if div := ps.div; div != nil && ps.writediv {
		// disable the divider while it is printed so that literals
		// inside the divider rule do not print it again
		ps.div = nil;
		ps.printf(w, div, value, 0);
		ps.div = div;
	}
	ps.writediv = true;
}
// writeIndented writes s to w, inserting the current indentation
// after each '\n' in s.
func (ps *state) writeIndented(w io.Writer, s []byte) {
	start := 0; // beginning of the text not yet written
	for i := 0; i < len(s); i++ {
		if s[i] != '\n' {
			continue;
		}
		// write up to and including the newline, then the indentation
		end := i + 1;
		w.Write(s[start : end]);
		w.Write(ps.indent.Data());
		start = end;
	}
	w.Write(s[start : len(s)]);
}
// TODO complete this comment
// printf formats value according to the expression fexpr and writes
// the result to w. index selects the element when a "*" field is
// applied to an array value. A nil fexpr writes nothing and yields true.
// Returns true if a non-empty field value was found.
func (ps *state) printf(w io.Writer, fexpr expr, value reflect.Value, index int) bool {
if fexpr == nil {
return true;
}
switch t := fexpr.(type) {
case alternatives:
// - write first non-empty alternative
// - result is not empty iff there is a non-empty alternative
for _, x := range t {
// format into a buffer so that nothing is written for empty alternatives
var buf io.ByteBuffer;
if ps.printf(&buf, x, value, 0) {
w.Write(buf.Data());
return true;
}
}
return false;
case sequence:
// - write every element of the sequence
// - result is not empty iff no element was empty
b := true;
for _, x := range t {
// printf is the left operand so that it is called even if b is already false
b = ps.printf(w, x, value, index) && b;
}
return b;
case []byte:
// write literal, may start with "\n"
ps.printDiv(w, value);
if len(t) > 0 && t[0] == '\n' && ps.indent.Len() > 0 {
// newline must be followed by indentation
w.Write([]byte{'\n'});
w.Write(ps.indent.Data());
t = t[1 : len(t)];
}
w.Write(t);
return true;
case string:
// write format literal with value, starts with "%" (but not "%%")
ps.printDiv(w, value);
fmt.Fprintf(w, t, value.Interface());
return true;
case *field:
// - write the contents of the field
// - format is either the field format or the type-specific format
// - result is not empty iff the field is not empty
switch t.field_name {
case "^":
// identity - value doesn't change
case "*":
// indirect
switch v := value.(type) {
case reflect.ArrayValue:
// no element at index: the field is empty
if v.Len() <= index {
return false;
}
value = v.Elem(index);
case reflect.MapValue:
ps.error("reflection support for maps incomplete\n");
case reflect.PtrValue:
// nil pointer: the field is empty
if v.Get() == nil {
return false;
}
value = v.Sub();
case reflect.InterfaceValue:
// nil interface: the field is empty
if v.Get() == nil {
return false;
}
value = v.Value();
default:
ps.error(fmt.Sprintf("error: * does not apply to `%s`\n", value.Type().Name()));
}
default:
// field
field, _ := getField(value, t.field_name);
if field == nil {
ps.error(fmt.Sprintf("error: no field `%s` in `%s`\n", t.field_name, value.Type().Name()));
}
value = field;
}
// field-specific rule name
rule_name := t.rule_name;
if rule_name == "" {
rule_name = typename(value)
}
fexpr = ps.getFormat(rule_name);
return ps.printf(w, fexpr, value, index);
case *indentation:
// - write the body within the given indentation
// - the result is not empty iff the body is not empty
saved_len := ps.indent.Len();
ps.printf(&ps.indent, t.indent, value, index); // add additional indentation
b := ps.printf(w, t.body, value, index);
ps.indent.Truncate(saved_len); // reset indentation
return b;
case *option:
// - write body if it is not empty
// - the result is always not empty
var buf io.ByteBuffer;
if ps.printf(&buf, t.body, value, 0) {
w.Write(buf.Data());
}
return true;
case *repetition:
// - write body as long as it is not empty
// - the result is always not empty
// NOTE(review): the loop ends only when the body reports empty; a body
// that never does (e.g. a nil body, for which printf returns true)
// would appear to loop forever - confirm.
var buf io.ByteBuffer;
for i := 0; ps.printf(&buf, t.body, value, i); i++ {
// the divider is written between repetitions, not before the first one
if i > 0 {
ps.printf(w, t.div, value, i);
}
w.Write(buf.Data());
buf.Reset();
}
return true;
case *custom:
// - invoke custom formatter
// - its output is buffered and only written if the formatter returns true
var buf io.ByteBuffer;
if t.form(&buf, ps.env, value.Interface(), t.rule_name) {
ps.writeIndented(w, buf.Data());
return true;
}
return false;
}
panic("unreachable");
return false;
}
// Sandbox to wrap a writer.
// Counts total number of bytes written and handles write errors.
//
type sandbox struct {
writer io.Writer; // the wrapped writer
written int; // total number of bytes written so far
errors chan os.Error; // write errors are sent here
}
// Write forwards data to the sandboxed writer and adds the number of
// bytes written to the running total. If the underlying write fails,
// Write doesn't return: the error is sent to the errors channel and
// the current goroutine exits.
//
func (s *sandbox) Write(data []byte) (int, os.Error) {
	n, err := s.writer.Write(data);
	s.written += n;
	if err == nil {
		return n, nil;
	}
	s.errors <- err;
	runtime.Goexit();
	return n, nil;
}
// Fprint formats each argument according to the format f
// and writes to w. The environment env is passed through to
// custom formatters. The result is the total number of bytes
// written and an os.Error, if any.
//
// Formatting runs in a separate goroutine so that an error
// (reported via the errors channel) can abort it at any depth.
//
func (f Format) Fprint(w io.Writer, env interface{}, args ...) (int, os.Error) {
	errors := make(chan os.Error);
	sw := sandbox{w, 0, errors};

	var ps state;
	ps.init(f, env, errors);

	go func() {
		value := reflect.NewValue(args).(reflect.StructValue);
		for i := 0; i < value.Len(); i++ {
			fld := value.Field(i);
			ps.printf(&sw, ps.getFormat(typename(fld)), fld, 0);
		}
		errors <- nil; // no errors
	}();

	// Wait for the formatting goroutine before reading the byte count:
	// sw.written is only final (and safe to read) after the receive.
	err := <-errors;
	return sw.written, err;
}
// Print formats each argument according to the format f
// and writes to standard output, with a nil environment.
// The result is the total number of bytes written and an
// os.Error, if any.
//
func (f Format) Print(args ...) (int, os.Error) {
	n, err := f.Fprint(os.Stdout, nil, args);
	return n, err;
}
// Sprint formats each argument according to the format f
// and returns the resulting string. If an error occurs
// during formatting, the result contains the respective
// error message at the end.
//
func (f Format) Sprint(args ...) string {
	var buf io.ByteBuffer;
	// the byte count is not needed: the buffer holds the entire output
	if _, err := f.Fprint(&buf, nil, args); err != nil {
		fmt.Fprintf(&buf, "--- Sprint(%v) failed: %v", args, err);
	}
	return string(buf.Data());
}