exp/template/html: Reworked escapeText to recognize attr boundaries.

The following testcases now pass:

`<a href=x` tests that we do not error on partial unquoted attrs.
`<a href=x ` tests that spaces do end unquoted attrs on spaces.
`<a href=''` tests that we recognize the end of single quoted attrs.
`<a href=""` tests that we recognize the end of double quoted attrs.

R=golang-dev, nigeltao
CC=golang-dev
https://golang.org/cl/4932051
This commit is contained in:
Mike Samuel 2011-08-25 11:24:43 +10:00 committed by Nigel Tao
parent 15580526de
commit 42a56d3e81
2 changed files with 144 additions and 51 deletions

View File

@ -23,6 +23,9 @@ func Escape(t *template.Template) (*template.Template, os.Error) {
if c.errStr != "" { if c.errStr != "" {
return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr) return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr)
} }
if c.state != stateText {
return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c)
}
return t, nil return t, nil
} }
@ -38,7 +41,7 @@ func escape(c context, n parse.Node) context {
case *parse.RangeNode: case *parse.RangeNode:
return escapeBranch(c, &n.BranchNode, "range") return escapeBranch(c, &n.BranchNode, "range")
case *parse.TextNode: case *parse.TextNode:
return escapeText(c, n) return escapeText(c, n.Text)
case *parse.WithNode: case *parse.WithNode:
return escapeBranch(c, &n.BranchNode, "with") return escapeBranch(c, &n.BranchNode, "with")
} }
@ -91,11 +94,19 @@ func join(a, b context, line int, nodeName string) context {
// escapeBranch escapes a branch template node: "if", "range" and "with". // escapeBranch escapes a branch template node: "if", "range" and "with".
func escapeBranch(c context, n *parse.BranchNode, nodeName string) context { func escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
c0 := escapeList(c, n.List) c0 := escapeList(c, n.List)
if nodeName == "range" { if nodeName == "range" && c0.state != stateError {
// The "true" branch of a "range" node can execute multiple times. // The "true" branch of a "range" node can execute multiple times.
// We check that executing n.List once results in the same context // We check that executing n.List once results in the same context
// as executing n.List twice. // as executing n.List twice.
c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName) c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName)
if c0.state == stateError {
// Make clear that this is a problem on loop re-entry
// since developers tend to overlook that branch when
// debugging templates.
c0.errLine = n.Line
c0.errStr = "on range loop re-entry: " + c0.errStr
return c0
}
} }
c1 := escapeList(c, n.ElseList) c1 := escapeList(c, n.ElseList)
return join(c0, c1, n.Line, nodeName) return join(c0, c1, n.Line, nodeName)
@ -112,10 +123,40 @@ func escapeList(c context, n *parse.ListNode) context {
return c return c
} }
// delimEnds maps each delim to a string of characters that terminate it.
var delimEnds = [...]string{
delimDoubleQuote: `"`,
delimSingleQuote: "'",
// Determined empirically by running the below in various browsers.
// var div = document.createElement("DIV");
// for (var i = 0; i < 0x10000; ++i) {
// div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
// if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
// document.write("<p>U+" + i.toString(16));
// }
delimSpaceOrTagEnd: " \t\n\f\r>",
}
// escapeText escapes a text template node. // escapeText escapes a text template node.
func escapeText(c context, n *parse.TextNode) context { func escapeText(c context, s []byte) context {
for s := n.Text; len(s) > 0; { for len(s) > 0 {
if c.delim == delimNone {
c, s = transitionFunc[c.state](c, s) c, s = transitionFunc[c.state](c, s)
continue
}
i := bytes.IndexAny(s, delimEnds[c.delim])
if i == -1 {
// Remain inside the attribute.
// TODO: Recurse to take into account grammars for
// JS, CSS, URIs embedded in attrs once implemented.
return c
}
if c.delim != delimSpaceOrTagEnd {
// Consume any quote.
i++
}
c, s = context{state: stateTag}, s[i:]
} }
return c return c
} }
@ -157,15 +198,15 @@ func tText(c context, s []byte) (context, []byte) {
// tTag is the context transition function for the tag state. // tTag is the context transition function for the tag state.
func tTag(c context, s []byte) (context, []byte) { func tTag(c context, s []byte) (context, []byte) {
// Skip to the end tag, if there is one. // Find the attribute name.
i := bytes.IndexByte(s, '>') attrStart := eatWhiteSpace(s, 0)
if i != -1 { i, err := eatAttrName(s, attrStart)
return context{state: stateText}, s[i+1:] if err != nil {
return context{
state: stateError,
errStr: err.String(),
}, nil
} }
// Otherwise, find the attribute name.
i = eatWhiteSpace(s, 0)
attrStart, i := i, eatAttrName(s, i)
if i == len(s) { if i == len(s) {
return context{state: stateTag}, nil return context{state: stateTag}, nil
} }
@ -174,37 +215,44 @@ func tTag(c context, s []byte) (context, []byte) {
state = stateURL state = stateURL
} }
// Consume the "=". // Look for the start of the value.
i = eatWhiteSpace(s, i) i = eatWhiteSpace(s, i)
if i == len(s) || s[i] != '=' { if i == len(s) {
return context{state: stateTag}, s[i:] return context{state: stateTag}, s[i:]
} }
if s[i] == '>' {
return context{state: stateText}, s[i+1:]
} else if s[i] != '=' {
// Possible due to a valueless attribute or '/' in "<input />".
return context{state: stateTag}, s[i:]
}
// Consume the "=".
i = eatWhiteSpace(s, i+1) i = eatWhiteSpace(s, i+1)
// Find the delimiter. // Find the attribute delimiter.
if i == len(s) { if i < len(s) {
return context{state: state, delim: delimSpaceOrTagEnd}, nil
}
switch s[i] { switch s[i] {
case '\'': case '\'':
return context{state: state, delim: delimSingleQuote}, s[i+1:] return context{state: state, delim: delimSingleQuote}, s[i+1:]
case '"': case '"':
return context{state: state, delim: delimDoubleQuote}, s[i+1:] return context{state: state, delim: delimDoubleQuote}, s[i+1:]
} }
}
// TODO: This shouldn't be an error: `<a b=1 c={{.X}}` should be valid. return context{state: state, delim: delimSpaceOrTagEnd}, s[i:]
return context{state: stateError}, nil
} }
// tAttr is the context transition function for the attribute state. // tAttr is the context transition function for the attribute state.
func tAttr(c context, s []byte) (context, []byte) { func tAttr(c context, s []byte) (context, []byte) {
// TODO: look for the delimiter.
return c, nil return c, nil
} }
// tURL is the context transition function for the URL state. // tURL is the context transition function for the URL state.
func tURL(c context, s []byte) (context, []byte) { func tURL(c context, s []byte) (context, []byte) {
// TODO: look for the delimiter. // TODO: Look for query and fragment boundaries within a URL so we
// can %-encode actions in the query and fragment parts, HTML escape
// actions elsewhere, and filter any actions at the start that might
// inject a dangerous protocol such as "javascript:".
return c, nil return c, nil
} }
@ -214,16 +262,24 @@ func tError(c context, s []byte) (context, []byte) {
} }
// eatAttrName returns the largest j such that s[i:j] is an attribute name. // eatAttrName returns the largest j such that s[i:j] is an attribute name.
func eatAttrName(s []byte, i int) int { // It returns an error if s[i:] does not look like it begins with an
// attribute name, such as encountering a quote mark without a preceding
// equals sign.
func eatAttrName(s []byte, i int) (int, os.Error) {
for j := i; j < len(s); j++ { for j := i; j < len(s); j++ {
switch s[j] { switch s[j] {
case ' ', '\n', '\r', '\t', '=': case ' ', '\t', '\n', '\f', '\r', '=', '>':
return j return j, nil
case '\'', '"', '<':
// These result in a parse warning in HTML5 and are
// indicative of serious problems if seen in an attr
// name in a template.
return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
default: default:
// No-op. // No-op.
} }
} }
return len(s) return len(s), nil
} }
// eatTagName returns the largest j such that s[i:j] is a tag name. // eatTagName returns the largest j such that s[i:j] is a tag name.
@ -248,7 +304,7 @@ func eatTagName(s []byte, i int) int {
func eatWhiteSpace(s []byte, i int) int { func eatWhiteSpace(s []byte, i int) int {
for j := i; j < len(s); j++ { for j := i; j < len(s); j++ {
switch s[j] { switch s[j] {
case ' ', '\n', '\r', '\t': case ' ', '\t', '\n', '\f', '\r':
// No-op. // No-op.
default: default:
return j return j

View File

@ -8,7 +8,6 @@ import (
"bytes" "bytes"
"strings" "strings"
"template" "template"
"template/parse"
"testing" "testing"
) )
@ -87,6 +86,11 @@ func TestEscape(t *testing.T) {
`<a href="{{"'a<b'"}}">`, `<a href="{{"'a<b'"}}">`,
`<a href="'a%3Cb'">`, `<a href="'a%3Cb'">`,
}, },
{
"multipleAttrs",
"<a b=1 c={{.H}}>",
"<a b=1 c=&lt;Hello&gt;>",
},
} }
for _, tc := range testCases { for _, tc := range testCases {
@ -147,15 +151,11 @@ func TestErrors(t *testing.T) {
"{{if .Cond}}\n{{else}}\n<a{{end}}", "{{if .Cond}}\n{{else}}\n<a{{end}}",
"z:1: {{if}} branches", "z:1: {{if}} branches",
}, },
/*
TODO: Should the error really be non-empty? Both branches close the tag...
// Missing quote in the else branch.
{ {
// Missing quote in the else branch.
`{{if .Cond}}<a href="foo">{{else}}<a href="bar>{{end}}`, `{{if .Cond}}<a href="foo">{{else}}<a href="bar>{{end}}`,
"z:1: {{if}} branches", "z:1: {{if}} branches",
}, },
*/
{ {
// Different kind of attribute: href implies a URL. // Different kind of attribute: href implies a URL.
"<a {{if .Cond}}href='{{else}}title='{{end}}{{.X}}'>", "<a {{if .Cond}}href='{{else}}title='{{end}}{{.X}}'>",
@ -171,11 +171,15 @@ func TestErrors(t *testing.T) {
}, },
{ {
"{{range .Items}}<a{{end}}", "{{range .Items}}<a{{end}}",
"z:1: {{range}} branches", `z:1: on range loop re-entry: "<" in attribute name: "<a"`,
}, },
{ {
"\n{{range .Items}} x='<a{{end}}", "\n{{range .Items}} x='<a{{end}}",
"z:2: {{range}} branches", "z:2: on range loop re-entry: {{range}} branches",
},
{
"<a b=1 c={{.H}}",
"z ends in a non-text context: {stateAttr delimSpaceOrTagEnd",
}, },
} }
@ -236,14 +240,38 @@ func TestEscapeText(t *testing.T) {
`<a href=`, `<a href=`,
context{state: stateURL, delim: delimSpaceOrTagEnd}, context{state: stateURL, delim: delimSpaceOrTagEnd},
}, },
{
`<a href=x`,
context{state: stateURL, delim: delimSpaceOrTagEnd},
},
{
`<a href=x `,
context{state: stateTag},
},
{
`<a href=>`,
context{state: stateText},
},
{
`<a href=x>`,
context{state: stateText},
},
{ {
`<a href ='`, `<a href ='`,
context{state: stateURL, delim: delimSingleQuote}, context{state: stateURL, delim: delimSingleQuote},
}, },
{
`<a href=''`,
context{state: stateTag},
},
{ {
`<a href= "`, `<a href= "`,
context{state: stateURL, delim: delimDoubleQuote}, context{state: stateURL, delim: delimDoubleQuote},
}, },
{
`<a href=""`,
context{state: stateTag},
},
{ {
`<a title="`, `<a title="`,
context{state: stateAttr, delim: delimDoubleQuote}, context{state: stateAttr, delim: delimDoubleQuote},
@ -256,20 +284,29 @@ func TestEscapeText(t *testing.T) {
`<a Href='/`, `<a Href='/`,
context{state: stateURL, delim: delimSingleQuote}, context{state: stateURL, delim: delimSingleQuote},
}, },
{
`<a href='"`,
context{state: stateURL, delim: delimSingleQuote},
},
{
`<a href="'`,
context{state: stateURL, delim: delimDoubleQuote},
},
{
`<input checked type="checkbox"`,
context{state: stateTag},
},
} }
for _, tc := range testCases { for _, tc := range testCases {
n := &parse.TextNode{ b := []byte(tc.input)
NodeType: parse.NodeText, c := escapeText(context{}, b)
Text: []byte(tc.input),
}
c := escapeText(context{}, n)
if !tc.output.eq(c) { if !tc.output.eq(c) {
t.Errorf("input %q: want context %v got %v", tc.input, tc.output, c) t.Errorf("input %q: want context %v got %v", tc.input, tc.output, c)
continue continue
} }
if tc.input != string(n.Text) { if tc.input != string(b) {
t.Errorf("input %q: text node was modified: want %q got %q", tc.input, tc.input, n.Text) t.Errorf("input %q: text node was modified: want %q got %q", tc.input, tc.input, b)
continue continue
} }
} }