mirror of
https://github.com/golang/go.git
synced 2024-09-30 06:47:04 +00:00
exp/template/html: simplify transition functions
This simplifies transition functions to make it easier to reliably elide comments in a later CL. Before: - transition functions are responsible for detecting special end tags. After: - the code to detect special end tags is done in one place. We were relying on end tags being skipped which meant we were not noticing comments inside script/style elements that contain no substitutions. This change means we will notice all such comments where necessary, but stripTags will notice none since it does not need to. This speeds up stripTags. R=nigeltao CC=golang-dev https://golang.org/cl/5074041
This commit is contained in:
parent
9169c27eaa
commit
1f577d26d7
@ -175,6 +175,15 @@ func isComment(s state) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// isInTag return whether s occurs solely inside an HTML tag.
|
||||
func isInTag(s state) bool {
|
||||
switch s {
|
||||
case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// delim is the delimiter that will end the current HTML attribute.
|
||||
type delim uint8
|
||||
|
||||
|
@ -583,7 +583,14 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
|
||||
// s, then returns the context after those tokens and the unprocessed suffix.
|
||||
func contextAfterText(c context, s []byte) (context, int) {
|
||||
if c.delim == delimNone {
|
||||
return transitionFunc[c.state](c, s)
|
||||
c1, i := tSpecialTagEnd(c, s)
|
||||
if i == 0 {
|
||||
// A special end tag (`</script>`) has been seen and
|
||||
// all content preceding it has been consumed.
|
||||
return c1, 0
|
||||
}
|
||||
// Consider all content up to any end tag.
|
||||
return transitionFunc[c.state](c, s[:i])
|
||||
}
|
||||
|
||||
i := bytes.IndexAny(s, delimEnds[c.delim])
|
||||
|
@ -814,7 +814,7 @@ func TestErrors(t *testing.T) {
|
||||
},
|
||||
{
|
||||
`<a onclick='alert(/x+\`,
|
||||
`unfinished escape sequence in JS regexp: "x+\\"`,
|
||||
`unfinished escape sequence in JS string: "x+\\"`,
|
||||
},
|
||||
{
|
||||
`<a onclick="/foo[\]/`,
|
||||
|
@ -165,12 +165,17 @@ func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
|
||||
// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
|
||||
func stripTags(html string) string {
|
||||
var b bytes.Buffer
|
||||
s, c, i := []byte(html), context{}, 0
|
||||
s, c, i, allText := []byte(html), context{}, 0, true
|
||||
// Using the transition funcs helps us avoid mangling
|
||||
// `<div title="1>2">` or `I <3 Ponies!`.
|
||||
for i != len(s) {
|
||||
if c.delim == delimNone {
|
||||
d, nread := transitionFunc[c.state](c, s[i:])
|
||||
st := c.state
|
||||
// Use RCDATA instead of parsing into JS or CSS styles.
|
||||
if c.element != elementNone && !isInTag(st) {
|
||||
st = stateRCDATA
|
||||
}
|
||||
d, nread := transitionFunc[st](c, s[i:])
|
||||
i1 := i + nread
|
||||
if c.state == stateText || c.state == stateRCDATA {
|
||||
// Emit text up to the start of the tag or comment.
|
||||
@ -184,6 +189,8 @@ func stripTags(html string) string {
|
||||
}
|
||||
}
|
||||
b.Write(s[i:j])
|
||||
} else {
|
||||
allText = false
|
||||
}
|
||||
c, i = d, i1
|
||||
continue
|
||||
@ -198,10 +205,9 @@ func stripTags(html string) string {
|
||||
}
|
||||
c, i = context{state: stateTag, element: c.element}, i1
|
||||
}
|
||||
if c.state == stateText {
|
||||
if b.Len() == 0 {
|
||||
return html
|
||||
}
|
||||
if allText {
|
||||
return html
|
||||
} else if c.state == stateText || c.state == stateRCDATA {
|
||||
b.Write(s[i:])
|
||||
}
|
||||
return b.String()
|
||||
|
@ -59,6 +59,7 @@ func TestStripTags(t *testing.T) {
|
||||
{`Foo<script type="text/javascript">alert(1337)</script>Bar`, "FooBar"},
|
||||
{`Foo<div title="1>2">Bar`, "FooBar"},
|
||||
{`I <3 Ponies!`, `I <3 Ponies!`},
|
||||
{`<script>foo()</script>`, ``},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -27,9 +27,9 @@ var transitionFunc = [...]func(context, []byte) (context, int){
|
||||
stateAttr: tAttr,
|
||||
stateURL: tURL,
|
||||
stateJS: tJS,
|
||||
stateJSDqStr: tJSStr,
|
||||
stateJSSqStr: tJSStr,
|
||||
stateJSRegexp: tJSRegexp,
|
||||
stateJSDqStr: tJSDelimited,
|
||||
stateJSSqStr: tJSDelimited,
|
||||
stateJSRegexp: tJSDelimited,
|
||||
stateJSBlockCmt: tBlockCmt,
|
||||
stateJSLineCmt: tLineCmt,
|
||||
stateCSS: tCSS,
|
||||
@ -57,14 +57,18 @@ func tText(c context, s []byte) (context, int) {
|
||||
return context{state: stateHTMLCmt}, i + 4
|
||||
}
|
||||
i++
|
||||
end := false
|
||||
if s[i] == '/' {
|
||||
if i+1 == len(s) {
|
||||
return c, len(s)
|
||||
}
|
||||
i++
|
||||
end, i = true, i+1
|
||||
}
|
||||
j, e := eatTagName(s, i)
|
||||
if j != i {
|
||||
if end {
|
||||
e = elementNone
|
||||
}
|
||||
// We've found an HTML tag.
|
||||
return context{state: stateTag, element: e}, j
|
||||
}
|
||||
@ -122,10 +126,9 @@ func tAttrName(c context, s []byte) (context, int) {
|
||||
i, err := eatAttrName(s, 0)
|
||||
if err != nil {
|
||||
return context{state: stateError, err: err}, len(s)
|
||||
} else if i == len(s) {
|
||||
return c, len(s)
|
||||
} else if i != len(s) {
|
||||
c.state = stateAfterName
|
||||
}
|
||||
c.state = stateAfterName
|
||||
return c, i
|
||||
}
|
||||
|
||||
@ -172,8 +175,7 @@ func tBeforeValue(c context, s []byte) (context, int) {
|
||||
|
||||
// tHTMLCmt is the context transition function for stateHTMLCmt.
|
||||
func tHTMLCmt(c context, s []byte) (context, int) {
|
||||
i := bytes.Index(s, commentEnd)
|
||||
if i != -1 {
|
||||
if i := bytes.Index(s, commentEnd); i != -1 {
|
||||
return context{}, i + 3
|
||||
}
|
||||
return c, len(s)
|
||||
@ -192,10 +194,8 @@ var specialTagEndMarkers = [...]string{
|
||||
// element states.
|
||||
func tSpecialTagEnd(c context, s []byte) (context, int) {
|
||||
if c.element != elementNone {
|
||||
end := specialTagEndMarkers[c.element]
|
||||
i := strings.Index(strings.ToLower(string(s)), end)
|
||||
if i != -1 {
|
||||
return context{state: stateTag}, i + len(end)
|
||||
if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
|
||||
return context{}, i
|
||||
}
|
||||
}
|
||||
return c, len(s)
|
||||
@ -220,10 +220,6 @@ func tURL(c context, s []byte) (context, int) {
|
||||
|
||||
// tJS is the context transition function for the JS state.
|
||||
func tJS(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
|
||||
i := bytes.IndexAny(s, `"'/`)
|
||||
if i == -1 {
|
||||
// Entire input is non string, comment, regexp tokens.
|
||||
@ -258,24 +254,25 @@ func tJS(c context, s []byte) (context, int) {
|
||||
return c, i + 1
|
||||
}
|
||||
|
||||
// tJSStr is the context transition function for the JS string states.
|
||||
func tJSStr(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
// tJSDelimited is the context transition function for the JS string and regexp
|
||||
// states.
|
||||
func tJSDelimited(c context, s []byte) (context, int) {
|
||||
specials := `\"`
|
||||
switch c.state {
|
||||
case stateJSSqStr:
|
||||
specials = `\'`
|
||||
case stateJSRegexp:
|
||||
specials = `\/[]`
|
||||
}
|
||||
|
||||
quoteAndEsc := `\"`
|
||||
if c.state == stateJSSqStr {
|
||||
quoteAndEsc = `\'`
|
||||
}
|
||||
|
||||
k := 0
|
||||
k, inCharset := 0, false
|
||||
for {
|
||||
i := k + bytes.IndexAny(s[k:], quoteAndEsc)
|
||||
i := k + bytes.IndexAny(s[k:], specials)
|
||||
if i < k {
|
||||
return c, len(s)
|
||||
break
|
||||
}
|
||||
if s[i] == '\\' {
|
||||
switch s[i] {
|
||||
case '\\':
|
||||
i++
|
||||
if i == len(s) {
|
||||
return context{
|
||||
@ -283,47 +280,16 @@ func tJSStr(c context, s []byte) (context, int) {
|
||||
err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
|
||||
}, len(s)
|
||||
}
|
||||
} else {
|
||||
c.state, c.jsCtx = stateJS, jsCtxDivOp
|
||||
return c, i + 1
|
||||
}
|
||||
k = i + 1
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// tJSRegexp is the context transition function for the /RegExp/ literal state.
|
||||
func tJSRegexp(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
|
||||
k, inCharset := 0, false
|
||||
for {
|
||||
i := k + bytes.IndexAny(s[k:], `\/[]`)
|
||||
if i < k {
|
||||
break
|
||||
}
|
||||
switch s[i] {
|
||||
case '/':
|
||||
if !inCharset {
|
||||
c.state, c.jsCtx = stateJS, jsCtxDivOp
|
||||
return c, i + 1
|
||||
}
|
||||
case '\\':
|
||||
i++
|
||||
if i == len(s) {
|
||||
return context{
|
||||
state: stateError,
|
||||
err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS regexp: %q", s),
|
||||
}, len(s)
|
||||
}
|
||||
case '[':
|
||||
inCharset = true
|
||||
case ']':
|
||||
inCharset = false
|
||||
default:
|
||||
panic("unreachable")
|
||||
// end delimiter
|
||||
if !inCharset {
|
||||
c.state, c.jsCtx = stateJS, jsCtxDivOp
|
||||
return c, i + 1
|
||||
}
|
||||
}
|
||||
k = i + 1
|
||||
}
|
||||
@ -344,9 +310,6 @@ var blockCommentEnd = []byte("*/")
|
||||
|
||||
// tBlockCmt is the context transition function for /*comment*/ states.
|
||||
func tBlockCmt(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
i := bytes.Index(s, blockCommentEnd)
|
||||
if i == -1 {
|
||||
return c, len(s)
|
||||
@ -364,9 +327,6 @@ func tBlockCmt(c context, s []byte) (context, int) {
|
||||
|
||||
// tLineCmt is the context transition function for //comment states.
|
||||
func tLineCmt(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
var lineTerminators string
|
||||
var endState state
|
||||
switch c.state {
|
||||
@ -400,10 +360,6 @@ func tLineCmt(c context, s []byte) (context, int) {
|
||||
|
||||
// tCSS is the context transition function for the CSS state.
|
||||
func tCSS(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
|
||||
// CSS quoted strings are almost never used except for:
|
||||
// (1) URLs as in background: "/foo.png"
|
||||
// (2) Multiword font-names as in font-family: "Times New Roman"
|
||||
@ -478,10 +434,6 @@ func tCSS(c context, s []byte) (context, int) {
|
||||
|
||||
// tCSSStr is the context transition function for the CSS string and URL states.
|
||||
func tCSSStr(c context, s []byte) (context, int) {
|
||||
if d, i := tSpecialTagEnd(c, s); i != len(s) {
|
||||
return d, i
|
||||
}
|
||||
|
||||
var endAndEsc string
|
||||
switch c.state {
|
||||
case stateCSSDqStr, stateCSSDqURL:
|
||||
|
Loading…
Reference in New Issue
Block a user