html: parse framesets

Pass tests1.dat, test 106:
<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>

| <html>
|   <head>
|   <frameset>
|     <frame>
|     <frameset>
|       <frame>
|     <noframes>

Also pass test 107:
<h1><table><td><h3></table><h3></h1>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5373050
This commit is contained in:
Andrew Balholm 2011-11-10 23:56:13 +11:00 committed by Nigel Tao
parent be8025604e
commit e9e874b7fc
2 changed files with 89 additions and 4 deletions

View File

@ -321,7 +321,7 @@ func (p *parser) resetInsertionMode() insertionMode {
case "body":
return inBodyIM
case "frameset":
// TODO: return inFramesetIM
return inFramesetIM
case "html":
return beforeHeadIM
}
@ -517,7 +517,8 @@ func afterHeadIM(p *parser) (insertionMode, bool) {
attr = p.tok.Attr
framesetOK = false
case "frameset":
// TODO.
p.addElement(p.tok.Data, p.tok.Attr)
return inFramesetIM, true
case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
p.oe = append(p.oe, p.head)
defer p.oe.pop()
@ -646,7 +647,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
break
}
p.popUntil(buttonScopeStopTags, "p")
p.addElement("li", p.tok.Attr)
p.addElement(p.tok.Data, p.tok.Attr)
case "optgroup", "option":
if p.top().Data == "option" {
p.oe.pop()
@ -1169,6 +1170,69 @@ func afterBodyIM(p *parser) (insertionMode, bool) {
return afterBodyIM, true
}
// Section 11.2.5.4.19.
func inFramesetIM(p *parser) (insertionMode, bool) {
switch p.tok.Type {
case CommentToken:
p.addChild(&Node{
Type: CommentNode,
Data: p.tok.Data,
})
case StartTagToken:
switch p.tok.Data {
case "html":
return useTheRulesFor(p, inFramesetIM, inBodyIM)
case "frameset":
p.addElement(p.tok.Data, p.tok.Attr)
case "frame":
p.addElement(p.tok.Data, p.tok.Attr)
p.oe.pop()
p.acknowledgeSelfClosingTag()
case "noframes":
return useTheRulesFor(p, inFramesetIM, inHeadIM)
}
case EndTagToken:
switch p.tok.Data {
case "frameset":
if p.oe.top().Data != "html" {
p.oe.pop()
if p.oe.top().Data != "frameset" {
return afterFramesetIM, true
}
}
}
default:
// Ignore the token.
}
return inFramesetIM, true
}
// Section 11.2.5.4.20.
func afterFramesetIM(p *parser) (insertionMode, bool) {
switch p.tok.Type {
case CommentToken:
p.addChild(&Node{
Type: CommentNode,
Data: p.tok.Data,
})
case StartTagToken:
switch p.tok.Data {
case "html":
return useTheRulesFor(p, inFramesetIM, inBodyIM)
case "noframes":
return useTheRulesFor(p, inFramesetIM, inHeadIM)
}
case EndTagToken:
switch p.tok.Data {
case "html":
return afterAfterFramesetIM, true
}
default:
// Ignore the token.
}
return afterFramesetIM, true
}
// Section 11.2.5.4.21.
func afterAfterBodyIM(p *parser) (insertionMode, bool) {
switch p.tok.Type {
@ -1191,6 +1255,27 @@ func afterAfterBodyIM(p *parser) (insertionMode, bool) {
return inBodyIM, false
}
// Section 11.2.5.4.22.
func afterAfterFramesetIM(p *parser) (insertionMode, bool) {
switch p.tok.Type {
case CommentToken:
p.addChild(&Node{
Type: CommentNode,
Data: p.tok.Data,
})
case StartTagToken:
switch p.tok.Data {
case "html":
return useTheRulesFor(p, afterAfterFramesetIM, inBodyIM)
case "noframes":
return useTheRulesFor(p, afterAfterFramesetIM, inHeadIM)
}
default:
// Ignore the token.
}
return afterAfterFramesetIM, true
}
// Parse returns the parse tree for the HTML from the given Reader.
// The input is assumed to be UTF-8 encoded.
func Parse(r io.Reader) (*Node, error) {

View File

@ -133,7 +133,7 @@ func TestParser(t *testing.T) {
n int
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
{"tests1.dat", 106},
{"tests1.dat", 108},
{"tests2.dat", 0},
{"tests3.dat", 0},
}