html: parse framesets

Pass tests1.dat, test 106:
<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>

| <html>
|   <head>
|   <frameset>
|     <frame>
|     <frameset>
|       <frame>
|     <noframes>

Also pass test 107:
<h1><table><td><h3></table><h3></h1>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5373050
2024-09-30 14:57:10 +00:00 · 2011-11-10 23:56:13 +11:00 · 2011-11-10 23:56:13 +11:00 · e9e874b7fc
commit e9e874b7fc
parent be8025604e
2 changed files with 89 additions and 4 deletions
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -321,7 +321,7 @@ func (p *parser) resetInsertionMode() insertionMode {
 		case "body":
 			return inBodyIM
 		case "frameset":
-			// TODO: return inFramesetIM
+			return inFramesetIM
 		case "html":
 			return beforeHeadIM
 		}
@@ -517,7 +517,8 @@ func afterHeadIM(p *parser) (insertionMode, bool) {
 			attr = p.tok.Attr
 			framesetOK = false
 		case "frameset":
-			// TODO.
+			p.addElement(p.tok.Data, p.tok.Attr)
+			return inFramesetIM, true
 		case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
 			p.oe = append(p.oe, p.head)
 			defer p.oe.pop()
@@ -646,7 +647,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
 				break
 			}
 			p.popUntil(buttonScopeStopTags, "p")
-			p.addElement("li", p.tok.Attr)
+			p.addElement(p.tok.Data, p.tok.Attr)
 		case "optgroup", "option":
 			if p.top().Data == "option" {
 				p.oe.pop()
@@ -1169,6 +1170,69 @@ func afterBodyIM(p *parser) (insertionMode, bool) {
 	return afterBodyIM, true
 }

+// inFramesetIM handles one token in the "in frameset" insertion mode (section 11.2.5.4.19) and returns the mode to use next.
+func inFramesetIM(p *parser) (insertionMode, bool) {
+	switch p.tok.Type {
+	case CommentToken:
+		p.addChild(&Node{ // Add the comment as a CommentNode child carrying the token's data.
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case StartTagToken:
+		switch p.tok.Data {
+		case "html":
+			return useTheRulesFor(p, inFramesetIM, inBodyIM) // Delegate to the "in body" handler on behalf of this mode.
+		case "frameset":
+			p.addElement(p.tok.Data, p.tok.Attr)
+		case "frame":
+			p.addElement(p.tok.Data, p.tok.Attr)
+			p.oe.pop() // The frame is popped from p.oe again right after being added.
+			p.acknowledgeSelfClosingTag()
+		case "noframes":
+			return useTheRulesFor(p, inFramesetIM, inHeadIM) // Delegate to the "in head" handler on behalf of this mode.
+		}
+	case EndTagToken:
+		switch p.tok.Data {
+		case "frameset":
+			if p.oe.top().Data != "html" { // When "html" is on top of p.oe, the end tag pops nothing.
+				p.oe.pop()
+				if p.oe.top().Data != "frameset" { // The new top of p.oe is not a frameset: leave this mode.
+					return afterFramesetIM, true
+				}
+			}
+		}
+	default:
+		// Tokens of any other type are ignored.
+	}
+	return inFramesetIM, true // Stay in this mode; the bool presumably reports that the token was consumed (confirm against the caller).
+}
+
+// afterFramesetIM handles one token in the "after frameset" insertion mode (section 11.2.5.4.20) and returns the mode to use next.
+func afterFramesetIM(p *parser) (insertionMode, bool) {
+	switch p.tok.Type {
+	case CommentToken:
+		p.addChild(&Node{ // Add the comment as a CommentNode child carrying the token's data.
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case StartTagToken:
+		switch p.tok.Data {
+		case "html":
+			return useTheRulesFor(p, afterFramesetIM, inBodyIM) // Delegate to the "in body" handler on behalf of this mode, not inFramesetIM.
+		case "noframes":
+			return useTheRulesFor(p, afterFramesetIM, inHeadIM) // Delegate to the "in head" handler on behalf of this mode, not inFramesetIM.
+		}
+	case EndTagToken:
+		switch p.tok.Data {
+		case "html":
+			return afterAfterFramesetIM, true // </html> moves on to the "after after frameset" handler.
+		}
+	default:
+		// Tokens of any other type are ignored.
+	}
+	return afterFramesetIM, true // Stay in this mode; the bool presumably reports that the token was consumed (confirm against the caller).
+}
+
 // Section 11.2.5.4.21.
 func afterAfterBodyIM(p *parser) (insertionMode, bool) {
 	switch p.tok.Type {
@@ -1191,6 +1255,27 @@ func afterAfterBodyIM(p *parser) (insertionMode, bool) {
 	return inBodyIM, false
 }

+// afterAfterFramesetIM handles one token in the "after after frameset" insertion mode (section 11.2.5.4.22) and returns the mode to use next.
+func afterAfterFramesetIM(p *parser) (insertionMode, bool) {
+	switch p.tok.Type {
+	case CommentToken:
+		p.addChild(&Node{ // Add the comment as a CommentNode child carrying the token's data.
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case StartTagToken:
+		switch p.tok.Data {
+		case "html":
+			return useTheRulesFor(p, afterAfterFramesetIM, inBodyIM) // Delegate to the "in body" handler on behalf of this mode.
+		case "noframes":
+			return useTheRulesFor(p, afterAfterFramesetIM, inHeadIM) // Delegate to the "in head" handler on behalf of this mode.
+		}
+	default:
+		// Tokens of any other type, end tags included, are ignored.
+	}
+	return afterAfterFramesetIM, true // Stay in this mode; the bool presumably reports that the token was consumed (confirm against the caller).
+}
+
 // Parse returns the parse tree for the HTML from the given Reader.
 // The input is assumed to be UTF-8 encoded.
 func Parse(r io.Reader) (*Node, error) {
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -133,7 +133,7 @@ func TestParser(t *testing.T) {
 		n int
 	}{
 		// TODO(nigeltao): Process all the test cases from all the .dat files.
-		{"tests1.dat", 106},
+		{"tests1.dat", 108},
 		{"tests2.dat", 0},
 		{"tests3.dat", 0},
 	}