diff --git a/src/pkg/mime/mediatype.go b/src/pkg/mime/mediatype.go
index e9e649f950..9f8d2050e1 100644
--- a/src/pkg/mime/mediatype.go
+++ b/src/pkg/mime/mediatype.go
@@ -6,6 +6,8 @@ package mime
 
 import (
 	"bytes"
+	"fmt"
+	"os"
 	"strings"
 	"unicode"
 )
@@ -46,11 +48,16 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
 
 	params = make(map[string]string)
 
+	// Map of base parameter name -> parameter name -> value
+	// for parameters containing a '*' character.
+	// Lazily initialized.
+	var continuation map[string]map[string]string
+
 	v = v[i:]
 	for len(v) > 0 {
 		v = strings.TrimLeftFunc(v, unicode.IsSpace)
 		if len(v) == 0 {
-			return
+			break
 		}
 		key, value, rest := consumeMediaParam(v)
 		if key == "" {
@@ -62,12 +69,91 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
 			// Parse error.
 			return "", nil
 		}
-		params[key] = value
+
+		pmap := params
+		if idx := strings.Index(key, "*"); idx != -1 {
+			baseName := key[:idx]
+			if continuation == nil {
+				continuation = make(map[string]map[string]string)
+			}
+			var ok bool
+			if pmap, ok = continuation[baseName]; !ok {
+				continuation[baseName] = make(map[string]string)
+				pmap = continuation[baseName]
+			}
+		}
+		if _, exists := pmap[key]; exists {
+			// Duplicate parameter name is bogus.
+			return "", nil
+		}
+		pmap[key] = value
 		v = rest
 	}
+
+	// Stitch together any continuations or things with stars
+	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
+	var buf bytes.Buffer
+	for key, pieceMap := range continuation {
+		singlePartKey := key + "*"
+		if v, ok := pieceMap[singlePartKey]; ok {
+			decv := decode2231Enc(v)
+			params[key] = decv
+			continue
+		}
+
+		buf.Reset()
+		valid := false
+		for n := 0; ; n++ {
+			simplePart := fmt.Sprintf("%s*%d", key, n)
+			if v, ok := pieceMap[simplePart]; ok {
+				valid = true
+				buf.WriteString(v)
+				continue
+			}
+			encodedPart := simplePart + "*"
+			if v, ok := pieceMap[encodedPart]; ok {
+				valid = true
+				if n == 0 {
+					buf.WriteString(decode2231Enc(v))
+				} else {
+					// Segments after the first are unescaped directly, with no
+					// charset'lang' prefix expected; a malformed one yields "".
+					decv, _ := percentHexUnescape(v)
+					buf.WriteString(decv)
+				}
+			} else {
+				break
+			}
+		}
+		if valid {
+			params[key] = buf.String()
+		}
+	}
+
 	return
 }
 
+// decode2231Enc decodes an RFC 2231 extended parameter value of the
+// form charset'language'percent-encoded-text. The language is ignored.
+// It returns "" if v contains fewer than two ' separators or if the
+// charset, compared case-insensitively, is not us-ascii or utf-8.
+func decode2231Enc(v string) string {
+	sv := strings.Split(v, "'", 3)
+	if len(sv) != 3 {
+		return ""
+	}
+	// Ignoring lang in sv[1] for now.
+	charset := strings.ToLower(sv[0])
+	if charset != "us-ascii" && charset != "utf-8" {
+		// TODO: unsupported encoding
+		return ""
+	}
+	// percentHexUnescape returns "" together with any error, so a
+	// malformed escape makes the whole value decode to "".
+	encv, _ := percentHexUnescape(sv[2])
+	return encv
+}
+
 func isNotTokenChar(rune int) bool {
 	return !IsTokenChar(rune)
 }
@@ -107,17 +193,14 @@ func consumeValue(v string) (value, rest string) {
 	for idx, rune = range rest {
 		switch {
 		case nextIsLiteral:
-			if rune >= 0x80 {
-				return "", v
-			}
 			buffer.WriteRune(rune)
 			nextIsLiteral = false
 		case rune == leadQuote:
 			return buffer.String(), rest[idx+1:]
-		case IsQText(rune):
-			buffer.WriteRune(rune)
 		case rune == '\\':
 			nextIsLiteral = true
+		case rune != '\r' && rune != '\n':
+			buffer.WriteRune(rune)
 		default:
 			return "", v
 		}
@@ -137,6 +220,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
 	if param == "" {
 		return "", "", v
 	}
+
 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
 	if !strings.HasPrefix(rest, "=") {
 		return "", "", v
@@ -149,3 +233,73 @@ func consumeMediaParam(v string) (param, value, rest string) {
 	}
 	return param, value, rest
 }
+
+// percentHexUnescape replaces each %XX sequence in s, where XX is two
+// hex digits, with the byte it encodes. If a '%' is not followed by
+// two hex digits it returns "" and an error quoting the offending
+// sequence.
+func percentHexUnescape(s string) (string, os.Error) {
+	// Count %, check that they're well-formed.
+	percents := 0
+	for i := 0; i < len(s); {
+		if s[i] != '%' {
+			i++
+			continue
+		}
+		percents++
+		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+			s = s[i:]
+			if len(s) > 3 {
+				s = s[0:3]
+			}
+			return "", fmt.Errorf("Bogus characters after %%: %q", s)
+		}
+		i += 3
+	}
+	if percents == 0 {
+		return s, nil
+	}
+
+	t := make([]byte, len(s)-2*percents)
+	j := 0
+	for i := 0; i < len(s); {
+		switch s[i] {
+		case '%':
+			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
+			j++
+			i += 3
+		default:
+			t[j] = s[i]
+			j++
+			i++
+		}
+	}
+	return string(t), nil
+}
+
+// ishex reports whether c is an ASCII hexadecimal digit.
+func ishex(c byte) bool {
+	switch {
+	case '0' <= c && c <= '9':
+		return true
+	case 'a' <= c && c <= 'f':
+		return true
+	case 'A' <= c && c <= 'F':
+		return true
+	}
+	return false
+}
+
+// unhex returns the numeric value of the hex digit c, or 0 if c is
+// not a hex digit.
+func unhex(c byte) byte {
+	switch {
+	case '0' <= c && c <= '9':
+		return c - '0'
+	case 'a' <= c && c <= 'f':
+		return c - 'a' + 10
+	case 'A' <= c && c <= 'F':
+		return c - 'A' + 10
+	}
+	return 0
+}
diff --git a/src/pkg/mime/mediatype_test.go b/src/pkg/mime/mediatype_test.go
index f960315957..454ddd0377 100644
--- a/src/pkg/mime/mediatype_test.go
+++ b/src/pkg/mime/mediatype_test.go
@@ -114,6 +114,28 @@ func TestParseMediaType(t *testing.T) {
 			"form-data",
 			m("key", "value", "blah", "value", "name", "foo")},
 
+		{`foo; key=val1; key=the-key-appears-again-which-is-bogus`,
+			"", m()},
+
+		// From RFC 2231:
+		{`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`,
+			"application/x-stuff",
+			m("title", "This is ***fun***")},
+
+		{`message/external-body; access-type=URL; ` +
+			`URL*0="ftp://";` +
+			`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`,
+			"message/external-body",
+			m("access-type", "URL",
+				"URL", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar")},
+
+		{`application/x-stuff; ` +
+			`title*0*=us-ascii'en'This%20is%20even%20more%20; ` +
+			`title*1*=%2A%2A%2Afun%2A%2A%2A%20; ` +
+			`title*2="isn't it!"`,
+			"application/x-stuff",
+			m("title", "This is even more ***fun*** isn't it!")},
+
 		// Tests from http://greenbytes.de/tech/tc2231/
 		// TODO(bradfitz): add the rest of the tests from that site.
 		{`attachment; filename="f\oo.html"`,
@@ -159,8 +181,41 @@ func TestParseMediaType(t *testing.T) {
 			"attachment",
 			m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
 		{`foobar`, "foobar", m()},
-		// TODO(bradfitz): rest of them, including RFC2231 encoded UTF-8 and
-		// other charsets.
+		{`attachment; filename* =UTF-8''foo-%c3%a4.html`,
+			"attachment",
+			m("filename", "foo-ä.html")},
+		{`attachment; filename*=UTF-8''A-%2541.html`,
+			"attachment",
+			m("filename", "A-%41.html")},
+		{`attachment; filename*0="foo."; filename*1="html"`,
+			"attachment",
+			m("filename", "foo.html")},
+		{`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`,
+			"attachment",
+			m("filename", "foo-ä.html")},
+		{`attachment; filename*0="foo"; filename*01="bar"`,
+			"attachment",
+			m("filename", "foo")},
+		{`attachment; filename*0="foo"; filename*2="bar"`,
+			"attachment",
+			m("filename", "foo")},
+		{`attachment; filename*1="foo"; filename*2="bar"`,
+			"attachment", m()},
+		{`attachment; filename*1="bar"; filename*0="foo"`,
+			"attachment",
+			m("filename", "foobar")},
+		{`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`,
+			"attachment",
+			m("filename", "foo-ä.html")},
+		{`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`,
+			"attachment",
+			m("filename", "foo-ä.html")},
+
+		// Browsers also just send UTF-8 directly without RFC 2231,
+		// at least when the source page is served with UTF-8.
+		{`form-data; firstname="Брэд"; lastname="Фицпатрик"`,
+			"form-data",
+			m("firstname", "Брэд", "lastname", "Фицпатрик")},
 	}
 	for _, test := range tests {
 		mt, params := ParseMediaType(test.in)