mirror of
https://github.com/golang/go.git
synced 2024-10-02 07:47:11 +00:00
Fixes to URL functionality:
- Extend http.URLUnescape to convert '+' to space - Add http.URLEscape - Rename URL.Query to EncodedQuery (and stop decoding it, as decoding this field before separating key/value pairs loses important information) - Report a clean error on incomplete hex escapes - Update existing tests, add new ones APPROVED=rsc DELTA=293 (256 added, 3 deleted, 34 changed) OCL=29685 CL=29759
This commit is contained in:
parent
e11f833bed
commit
da0a582564
@ -41,37 +41,60 @@ func unhex(c byte) byte {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Return true if the specified character should be escaped when appearing in a
|
||||
// URL string.
|
||||
//
|
||||
// TODO: for now, this is a hack; it only flags a few common characters that have
|
||||
// special meaning in URLs. That will get the job done in the common cases.
|
||||
func shouldEscape(c byte) bool {
|
||||
switch c {
|
||||
case ' ', '?', '&', '=', '#', '+', '%':
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// URLUnescape unescapes a URL-encoded string,
|
||||
// converting %AB into the byte 0xAB.
|
||||
// converting %AB into the byte 0xAB and '+' into ' ' (space).
|
||||
// It returns a BadURL error if any % is not followed
|
||||
// by two hexadecimal digits.
|
||||
func URLUnescape(s string) (string, os.Error) {
|
||||
// Count %, check that they're well-formed.
|
||||
n := 0;
|
||||
anyPlusses := false;
|
||||
for i := 0; i < len(s); {
|
||||
if s[i] == '%' {
|
||||
switch s[i] {
|
||||
case '%':
|
||||
n++;
|
||||
if !ishex(s[i+1]) || !ishex(s[i+2]) {
|
||||
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
||||
return "", BadURL{"invalid hexadecimal escape"}
|
||||
}
|
||||
i += 3
|
||||
} else {
|
||||
i += 3;
|
||||
case '+':
|
||||
anyPlusses = true;
|
||||
i++;
|
||||
default:
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
if n == 0 {
|
||||
if n == 0 && !anyPlusses {
|
||||
return s, nil
|
||||
}
|
||||
|
||||
t := make([]byte, len(s)-2*n);
|
||||
j := 0;
|
||||
for i := 0; i < len(s); {
|
||||
if s[i] == '%' {
|
||||
switch s[i] {
|
||||
case '%':
|
||||
t[j] = unhex(s[i+1]) << 4 | unhex(s[i+2]);
|
||||
j++;
|
||||
i += 3;
|
||||
} else {
|
||||
case '+':
|
||||
t[j] = ' ';
|
||||
j++;
|
||||
i++;
|
||||
default:
|
||||
t[j] = s[i];
|
||||
j++;
|
||||
i++;
|
||||
@ -80,9 +103,53 @@ func URLUnescape(s string) (string, os.Error) {
|
||||
return string(t), nil;
|
||||
}
|
||||
|
||||
// URLEscape converts a string into URL-encoded form.
|
||||
func URLEscape(s string) string {
|
||||
spaceCount, hexCount := 0, 0;
|
||||
for i := 0; i < len(s); i++ {
|
||||
c := s[i];
|
||||
if (shouldEscape(c)) {
|
||||
if (c == ' ') {
|
||||
spaceCount++;
|
||||
} else {
|
||||
hexCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if spaceCount == 0 && hexCount == 0 {
|
||||
return s;
|
||||
}
|
||||
|
||||
t := make([]byte, len(s)+2*hexCount);
|
||||
j := 0;
|
||||
for i := 0; i < len(s); i++ {
|
||||
c := s[i];
|
||||
if !shouldEscape(c) {
|
||||
t[j] = s[i];
|
||||
j++;
|
||||
} else if (c == ' ') {
|
||||
t[j] = '+';
|
||||
j++;
|
||||
} else {
|
||||
t[j] = '%';
|
||||
t[j+1] = "0123456789abcdef"[c>>4];
|
||||
t[j+2] = "0123456789abcdef"[c&15];
|
||||
j += 3;
|
||||
}
|
||||
}
|
||||
return string(t);
|
||||
}
|
||||
|
||||
// A URL represents a parsed URL (technically, a URI reference).
|
||||
// The general form represented is:
|
||||
// scheme://[userinfo@]host/path[?query][#fragment]
|
||||
// The Raw, RawPath, and RawQuery fields are in "wire format" (special
|
||||
// characters must be hex-escaped if not meant to have special meaning).
|
||||
// All other fields are logical values; '+' or '%' represent themselves.
|
||||
//
|
||||
// Note, the reason for using wire format for the query is that it needs
|
||||
// to be split into key/value pairs before decoding.
|
||||
type URL struct {
|
||||
Raw string; // the original string
|
||||
Scheme string; // scheme
|
||||
@ -91,7 +158,7 @@ type URL struct {
|
||||
Userinfo string; // userinfo
|
||||
Host string; // host
|
||||
Path string; // /path
|
||||
Query string; // query
|
||||
RawQuery string; // query
|
||||
Fragment string; // fragment
|
||||
}
|
||||
|
||||
@ -156,10 +223,7 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
|
||||
// RFC 2396: a relative URI (no scheme) has a ?query,
|
||||
// but absolute URIs only have query if path begins with /
|
||||
if url.Scheme == "" || len(path) > 0 && path[0] == '/' {
|
||||
path, url.Query = split(path, '?', true);
|
||||
if url.Query, err = URLUnescape(url.Query); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path, url.RawQuery = split(path, '?', true);
|
||||
}
|
||||
|
||||
// Maybe path is //authority/path
|
||||
@ -180,6 +244,21 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Remove escapes from the Authority and Userinfo fields, and verify
|
||||
// that Scheme and Host contain no escapes (that would be illegal).
|
||||
if url.Authority, err = URLUnescape(url.Authority); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if url.Userinfo, err = URLUnescape(url.Userinfo); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if (strings.Index(url.Scheme, "%") >= 0) {
|
||||
return nil, BadURL{"hexadecimal escape in scheme"}
|
||||
}
|
||||
if (strings.Index(url.Host, "%") >= 0) {
|
||||
return nil, BadURL{"hexadecimal escape in host"}
|
||||
}
|
||||
|
||||
return url, nil
|
||||
}
|
||||
|
||||
@ -200,7 +279,7 @@ func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
|
||||
//
|
||||
// There are redundant fields stored in the URL structure:
|
||||
// the String method consults Scheme, Path, Host, Userinfo,
|
||||
// Query, and Fragment, but not RawPath or Authority.
|
||||
// RawQuery, and Fragment, but not Raw, RawPath or Authority.
|
||||
func (url *URL) String() string {
|
||||
result := "";
|
||||
if url.Scheme != "" {
|
||||
@ -209,16 +288,16 @@ func (url *URL) String() string {
|
||||
if url.Host != "" || url.Userinfo != "" {
|
||||
result += "//";
|
||||
if url.Userinfo != "" {
|
||||
result += url.Userinfo + "@";
|
||||
result += URLEscape(url.Userinfo) + "@";
|
||||
}
|
||||
result += url.Host;
|
||||
}
|
||||
result += url.Path;
|
||||
if url.Query != "" {
|
||||
result += "?" + url.Query;
|
||||
result += URLEscape(url.Path);
|
||||
if url.RawQuery != "" {
|
||||
result += "?" + url.RawQuery;
|
||||
}
|
||||
if url.Fragment != "" {
|
||||
result += "#" + url.Fragment;
|
||||
result += "#" + URLEscape(url.Fragment);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
type URLTest struct {
|
||||
in string;
|
||||
out *URL;
|
||||
roundtrip string; // expected result of reserializing the URL; empty means same as "in".
|
||||
}
|
||||
|
||||
var urltests = []URLTest {
|
||||
@ -31,7 +32,8 @@ var urltests = []URLTest {
|
||||
"http", "//www.google.com",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"", "", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
// path
|
||||
URLTest{
|
||||
@ -41,7 +43,19 @@ var urltests = []URLTest {
|
||||
"http", "//www.google.com/",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
// path with hex escaping... note that space roundtrips to +
|
||||
URLTest{
|
||||
"http://www.google.com/file%20one%26two",
|
||||
&URL{
|
||||
"http://www.google.com/file%20one%26two",
|
||||
"http", "//www.google.com/file%20one%26two",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/file one&two", "", ""
|
||||
},
|
||||
"http://www.google.com/file+one%26two"
|
||||
},
|
||||
// user
|
||||
URLTest{
|
||||
@ -51,7 +65,19 @@ var urltests = []URLTest {
|
||||
"ftp", "//webmaster@www.google.com/",
|
||||
"webmaster@www.google.com", "webmaster", "www.google.com",
|
||||
"/", "", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
// escape sequence in username
|
||||
URLTest{
|
||||
"ftp://john%20doe@www.google.com/",
|
||||
&URL{
|
||||
"ftp://john%20doe@www.google.com/",
|
||||
"ftp", "//john%20doe@www.google.com/",
|
||||
"john doe@www.google.com", "john doe", "www.google.com",
|
||||
"/", "", ""
|
||||
},
|
||||
"ftp://john+doe@www.google.com/"
|
||||
},
|
||||
// query
|
||||
URLTest{
|
||||
@ -61,7 +87,19 @@ var urltests = []URLTest {
|
||||
"http", "//www.google.com/?q=go+language",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "q=go+language", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
// query with hex escaping: NOT parsed
|
||||
URLTest{
|
||||
"http://www.google.com/?q=go%20language",
|
||||
&URL{
|
||||
"http://www.google.com/?q=go%20language",
|
||||
"http", "//www.google.com/?q=go%20language",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "q=go%20language", ""
|
||||
},
|
||||
""
|
||||
},
|
||||
// path without /, so no query parsing
|
||||
URLTest{
|
||||
@ -70,8 +108,9 @@ var urltests = []URLTest {
|
||||
"http:www.google.com/?q=go+language",
|
||||
"http", "www.google.com/?q=go+language",
|
||||
"", "", "",
|
||||
"www.google.com/?q=go+language", "", ""
|
||||
}
|
||||
"www.google.com/?q=go language", "", ""
|
||||
},
|
||||
"http:www.google.com/%3fq%3dgo+language"
|
||||
},
|
||||
// non-authority
|
||||
URLTest{
|
||||
@ -81,7 +120,8 @@ var urltests = []URLTest {
|
||||
"mailto", "/webmaster@golang.org",
|
||||
"", "", "",
|
||||
"/webmaster@golang.org", "", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
// non-authority
|
||||
URLTest{
|
||||
@ -91,7 +131,8 @@ var urltests = []URLTest {
|
||||
"mailto", "webmaster@golang.org",
|
||||
"", "", "",
|
||||
"webmaster@golang.org", "", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
}
|
||||
|
||||
@ -103,7 +144,8 @@ var urlnofragtests = []URLTest {
|
||||
"http", "//www.google.com/?q=go+language#foo",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "q=go+language#foo", ""
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
}
|
||||
|
||||
@ -115,7 +157,18 @@ var urlfragtests = []URLTest {
|
||||
"http", "//www.google.com/?q=go+language",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "q=go+language", "foo"
|
||||
}
|
||||
},
|
||||
""
|
||||
},
|
||||
URLTest{
|
||||
"http://www.google.com/?q=go+language#foo%26bar",
|
||||
&URL{
|
||||
"http://www.google.com/?q=go+language",
|
||||
"http", "//www.google.com/?q=go+language",
|
||||
"www.google.com", "", "www.google.com",
|
||||
"/", "q=go+language", "foo&bar"
|
||||
},
|
||||
""
|
||||
},
|
||||
}
|
||||
|
||||
@ -123,7 +176,7 @@ var urlfragtests = []URLTest {
|
||||
func ufmt(u *URL) string {
|
||||
return fmt.Sprintf("%q, %q, %q, %q, %q, %q, %q, %q, %q",
|
||||
u.Raw, u.Scheme, u.RawPath, u.Authority, u.Userinfo,
|
||||
u.Host, u.Path, u.Query, u.Fragment);
|
||||
u.Host, u.Path, u.RawQuery, u.Fragment);
|
||||
}
|
||||
|
||||
func DoTest(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) {
|
||||
@ -158,8 +211,12 @@ func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string
|
||||
continue;
|
||||
}
|
||||
s := u.String();
|
||||
if s != tt.in {
|
||||
t.Errorf("%s(%q).String() == %q", tt.in, s);
|
||||
expected := tt.in;
|
||||
if len(tt.roundtrip) > 0 {
|
||||
expected = tt.roundtrip;
|
||||
}
|
||||
if s != expected {
|
||||
t.Errorf("%s(%q).String() == %q (expected %q)", name, tt.in, s, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -172,3 +229,120 @@ func TestURLString(t *testing.T) {
|
||||
DoTestString(t, ParseURLReference, "ParseURLReference", urlfragtests);
|
||||
DoTestString(t, ParseURLReference, "ParseURLReference", urlnofragtests);
|
||||
}
|
||||
|
||||
type URLEscapeTest struct {
|
||||
in string;
|
||||
out string;
|
||||
err os.Error;
|
||||
}
|
||||
|
||||
var unescapeTests = []URLEscapeTest {
|
||||
URLEscapeTest{
|
||||
"",
|
||||
"",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"abc",
|
||||
"abc",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"1%41",
|
||||
"1A",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"1%41%42%43",
|
||||
"1ABC",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%4a",
|
||||
"J",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%6F",
|
||||
"o",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%", // not enough characters after %
|
||||
"",
|
||||
BadURL{"invalid hexadecimal escape"}
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%a", // not enough characters after %
|
||||
"",
|
||||
BadURL{"invalid hexadecimal escape"}
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%1", // not enough characters after %
|
||||
"",
|
||||
BadURL{"invalid hexadecimal escape"}
|
||||
},
|
||||
URLEscapeTest{
|
||||
"123%45%6", // not enough characters after %
|
||||
"",
|
||||
BadURL{"invalid hexadecimal escape"}
|
||||
},
|
||||
URLEscapeTest{
|
||||
"%zz", // invalid hex digits
|
||||
"",
|
||||
BadURL{"invalid hexadecimal escape"}
|
||||
},
|
||||
}
|
||||
|
||||
func TestURLUnescape(t *testing.T) {
|
||||
for i, tt := range unescapeTests {
|
||||
actual, err := URLUnescape(tt.in);
|
||||
if actual != tt.out || (err != nil) != (tt.err != nil) {
|
||||
t.Errorf("URLUnescape(%q) = %q, %s; want %q, %s", tt.in, actual, err, tt.out, tt.err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var escapeTests = []URLEscapeTest {
|
||||
URLEscapeTest{
|
||||
"",
|
||||
"",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"abc",
|
||||
"abc",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"one two",
|
||||
"one+two",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
"10%",
|
||||
"10%25",
|
||||
nil
|
||||
},
|
||||
URLEscapeTest{
|
||||
" ?&=#+%!",
|
||||
"+%3f%26%3d%23%2b%25!",
|
||||
nil
|
||||
},
|
||||
}
|
||||
|
||||
func TestURLEscape(t *testing.T) {
|
||||
for i, tt := range escapeTests {
|
||||
actual := URLEscape(tt.in);
|
||||
if tt.out != actual {
|
||||
t.Errorf("URLEscape(%q) = %q, want %q", tt.in, actual, tt.out);
|
||||
}
|
||||
|
||||
// for bonus points, verify that escape:unescape is an identity.
|
||||
roundtrip, err := URLUnescape(actual);
|
||||
if roundtrip != tt.in || err != nil {
|
||||
t.Errorf("URLUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user