diff --git a/internal/httpsfv/httpsfv.go b/internal/httpsfv/httpsfv.go index 8b2620da..d821e81d 100644 --- a/internal/httpsfv/httpsfv.go +++ b/internal/httpsfv/httpsfv.go @@ -48,24 +48,56 @@ func countLeftWhitespace(s string) int { return i } -// TODO(nsh): Implement other consume functions that will be needed to fully -// deal with all possible HTTP SFV, specifically: -// - consumeDictionary(s string, f func(key, val, param string)) (consumed, rest string, ok bool) -// For example, given `a=123,b;a="a", i`, ConsumeDictionary will call f() 3 times -// with the following args: -// - key: `a`, val: `123`, param: `` -// - key: `b`, val: ``, param:`;a="a"` -// - key: `i`, val: ``, param: `` -// -// - consumeList(s string, f func(member, param string)) (consumed, rest string, ok bool) -// For example, given `123.456;i, ("foo" "bar"; lvl=2); lvl=1`, ConsumeList will -// call f() 2 times with the following args: -// - member: `123.456`, param: `i` -// - member: `("foo" "bar"; lvl=2)`, param: `; lvl=1` - // TODO(nsh): Implement corresponding parse functions for all consume functions // that exists. +// ParseList is used to parse a string that represents a list in an +// HTTP Structured Field Values. +// +// Given a string that represents a list, it will call the given function using +// each of the members and parameters contained in the list. This allows the +// caller to extract information out of the list. +// +// This function will return once it encounters the end of the string, or +// something that is not a list. If it cannot consume the entire given +// string, the ok value returned will be false. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-list. +func ParseList(s string, f func(member, param string)) (ok bool) { + for len(s) != 0 { + var member, param string + if len(s) != 0 && s[0] == '(' { + if member, s, ok = consumeBareInnerList(s, nil); !ok { + return ok + } + } else { + if member, s, ok = consumeBareItem(s); !ok { + return ok + } + } + if param, s, ok = consumeParameter(s, nil); !ok { + return ok + } + if f != nil { + f(member, param) + } + + s = s[countLeftWhitespace(s):] + if len(s) == 0 { + break + } + if s[0] != ',' { + return false + } + s = s[1:] + s = s[countLeftWhitespace(s):] + if len(s) == 0 { + return false + } + } + return true +} + // consumeBareInnerList consumes an inner list // (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list), // except for the inner list's top-most parameter. @@ -151,6 +183,58 @@ func ParseItem(s string, f func(bareItem, param string)) (ok bool) { return rest == "" && ok } +// ParseDictionary is used to parse a string that represents a dictionary in an +// HTTP Structured Field Values. +// +// Given a string that represents a dictionary, it will call the given function +// using each of the keys, values, and parameters contained in the dictionary. +// This allows the caller to extract information out of the dictionary. +// +// This function will return once it encounters the end of the string, or +// something that is not a dictionary. If it cannot consume the entire given +// string, the ok value returned will be false. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-dictionary. +func ParseDictionary(s string, f func(key, val, param string)) (ok bool) { + for len(s) != 0 { + var key, val, param string + val = "?1" // Default value for empty val is boolean true. + if key, s, ok = consumeKey(s); !ok { + return ok + } + if len(s) != 0 && s[0] == '=' { + s = s[1:] + if len(s) != 0 && s[0] == '(' { + if val, s, ok = consumeBareInnerList(s, nil); !ok { + return ok + } + } else { + if val, s, ok = consumeBareItem(s); !ok { + return ok + } + } + } + if param, s, ok = consumeParameter(s, nil); !ok { + return ok + } + if f != nil { + f(key, val, param) + } + s = s[countLeftWhitespace(s):] + if len(s) == 0 { + break + } + if s[0] == ',' { + s = s[1:] + } + s = s[countLeftWhitespace(s):] + if len(s) == 0 { + return false + } + } + return true +} + // https://www.rfc-editor.org/rfc/rfc9651.html#parse-param. func consumeParameter(s string, f func(key, val string)) (consumed, rest string, ok bool) { rest = s diff --git a/internal/httpsfv/httpsfv_test.go b/internal/httpsfv/httpsfv_test.go index 45a3d7ac..a1ecec74 100644 --- a/internal/httpsfv/httpsfv_test.go +++ b/internal/httpsfv/httpsfv_test.go @@ -10,6 +10,73 @@ import ( "testing" ) +func TestParseList(t *testing.T) { + tests := []struct { + name string + in string + wantMembers []string + wantParams []string + wantOk bool + }{ + { + name: "valid list", + in: `a, b,c`, + wantMembers: []string{"a", "b", "c"}, + wantParams: []string{"", "", ""}, + wantOk: true, + }, + { + name: "valid list with params", + in: `a;foo=bar, b,c; baz=baz`, + wantMembers: []string{"a", "b", "c"}, + wantParams: []string{";foo=bar", "", "; baz=baz"}, + wantOk: true, + }, + { + name: "valid list with fake commas", + in: `a;foo=",", (",")`, + wantMembers: []string{"a", `(",")`}, + wantParams: []string{`;foo=","`, ""}, + wantOk: true, + }, + { + name: "valid list with inner list member", + in: `(a b c); foo, bar;baz`, + wantMembers: []string{"(a b c)", "bar"}, + wantParams: []string{"; foo", ";baz"}, + wantOk: true, + }, + { + name: "invalid list with trailing comma", + in: `a;foo=bar, b,c; baz=baz,`, + wantMembers: []string{"a", "b", "c"}, + wantParams: []string{";foo=bar", "", "; baz=baz"}, + }, + { + name: "invalid list with unclosed string", + in: `", b, c,d`, + }, + } + + for _, tc := range tests { + var gotMembers, gotParams []string + f := func(member, param string) { + gotMembers = append(gotMembers, member) + gotParams = append(gotParams, param) + } + ok := ParseList(tc.in, f) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if !slices.Equal(tc.wantMembers, gotMembers) { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotMembers, tc.wantMembers) + } + if !slices.Equal(tc.wantParams, gotParams) { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotParams, tc.wantParams) + } + } +} + func TestConsumeBareInnerList(t *testing.T) { tests := []struct { name string @@ -240,6 +307,82 @@ func TestParseItem(t *testing.T) { } } +func TestParseDictionary(t *testing.T) { + tests := []struct { + name string + in string + wantVal string + wantParam string + wantOk bool + }{ + { + name: "valid dictionary with simple value", + in: `a=b, want=foo, c=d`, + wantVal: "foo", + wantOk: true, + }, + { + name: "valid dictionary with implicit value", + in: `a, want, c=d`, + wantVal: "?1", + wantOk: true, + }, + { + name: "valid dictionary with parameter", + in: `a, want=foo;bar=baz, c=d`, + wantVal: "foo", + wantParam: ";bar=baz", + wantOk: true, + }, + { + name: "valid dictionary with inner list", + in: `a, want=(a b c d;e;f);g=h, c=d`, + wantVal: "(a b c d;e;f)", + wantParam: ";g=h", + wantOk: true, + }, + { + name: "valid dictionary with fake commas", + in: `a=(";");b=";",want=foo;bar`, + wantVal: "foo", + wantParam: ";bar", + wantOk: true, + }, + { + name: "invalid dictionary with bad key", + in: `UPPERCASEKEY=BAD, want=foo, c=d`, + }, + { + name: "invalid dictionary with trailing comma", + in: `trailing=comma,`, + }, + { + name: "invalid dictionary with unclosed string", + in: `a=""",want=foo;bar`, + }, + } + + for _, tc := range tests { + var gotVal, gotParam string + f := func(key, val, param string) { + if key == "want" { + gotVal = val + gotParam = param + } + } + ok := ParseDictionary(tc.in, f) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if tc.wantVal != gotVal { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotVal, tc.wantVal) + } + if tc.wantParam != gotParam { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotParam, tc.wantParam) + } + } +} + func TestConsumeParameter(t *testing.T) { tests := []struct { name string