internal/httpsfv: add support for Bare Inner List and Item type.

This change implements the consume and Parse functions for both the Item
and Bare Inner List type. This is part of a chain of changes that is needed
in order for us to fully support HTTP Structured Field Values parsing as
defined in RFC 9651.

In future changes, we will utilize the support for Bare Inner List and Item
that is added here to support more complex types, namely Dictionary and
List.

Note that Bare Inner List is something we define on our own. We define a
Bare Inner List as an Inner List without the top-most parameter meant
for the Inner List. For example, the Inner List `(a;b c;d);e` would
translate to the Bare Inner List `(a;b c;d)`. We have done this because
the parameter of an Inner List will be exposed to the user via
ParseDictionary() or ParseList() too. By implementing Bare Inner List,
we can avoid having two ways of accessing the Inner List parameter, and
incurring the cost of a more complex implementation for Inner List and
other types that utilize Inner List (e.g. if we have consumeInnerList,
ParseDictionary will have to use consumeInnerList and backtrack the
consumption to separate out the InnerList parameter).

For go/golang#75500

Change-Id: I9b418d10b5755195d1cc3ff5f7ea211423bc4b48
Reviewed-on: https://go-review.googlesource.com/c/net/+/707099
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Nicholas S. Husin
2025-09-29 11:00:02 -04:00
committed by Nicholas Husin
parent fe9bcbcc92
commit 7d8cfcee6c
2 changed files with 318 additions and 20 deletions

View File

@@ -62,21 +62,95 @@ func countLeftWhitespace(s string) int {
// call f() 2 times with the following args:
// - member: `123.456`, param: `i`
// - member: `("foo" "bar"; lvl=2)`, param: `; lvl=1`
//
// - consumeItem(s string, f func(bareItem, param string)) (consumed, rest string, ok bool)
// For example, given `"foo"; bar=baz;foo=bar`, consumeItem will call f() with
// the following args:
// - bareItem: `"foo"`, param: `; bar=baz;foo=bar`
//
// - consumeInnerList(s string, f func(bareItem, param, listParam string)) (consumed, rest string, ok bool)
// For example, given `("foo"; a=1;b=2 "bar";baz;lvl=2);lvl=1`, consumeInnerList
// will call f() 2 times with the following args:
// - bareItem: `"foo"`, param: `; a=1;b=2`, listParam: `;lvl=1`
// - bareItem: `"bar"`, param: `;baz;lvl=2`, listParam: `;lvl=1`
// TODO(nsh): Implement corresponding parse functions for all consume functions
// that exist.
// consumeBareInnerList consumes an inner list
// (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list),
// except for the inner list's top-most parameter.
// For example, given `(a;b c;d);e`, it will consume only `(a;b c;d)`.
//
// If f is not nil, it is called once per item, in order, with the item's bare
// item and the text of the item's parameter. Because f is invoked as soon as
// each item has been consumed, it may already have been called for leading
// items when a later part of the list turns out to be malformed.
//
// On success, consumed is the bare inner list including both parentheses, rest
// is whatever follows the closing parenthesis, and ok is true. On failure,
// consumed is empty, rest is s, and ok is false. A list whose closing
// parenthesis is missing (including the lone string `(`) is a failure.
func consumeBareInnerList(s string, f func(bareItem, param string)) (consumed, rest string, ok bool) {
	if len(s) == 0 || s[0] != '(' {
		return "", s, false
	}
	rest = s[1:]
	for {
		var bareItem, param string
		rest = rest[countLeftWhitespace(rest):]
		if len(rest) == 0 {
			// Ran out of input before seeing the closing parenthesis.
			return "", s, false
		}
		if rest[0] == ')' {
			// Success is reported only here, so ok can never be true for
			// an unterminated list.
			rest = rest[1:]
			return s[:len(s)-len(rest)], rest, true
		}
		if bareItem, rest, ok = consumeBareItem(rest); !ok {
			return "", s, ok
		}
		if param, rest, ok = consumeParameter(rest, nil); !ok {
			return "", s, ok
		}
		// An item must be followed by a space or the closing parenthesis.
		if len(rest) == 0 || (rest[0] != ')' && !isSP(rest[0])) {
			return "", s, false
		}
		if f != nil {
			f(bareItem, param)
		}
	}
}
// ParseBareInnerList parses s as a bare inner list of an HTTP Structured
// Field Value.
//
// A bare inner list is an inner list
// (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list)
// stripped of the inner list's own top-most parameter. For the inner list
// `(a;b c;d);e`, the bare inner list is `(a;b c;d)`.
//
// f is handed to the underlying consumer, which calls it with the bare item
// and parameter of each item it consumes, letting the caller pull
// information out of the list.
//
// Parsing stops at the end of the bare inner list or at the first thing that
// is not part of one. ok is true only when parsing succeeded and the whole of
// s was consumed.
func ParseBareInnerList(s string, f func(bareItem, param string)) (ok bool) {
	_, remaining, parsed := consumeBareInnerList(s, f)
	if !parsed {
		return false
	}
	return len(remaining) == 0
}
// consumeItem consumes an item from the start of s: a bare item followed by
// its parameter.
//
// If both parts are consumed successfully and f is not nil, f is called once
// with the bare item and the parameter text. On success, consumed is the part
// of s that made up the item and rest is the remainder. On failure, consumed
// is empty, rest is s, ok is false, and f is not called.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-item.
func consumeItem(s string, f func(bareItem, param string)) (consumed, rest string, ok bool) {
	item, afterItem, itemOK := consumeBareItem(s)
	if !itemOK {
		return "", s, false
	}
	params, afterParams, paramsOK := consumeParameter(afterItem, nil)
	if !paramsOK {
		return "", s, false
	}
	if f != nil {
		f(item, params)
	}
	return s[:len(s)-len(afterParams)], afterParams, true
}
// ParseItem parses s as an item of an HTTP Structured Field Value.
//
// f is handed to the underlying consumer, which calls it once with the bare
// item and the parameter of the item when an item is consumed. This lets the
// caller pull information out of the parameter.
//
// Parsing stops at the end of the string or at the first thing that is not
// part of an item. ok is true only when parsing succeeded and the whole of s
// was consumed.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-item.
func ParseItem(s string, f func(bareItem, param string)) (ok bool) {
	_, remaining, parsed := consumeItem(s, f)
	if !parsed {
		return false
	}
	return len(remaining) == 0
}
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
func consumeParameter(s string, f func(key, val string)) (consumed, rest string, ok bool) {
rest = s
@@ -87,9 +161,7 @@ func consumeParameter(s string, f func(key, val string)) (consumed, rest string,
break
}
rest = rest[1:]
if i := countLeftWhitespace(rest); i > 0 {
rest = rest[i:]
}
rest = rest[countLeftWhitespace(rest):]
key, rest, ok = consumeKey(rest)
if !ok {
return "", s, ok
@@ -122,10 +194,7 @@ func consumeParameter(s string, f func(key, val string)) (consumed, rest string,
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
func ParseParameter(s string, f func(key, val string)) (ok bool) {
	// Parsing succeeds only when consumeParameter reports success and leaves
	// nothing of s unconsumed.
	_, rest, ok := consumeParameter(s, f)
	return rest == "" && ok
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-key.
@@ -196,7 +265,6 @@ func consumeString(s string) (consumed, rest string, ok bool) {
if len(s) == 0 || s[0] != '"' {
return "", s, false
}
for i := 1; i < len(s); i++ {
switch ch := s[i]; ch {
case '\\':
@@ -215,7 +283,6 @@ func consumeString(s string) (consumed, rest string, ok bool) {
}
}
}
return "", s, false
}

View File

@@ -4,11 +4,242 @@
package httpsfv
import (
"slices"
"strconv"
"strings"
"testing"
)
// TestConsumeBareInnerList runs consumeBareInnerList over a table of inputs.
// For every case it checks the returned ok value, the bare items and
// parameters handed to the callback (in call order), and that the consumed
// and rest strings concatenate back to the input.
func TestConsumeBareInnerList(t *testing.T) {
	tests := []struct {
		name          string
		in            string
		wantBareItems []string
		wantParams    []string
		wantOk        bool
	}{
		{
			name:          "valid inner list without param",
			in:            `(a b c)`,
			wantBareItems: []string{"a", "b", "c"},
			wantParams:    []string{"", "", ""},
			wantOk:        true,
		},
		{
			name:          "valid inner list with param",
			in:            `(a;d b c;e)`,
			wantBareItems: []string{"a", "b", "c"},
			wantParams:    []string{";d", "", ";e"},
			wantOk:        true,
		},
		{
			name:          "valid inner list with fake ending parenthesis",
			in:            `(")";foo=")")`,
			wantBareItems: []string{`")"`},
			wantParams:    []string{`;foo=")"`},
			wantOk:        true,
		},
		{
			name:          "valid inner list with list parameter",
			in:            `(a b;c); d`,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
			wantOk:        true,
		},
		{
			name:          "valid inner list with more content after",
			in:            `(a b;c); d, a`,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
			wantOk:        true,
		},
		{
			// The callback still sees the items consumed before the
			// missing closing parenthesis is detected.
			name:          "invalid inner list",
			in:            `(a b;c `,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
		},
		{
			name:   "valid empty inner list",
			in:     `()`,
			wantOk: true,
		},
		{
			name: "invalid missing opening parenthesis",
			in:   `a b)`,
		},
	}

	for _, tc := range tests {
		var gotBareItems, gotParams []string
		f := func(bareItem, param string) {
			gotBareItems = append(gotBareItems, bareItem)
			gotParams = append(gotParams, param)
		}
		gotConsumed, gotRest, ok := consumeBareInnerList(tc.in, f)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		if !slices.Equal(tc.wantBareItems, gotBareItems) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotBareItems, tc.wantBareItems)
		}
		if !slices.Equal(tc.wantParams, gotParams) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, gotParams, tc.wantParams)
		}
		// Nothing of the input may be lost or duplicated between the two
		// returned strings, on success or on failure.
		if gotConsumed+gotRest != tc.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tc.name, gotConsumed, gotRest, tc.in)
		}
	}
}
// TestParseBareInnerList runs ParseBareInnerList over a table of inputs and
// checks both the returned ok value and the bare items and parameters handed
// to the callback, in call order.
func TestParseBareInnerList(t *testing.T) {
	type parseCase struct {
		name          string
		in            string
		wantBareItems []string
		wantParams    []string
		wantOk        bool
	}
	cases := []parseCase{
		{
			name:          "valid inner list",
			in:            `(a b;c)`,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
			wantOk:        true,
		},
		{
			// Trailing list parameter means the input is not fully consumed.
			name:          "valid inner list with list parameter",
			in:            `(a b;c); d`,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
			wantOk:        false,
		},
		{
			name:          "invalid inner list",
			in:            `(a b;c `,
			wantBareItems: []string{"a", "b"},
			wantParams:    []string{"", ";c"},
			wantOk:        false,
		},
	}

	for i := range cases {
		c := cases[i]
		var (
			seenItems  []string
			seenParams []string
		)
		record := func(bareItem, param string) {
			seenItems = append(seenItems, bareItem)
			seenParams = append(seenParams, param)
		}
		if ok := ParseBareInnerList(c.in, record); ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if !slices.Equal(c.wantBareItems, seenItems) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenItems, c.wantBareItems)
		}
		if !slices.Equal(c.wantParams, seenParams) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenParams, c.wantParams)
		}
	}
}
// TestConsumeItem runs consumeItem over a table of inputs. For every case it
// checks the returned ok value, the bare item and parameter handed to the
// callback, and that the consumed and rest strings concatenate back to the
// input.
func TestConsumeItem(t *testing.T) {
	type itemCase struct {
		name         string
		in           string
		wantBareItem string
		wantParam    string
		wantOk       bool
	}
	cases := []itemCase{
		{
			name:         "valid bare item",
			in:           `fookey`,
			wantBareItem: `fookey`,
			wantOk:       true,
		},
		{
			name:         "valid bare item and param",
			in:           `fookey; a="123"`,
			wantBareItem: `fookey`,
			wantParam:    `; a="123"`,
			wantOk:       true,
		},
		{
			name:         "valid item with content after",
			in:           `fookey; a="123", otheritem; otherparam=1`,
			wantBareItem: `fookey`,
			wantParam:    `; a="123"`,
			wantOk:       true,
		},
		{
			name:   "invalid just param",
			in:     `;a="123"`,
			wantOk: false,
		},
	}

	for i := range cases {
		c := cases[i]
		var seenItem, seenParam string
		consumed, remaining, ok := consumeItem(c.in, func(bareItem, param string) {
			seenItem, seenParam = bareItem, param
		})
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if seenItem != c.wantBareItem {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenItem, c.wantBareItem)
		}
		if seenParam != c.wantParam {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenParam, c.wantParam)
		}
		// The two returned strings must always reassemble the input.
		if consumed+remaining != c.in {
			t.Fatalf("test %q: %#v + %#v != %#v", c.name, consumed, remaining, c.in)
		}
	}
}
// TestParseItem runs ParseItem over a table of inputs and checks both the
// returned ok value and the bare item and parameter handed to the callback.
func TestParseItem(t *testing.T) {
	type itemCase struct {
		name         string
		in           string
		wantBareItem string
		wantParam    string
		wantOk       bool
	}
	cases := []itemCase{
		{
			name:         "valid bare item",
			in:           `fookey`,
			wantBareItem: `fookey`,
			wantOk:       true,
		},
		{
			name:         "valid bare item and param",
			in:           `fookey; a="123"`,
			wantBareItem: `fookey`,
			wantParam:    `; a="123"`,
			wantOk:       true,
		},
		{
			// The callback is still expected to see the leading item even
			// though trailing content makes the overall parse fail.
			name:         "valid item with content after",
			in:           `fookey; a="123", otheritem; otherparam=1`,
			wantBareItem: `fookey`,
			wantParam:    `; a="123"`,
			wantOk:       false,
		},
		{
			name:   "invalid just param",
			in:     `;a="123"`,
			wantOk: false,
		},
	}

	for i := range cases {
		c := cases[i]
		var seenItem, seenParam string
		ok := ParseItem(c.in, func(bareItem, param string) {
			seenItem, seenParam = bareItem, param
		})
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if seenItem != c.wantBareItem {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenItem, c.wantBareItem)
		}
		if seenParam != c.wantParam {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, seenParam, c.wantParam)
		}
	}
}
func TestConsumeParameter(t *testing.T) {
tests := []struct {
name string