internal/httpsfv: add parsing functionality for types defined in RFC 8941

This change introduces parsing functions for all item types defined in
RFC 8941, namely: integers, decimals, strings, tokens, byte sequences,
and booleans.

At this point, internal/httpsfv should be usable for parsing any RFC
8941-compliant HTTP Structured Field Values.

In a future CL, we will add support for parsing display strings and
dates, so that this package fully supports RFC 9651.

For golang/go#75500

Change-Id: Ib8ad2caa5f6ea4285d00506faa4b8127c2cc9419
Reviewed-on: https://go-review.googlesource.com/c/net/+/708435
Auto-Submit: Nicholas Husin <nsh@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Nicholas S. Husin
2025-10-01 21:17:35 -04:00
committed by Gopher Robot
parent fbba2c22cb
commit edb764c229
2 changed files with 389 additions and 31 deletions

View File

@@ -2,12 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package httpsfv provide functionality for dealing with HTTP Structured Field
// Values.
// Package httpsfv provides functionality for dealing with HTTP Structured
// Field Values.
package httpsfv
import (
"slices"
"strconv"
"strings"
"unicode/utf8"
)
@@ -70,15 +72,14 @@ func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
return ch1<<4 | ch2, true
}
// TODO(nsh): Implement corresponding parse functions for all consume functions
// that exists.
// TODO(nsh): Implement parse functions for date and display string to make
// this package fully support parsing RFC 9651-compliant HTTP SFV.
// ParseList is used to parse a string that represents a list in an
// HTTP Structured Field Values.
// ParseList parses a list from a given HTTP Structured Field Values.
//
// Given a string that represents a list, it will call the given function using
// each of the members and parameters contained in the list. This allows the
// caller to extract information out of the list.
// Given an HTTP SFV string that represents a list, it will call the given
// function using each of the members and parameters contained in the list.
// This allows the caller to extract information out of the list.
//
// This function will return once it encounters the end of the string, or
// something that is not a list. If it cannot consume the entire given
@@ -123,7 +124,7 @@ func ParseList(s string, f func(member, param string)) (ok bool) {
// consumeBareInnerList consumes an inner list
// (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list),
// except for the inner list's top-most parameter.
// For example, given `(a;b c;d);e`, it will consume only `(a;b c;d)`
// For example, given `(a;b c;d);e`, it will consume only `(a;b c;d)`.
func consumeBareInnerList(s string, f func(bareItem, param string)) (consumed, rest string, ok bool) {
if len(s) == 0 || s[0] != '(' {
return "", s, false
@@ -152,18 +153,18 @@ func consumeBareInnerList(s string, f func(bareItem, param string)) (consumed, r
return s[:len(s)-len(rest)], rest, true
}
// ParseBareInnerList is used to parse a string that represents a bare inner
// list in an HTTP Structured Field Values.
// ParseBareInnerList parses a bare inner list from a given HTTP Structured
// Field Values.
//
// We define a bare inner list as an inner list
// (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list),
// without the top-most parameter of the inner list. For example, given the
// inner list `(a;b c;d);e`, the bare inner list would be `(a;b c;d)`.
//
// Given a string that represents a bare inner list, it will call the given
// function using each of the bare item and parameter within the bare inner
// list. This allows the caller to extract information out of the bare inner
// list.
// Given an HTTP SFV string that represents a bare inner list, it will call the
// given function using each of the bare item and parameter within the bare
// inner list. This allows the caller to extract information out of the bare
// inner list.
//
// This function will return once it encounters the end of the bare inner list,
// or something that is not a bare inner list. If it cannot consume the entire
@@ -188,12 +189,11 @@ func consumeItem(s string, f func(bareItem, param string)) (consumed, rest strin
return s[:len(s)-len(rest)], rest, true
}
// ParseItem is used to parse a string that represents an item in an HTTP
// Structured Field Values.
// ParseItem parses an item from a given HTTP Structured Field Values.
//
// Given a string that represents an item, it will call the given function
// once, with the bare item and the parameter of the item. This allows the
// caller to extract information out of the parameter.
// Given an HTTP SFV string that represents an item, it will call the given
// function once, with the bare item and the parameter of the item. This allows
// the caller to extract information out of the item.
//
// This function will return once it encounters the end of the string, or
// something that is not an item. If it cannot consume the entire given
@@ -205,12 +205,13 @@ func ParseItem(s string, f func(bareItem, param string)) (ok bool) {
return rest == "" && ok
}
// ParseDictionary is used to parse a string that represents a dictionary in an
// HTTP Structured Field Values.
// ParseDictionary parses a dictionary from a given HTTP Structured Field
// Values.
//
// Given a string that represents a dictionary, it will call the given function
// using each of the keys, values, and parameters contained in the dictionary.
// This allows the caller to extract information out of the dictionary.
// Given an HTTP SFV string that represents a dictionary, it will call the
// given function using each of the keys, values, and parameters contained in
// the dictionary. This allows the caller to extract information out of the
// dictionary.
//
// This function will return once it encounters the end of the string, or
// something that is not a dictionary. If it cannot consume the entire given
@@ -286,12 +287,11 @@ func consumeParameter(s string, f func(key, val string)) (consumed, rest string,
return s[:len(s)-len(rest)], rest, true
}
// ParseParameter is used to parse a string that represents a parameter in an
// HTTP Structured Field Values.
// ParseParameter parses a parameter from a given HTTP Structured Field Values.
//
// Given a string that represents a parameter, it will call the given function
// using each of the keys and values contained in the parameter. This allows
// the caller to extract information out of the parameter.
// Given an HTTP SFV string that represents a parameter, it will call the given
// function using each of the keys and values contained in the parameter. This
// allows the caller to extract information out of the parameter.
//
// This function will return once it encounters the end of the string, or
// something that is not a parameter. If it cannot consume the entire given
@@ -366,6 +366,41 @@ func consumeIntegerOrDecimal(s string) (consumed, rest string, ok bool) {
return s[:i], s[i:], true
}
// ParseInteger parses an integer from a given HTTP Structured Field Values.
//
// The whole of s must be a single valid integer: ok is false when
// consumeIntegerOrDecimal rejects s, when it leaves any trailing content, or
// when s cannot be converted to a base-10 int64. On failure the parsed value
// is 0.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim.
func ParseInteger(s string) (parsed int64, ok bool) {
	_, remaining, valid := consumeIntegerOrDecimal(s)
	if !valid || remaining != "" {
		return 0, false
	}
	// The consume step is shared with decimals; strconv.ParseInt is what
	// rejects input containing a fractional part.
	value, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		return 0, false
	}
	return value, true
}
// ParseDecimal parses a decimal from a given HTTP Structured Field Values.
//
// The whole of s must be a single valid decimal: ok is false when
// consumeIntegerOrDecimal rejects s, when it leaves any trailing content, when
// s contains no '.', or when s cannot be converted to a float64. On failure
// the parsed value is 0.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim.
func ParseDecimal(s string) (parsed float64, ok bool) {
	_, remaining, valid := consumeIntegerOrDecimal(s)
	switch {
	case !valid, remaining != "":
		return 0, false
	case !strings.Contains(s, "."):
		// The consume step is shared with integers; without a '.' this is
		// not a decimal.
		return 0, false
	}
	value, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, false
	}
	return value, true
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string.
func consumeString(s string) (consumed, rest string, ok bool) {
if len(s) == 0 || s[0] != '"' {
@@ -392,6 +427,19 @@ func consumeString(s string) (consumed, rest string, ok bool) {
return "", s, false
}
// ParseString parses a Go string from a given HTTP Structured Field Values.
//
// The whole of s must be a single valid string as accepted by consumeString,
// with no trailing content; otherwise it returns "" and ok is false. On
// success it returns s with its first and last bytes (the surrounding double
// quotes) removed.
//
// The content between the quotes is returned exactly as it appears in s:
// backslash escapes are not resolved.
// NOTE(review): RFC 9651 unescapes \" and \\ while parsing; confirm whether
// callers expect that to happen here.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string.
func ParseString(s string) (parsed string, ok bool) {
	_, remaining, valid := consumeString(s)
	if valid && remaining == "" {
		return s[1 : len(s)-1], true
	}
	return "", false
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token
func consumeToken(s string) (consumed, rest string, ok bool) {
if len(s) == 0 || (!isAlpha(s[0]) && s[0] != '*') {
@@ -407,6 +455,19 @@ func consumeToken(s string) (consumed, rest string, ok bool) {
return s[:i], s[i:], true
}
// ParseToken parses a token from a given HTTP Structured Field Values.
//
// The whole of s must be a single valid token as accepted by consumeToken,
// with no trailing content. On success s itself is returned unchanged; on
// failure it returns "" and ok is false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token
func ParseToken(s string) (parsed string, ok bool) {
	_, remaining, valid := consumeToken(s)
	if valid && remaining == "" {
		return s, true
	}
	return "", false
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence.
func consumeByteSequence(s string) (consumed, rest string, ok bool) {
if len(s) == 0 || s[0] != ':' {
@@ -423,6 +484,20 @@ func consumeByteSequence(s string) (consumed, rest string, ok bool) {
return "", s, false
}
// ParseByteSequence parses a byte sequence from a given HTTP Structured Field
// Values.
//
// The whole of s must be a single valid byte sequence as accepted by
// consumeByteSequence, with no trailing content; otherwise it returns nil and
// ok is false. On success it returns a copy of s with its first and last
// bytes (the delimiters) removed.
//
// The returned bytes are the encoded text as written in s; no base64 decoding
// is performed by this function.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence.
func ParseByteSequence(s string) (parsed []byte, ok bool) {
	_, remaining, valid := consumeByteSequence(s)
	if !valid || remaining != "" {
		return nil, false
	}
	inner := s[1 : len(s)-1]
	return []byte(inner), true
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean.
func consumeBoolean(s string) (consumed, rest string, ok bool) {
if len(s) >= 2 && (s[:2] == "?0" || s[:2] == "?1") {
@@ -431,6 +506,19 @@ func consumeBoolean(s string) (consumed, rest string, ok bool) {
return "", s, false
}
// ParseBoolean parses a boolean from a given HTTP Structured Field Values.
//
// The whole of s must be a single valid boolean as accepted by
// consumeBoolean, with no trailing content; otherwise both results are false.
// On success parsed is true exactly when s is "?1".
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean.
func ParseBoolean(s string) (parsed bool, ok bool) {
	_, remaining, valid := consumeBoolean(s)
	if valid && remaining == "" {
		return s == "?1", true
	}
	return false, false
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
func consumeDate(s string) (consumed, rest string, ok bool) {
if len(s) == 0 || s[0] != '@' {

View File

@@ -655,6 +655,106 @@ func TestConsumeIntegerOrDecimal(t *testing.T) {
}
}
// TestParseInteger runs ParseInteger over a table of inputs and compares both
// the returned value and the ok flag with the expectation.
func TestParseInteger(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   int64
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result (0, false).
	cases := []testCase{
		{name: "valid integer", in: "123456", want: 123456, wantOk: true},
		{name: "valid integer with more content after", in: "123456,12345"},
		{name: "valid max integer", in: "999999999999999", want: 999999999999999, wantOk: true},
		{name: "valid min integer", in: "-999999999999999", want: -999999999999999, wantOk: true},
		{name: "invalid integer too high", in: "9999999999999999"},
		{name: "invalid integer too low", in: "-9999999999999999"},
		{name: "invalid integer with fraction", in: "-123456789012.123"},
	}
	for _, c := range cases {
		got, ok := ParseInteger(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if got != c.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
// TestParseDecimal runs ParseDecimal over a table of inputs and compares both
// the returned value and the ok flag with the expectation.
func TestParseDecimal(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   float64
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result (0, false).
	cases := []testCase{
		{name: "valid decimal", in: "123456.789", want: 123456.789, wantOk: true},
		{name: "valid decimal with more content after", in: "123456.789, 123"},
		{name: "invalid decimal with no fraction", in: "123456"},
		{name: "invalid decimal integer component too long", in: "1234567890123.1"},
		{name: "invalid decimal fraction component too long", in: "1.1234"},
		{name: "invalid decimal trailing dot", in: "1."},
	}
	for _, c := range cases {
		got, ok := ParseDecimal(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if got != c.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
func TestConsumeString(t *testing.T) {
tests := []struct {
name string
@@ -708,6 +808,54 @@ func TestConsumeString(t *testing.T) {
}
}
// TestParseString runs ParseString over a table of inputs and compares both
// the returned string and the ok flag with the expectation.
func TestParseString(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   string
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result ("", false).
	cases := []testCase{
		{name: "valid basic string", in: `"foo bar"`, want: "foo bar", wantOk: true},
		{name: "valid basic string with more content after", in: `"foo bar", a=3`},
		// The expected value keeps the backslash: the escape is returned as written.
		{name: "valid string with escaped dquote", in: `"foo bar \""`, want: `foo bar \"`, wantOk: true},
		{name: "invalid string no starting dquote", in: `foo bar"`},
		{name: "invalid string no closing dquote", in: `"foo bar`},
		{name: "invalid string invalid character", in: string([]byte{'"', 0x00, '"'})},
	}
	for _, c := range cases {
		got, ok := ParseString(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if got != c.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
func TestConsumeToken(t *testing.T) {
tests := []struct {
name string
@@ -747,6 +895,46 @@ func TestConsumeToken(t *testing.T) {
}
}
// TestParseToken runs ParseToken over a table of inputs and compares both the
// returned token and the ok flag with the expectation.
func TestParseToken(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   string
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result ("", false).
	cases := []testCase{
		{name: "valid token", in: "a_b-c.d3:f%00/*", want: "a_b-c.d3:f%00/*", wantOk: true},
		{name: "valid token with uppercase", in: "FOOBAR", want: "FOOBAR", wantOk: true},
		{name: "valid token with content after", in: "FOOBAR, foobar"},
		{name: "invalid token", in: "0invalid"},
	}
	for _, c := range cases {
		got, ok := ParseToken(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if got != c.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
func TestConsumeByteSequence(t *testing.T) {
tests := []struct {
name string
@@ -794,6 +982,48 @@ func TestConsumeByteSequence(t *testing.T) {
}
}
// TestParseByteSequence runs ParseByteSequence over a table of inputs and
// compares both the returned bytes and the ok flag with the expectation.
func TestParseByteSequence(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   []byte
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result (nil, false).
	cases := []testCase{
		// The expected bytes are the text between the colons, not its decoded form.
		{name: "valid byte sequence", in: ":aGVsbG8gd29ybGQ=:", want: []byte("aGVsbG8gd29ybGQ="), wantOk: true},
		{name: "valid byte sequence with more content after", in: ":aGVsbG8gd29ybGQ=::aGVsbG8gd29ybGQ=:"},
		{name: "invalid byte sequence character", in: ":-:"},
		{name: "invalid byte sequence opening", in: "aGVsbG8gd29ybGQ=:"},
		{name: "invalid byte sequence closing", in: ":aGVsbG8gd29ybGQ="},
	}
	for _, c := range cases {
		got, ok := ParseByteSequence(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if !slices.Equal(c.want, got) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
func TestConsumeBoolean(t *testing.T) {
tests := []struct {
name string
@@ -833,6 +1063,46 @@ func TestConsumeBoolean(t *testing.T) {
}
}
// TestParseBoolean runs ParseBoolean over a table of inputs and compares both
// the returned value and the ok flag with the expectation.
func TestParseBoolean(t *testing.T) {
	type testCase struct {
		name   string
		in     string
		want   bool
		wantOk bool
	}
	// Cases that omit want and wantOk expect the failure result (false, false).
	cases := []testCase{
		{name: "valid boolean false", in: "?0", want: false, wantOk: true},
		{name: "valid boolean true", in: "?1", want: true, wantOk: true},
		{name: "valid boolean with more content after", in: "?1, a=1"},
		{name: "invalid boolean", in: "?2"},
	}
	for _, c := range cases {
		got, ok := ParseBoolean(c.in)
		if ok != c.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		}
		if got != c.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, got, c.want)
		}
	}
}
func TestConsumeDate(t *testing.T) {
tests := []struct {
name string