From edb764c2296fdbcd9955bdc3ef9a67394824569d Mon Sep 17 00:00:00 2001 From: "Nicholas S. Husin" Date: Wed, 1 Oct 2025 21:17:35 -0400 Subject: [PATCH] internal/httpsfv: add parsing functionality for types defined in RFC 8941 This change introduces parsing functions for all item types defined in RFC 8941, namely: integers, decimals, strings, tokens, byte sequences, and booleans. At this point, internal/httpsfv should be usable for parsing any RFC 8941-compliant HTTP Structured Field Values. In a future CL, we will add support for parsing display strings and dates, so that this package fully supports RFC 9651. For golang/go#75500 Change-Id: Ib8ad2caa5f6ea4285d00506faa4b8127c2cc9419 Reviewed-on: https://go-review.googlesource.com/c/net/+/708435 Auto-Submit: Nicholas Husin Reviewed-by: Damien Neil Reviewed-by: Nicholas Husin LUCI-TryBot-Result: Go LUCI --- internal/httpsfv/httpsfv.go | 150 +++++++++++++---- internal/httpsfv/httpsfv_test.go | 270 +++++++++++++++++++++++++++++++ 2 files changed, 389 insertions(+), 31 deletions(-) diff --git a/internal/httpsfv/httpsfv.go b/internal/httpsfv/httpsfv.go index 7567d05c..f8e2fcc0 100644 --- a/internal/httpsfv/httpsfv.go +++ b/internal/httpsfv/httpsfv.go @@ -2,12 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package httpsfv provide functionality for dealing with HTTP Structured Field -// Values. +// Package httpsfv provides functionality for dealing with HTTP Structured +// Field Values. package httpsfv import ( "slices" + "strconv" + "strings" "unicode/utf8" ) @@ -70,15 +72,14 @@ func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) { return ch1<<4 | ch2, true } -// TODO(nsh): Implement corresponding parse functions for all consume functions -// that exists. +// TODO(nsh): Implement parse functions for date and display string to make +// this package fully support parsing RFC 9651-compliant HTTP SFV. 
-// ParseList is used to parse a string that represents a list in an -// HTTP Structured Field Values. +// ParseList parses a list from a given HTTP Structured Field Values. // -// Given a string that represents a list, it will call the given function using -// each of the members and parameters contained in the list. This allows the -// caller to extract information out of the list. +// Given an HTTP SFV string that represents a list, it will call the given +// function using each of the members and parameters contained in the list. +// This allows the caller to extract information out of the list. // // This function will return once it encounters the end of the string, or // something that is not a list. If it cannot consume the entire given @@ -123,7 +124,7 @@ func ParseList(s string, f func(member, param string)) (ok bool) { // consumeBareInnerList consumes an inner list // (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list), // except for the inner list's top-most parameter. -// For example, given `(a;b c;d);e`, it will consume only `(a;b c;d)` +// For example, given `(a;b c;d);e`, it will consume only `(a;b c;d)`. func consumeBareInnerList(s string, f func(bareItem, param string)) (consumed, rest string, ok bool) { if len(s) == 0 || s[0] != '(' { return "", s, false @@ -152,18 +153,18 @@ func consumeBareInnerList(s string, f func(bareItem, param string)) (consumed, r return s[:len(s)-len(rest)], rest, true } -// ParseBareInnerList is used to parse a string that represents a bare inner -// list in an HTTP Structured Field Values. +// ParseBareInnerList parses a bare inner list from a given HTTP Structured +// Field Values. // // We define a bare inner list as an inner list // (https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-inner-list), // without the top-most parameter of the inner list. For example, given the // inner list `(a;b c;d);e`, the bare inner list would be `(a;b c;d)`. 
// -// Given a string that represents a bare inner list, it will call the given -// function using each of the bare item and parameter within the bare inner -// list. This allows the caller to extract information out of the bare inner -// list. +// Given an HTTP SFV string that represents a bare inner list, it will call the +// given function using each of the bare items and parameters within the bare +// inner list. This allows the caller to extract information out of the bare +// inner list. // // This function will return once it encounters the end of the bare inner list, // or something that is not a bare inner list. If it cannot consume the entire @@ -188,12 +189,11 @@ func consumeItem(s string, f func(bareItem, param string)) (consumed, rest strin return s[:len(s)-len(rest)], rest, true } -// ParseItem is used to parse a string that represents an item in an HTTP -// Structured Field Values. +// ParseItem parses an item from a given HTTP Structured Field Values. // -// Given a string that represents an item, it will call the given function -// once, with the bare item and the parameter of the item. This allows the -// caller to extract information out of the parameter. +// Given an HTTP SFV string that represents an item, it will call the given +// function once, with the bare item and the parameter of the item. This allows +// the caller to extract information out of the item. // // This function will return once it encounters the end of the string, or // something that is not an item. If it cannot consume the entire given @@ -205,12 +205,13 @@ func ParseItem(s string, f func(bareItem, param string)) (ok bool) { return rest == "" && ok } -// ParseDictionary is used to parse a string that represents a dictionary in an -// HTTP Structured Field Values. +// ParseDictionary parses a dictionary from a given HTTP Structured Field +// Values. 
// -// Given a string that represents a dictionary, it will call the given function -// using each of the keys, values, and parameters contained in the dictionary. -// This allows the caller to extract information out of the dictionary. +// Given an HTTP SFV string that represents a dictionary, it will call the +// given function using each of the keys, values, and parameters contained in +// the dictionary. This allows the caller to extract information out of the +// dictionary. // // This function will return once it encounters the end of the string, or // something that is not a dictionary. If it cannot consume the entire given @@ -286,12 +287,11 @@ func consumeParameter(s string, f func(key, val string)) (consumed, rest string, return s[:len(s)-len(rest)], rest, true } -// ParseParameter is used to parse a string that represents a parameter in an -// HTTP Structured Field Values. +// ParseParameter parses a parameter from a given HTTP Structured Field Values. // -// Given a string that represents a parameter, it will call the given function -// using each of the keys and values contained in the parameter. This allows -// the caller to extract information out of the parameter. +// Given an HTTP SFV string that represents a parameter, it will call the given +// function using each of the keys and values contained in the parameter. This +// allows the caller to extract information out of the parameter. // // This function will return once it encounters the end of the string, or // something that is not a parameter. If it cannot consume the entire given @@ -366,6 +366,41 @@ func consumeIntegerOrDecimal(s string) (consumed, rest string, ok bool) { return s[:i], s[i:], true } +// ParseInteger parses an integer from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid integer. It returns the +// parsed integer and an ok boolean value, indicating success or not. 
+// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim. +func ParseInteger(s string) (parsed int64, ok bool) { + if _, rest, ok := consumeIntegerOrDecimal(s); !ok || rest != "" { + return 0, false + } + if n, err := strconv.ParseInt(s, 10, 64); err == nil { + return n, true + } + return 0, false +} + +// ParseDecimal parses a decimal from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid decimal. It returns the +// parsed decimal and an ok boolean value, indicating success or not. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim. +func ParseDecimal(s string) (parsed float64, ok bool) { + if _, rest, ok := consumeIntegerOrDecimal(s); !ok || rest != "" { + return 0, false + } + if !strings.Contains(s, ".") { + return 0, false + } + if n, err := strconv.ParseFloat(s, 64); err == nil { + return n, true + } + return 0, false +} + // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string. func consumeString(s string) (consumed, rest string, ok bool) { if len(s) == 0 || s[0] != '"' { @@ -392,6 +427,19 @@ func consumeString(s string) (consumed, rest string, ok bool) { return "", s, false } +// ParseString parses a Go string from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid string. It returns the +// text between the quotes, with escapes kept as-is, and an ok success boolean. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string. 
+func ParseString(s string) (parsed string, ok bool) { + if _, rest, ok := consumeString(s); !ok || rest != "" { + return "", false + } + return s[1 : len(s)-1], true +} + // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token func consumeToken(s string) (consumed, rest string, ok bool) { if len(s) == 0 || (!isAlpha(s[0]) && s[0] != '*') { @@ -407,6 +455,19 @@ func consumeToken(s string) (consumed, rest string, ok bool) { return s[:i], s[i:], true } +// ParseToken parses a token from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid token. It returns the +// parsed token and an ok boolean value, indicating success or not. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token +func ParseToken(s string) (parsed string, ok bool) { + if _, rest, ok := consumeToken(s); !ok || rest != "" { + return "", false + } + return s, true +} + // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence. func consumeByteSequence(s string) (consumed, rest string, ok bool) { if len(s) == 0 || s[0] != ':' { @@ -423,6 +484,20 @@ func consumeByteSequence(s string) (consumed, rest string, ok bool) { return "", s, false } +// ParseByteSequence parses a byte sequence from a given HTTP Structured Field +// Values. +// +// The entire HTTP SFV string must consist of a valid byte sequence. It returns +// the still base64-encoded content and an ok boolean indicating success. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence. +func ParseByteSequence(s string) (parsed []byte, ok bool) { + if _, rest, ok := consumeByteSequence(s); !ok || rest != "" { + return nil, false + } + return []byte(s[1 : len(s)-1]), true +} + // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean. 
func consumeBoolean(s string) (consumed, rest string, ok bool) { if len(s) >= 2 && (s[:2] == "?0" || s[:2] == "?1") { @@ -431,6 +506,19 @@ func consumeBoolean(s string) (consumed, rest string, ok bool) { return "", s, false } +// ParseBoolean parses a boolean from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid boolean. It returns the +// parsed boolean and an ok boolean value, indicating success or not. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean. +func ParseBoolean(s string) (parsed bool, ok bool) { + if _, rest, ok := consumeBoolean(s); !ok || rest != "" { + return false, false + } + return s == "?1", true +} + // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date. func consumeDate(s string) (consumed, rest string, ok bool) { if len(s) == 0 || s[0] != '@' { diff --git a/internal/httpsfv/httpsfv_test.go b/internal/httpsfv/httpsfv_test.go index ab1cd0f1..9e004e09 100644 --- a/internal/httpsfv/httpsfv_test.go +++ b/internal/httpsfv/httpsfv_test.go @@ -655,6 +655,106 @@ func TestConsumeIntegerOrDecimal(t *testing.T) { } } +func TestParseInteger(t *testing.T) { + tests := []struct { + name string + in string + want int64 + wantOk bool + }{ + { + name: "valid integer", + in: "123456", + want: 123456, + wantOk: true, + }, + { + name: "valid integer with more content after", + in: "123456,12345", + }, + { + name: "valid max integer", + in: "999999999999999", + want: 999999999999999, + wantOk: true, + }, + { + name: "valid min integer", + in: "-999999999999999", + want: -999999999999999, + wantOk: true, + }, + { + name: "invalid integer too high", + in: "9999999999999999", + }, + { + name: "invalid integer too low", + in: "-9999999999999999", + }, + { + name: "invalid integer with fraction", + in: "-123456789012.123", + }, + } + + for _, tc := range tests { + got, ok := ParseInteger(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, 
ok) + } + if tc.want != got { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + +func TestParseDecimal(t *testing.T) { + tests := []struct { + name string + in string + want float64 + wantOk bool + }{ + { + name: "valid decimal", + in: "123456.789", + want: 123456.789, + wantOk: true, + }, + { + name: "valid decimal with more content after", + in: "123456.789, 123", + }, + { + name: "invalid decimal with no fraction", + in: "123456", + }, + { + name: "invalid decimal integer component too long", + in: "1234567890123.1", + }, + { + name: "invalid decimal fraction component too long", + in: "1.1234", + }, + { + name: "invalid decimal trailing dot", + in: "1.", + }, + } + + for _, tc := range tests { + got, ok := ParseDecimal(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if tc.want != got { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + func TestConsumeString(t *testing.T) { tests := []struct { name string @@ -708,6 +808,54 @@ func TestConsumeString(t *testing.T) { } } +func TestParseString(t *testing.T) { + tests := []struct { + name string + in string + want string + wantOk bool + }{ + { + name: "valid basic string", + in: `"foo bar"`, + want: "foo bar", + wantOk: true, + }, + { + name: "valid basic string with more content after", + in: `"foo bar", a=3`, + }, + { + name: "valid string with escaped dquote", + in: `"foo bar \""`, + want: `foo bar \"`, + wantOk: true, + }, + { + name: "invalid string no starting dquote", + in: `foo bar"`, + }, + { + name: "invalid string no closing dquote", + in: `"foo bar`, + }, + { + name: "invalid string invalid character", + in: string([]byte{'"', 0x00, '"'}), + }, + } + + for _, tc := range tests { + got, ok := ParseString(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if tc.want != got { + t.Fatalf("test %q: 
mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + func TestConsumeToken(t *testing.T) { tests := []struct { name string @@ -747,6 +895,46 @@ func TestConsumeToken(t *testing.T) { } } +func TestParseToken(t *testing.T) { + tests := []struct { + name string + in string + want string + wantOk bool + }{ + { + name: "valid token", + in: "a_b-c.d3:f%00/*", + want: "a_b-c.d3:f%00/*", + wantOk: true, + }, + { + name: "valid token with uppercase", + in: "FOOBAR", + want: "FOOBAR", + wantOk: true, + }, + { + name: "valid token with content after", + in: "FOOBAR, foobar", + }, + { + name: "invalid token", + in: "0invalid", + }, + } + + for _, tc := range tests { + got, ok := ParseToken(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if tc.want != got { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + func TestConsumeByteSequence(t *testing.T) { tests := []struct { name string @@ -794,6 +982,48 @@ func TestConsumeByteSequence(t *testing.T) { } } +func TestParseByteSequence(t *testing.T) { + tests := []struct { + name string + in string + want []byte + wantOk bool + }{ + { + name: "valid byte sequence", + in: ":aGVsbG8gd29ybGQ=:", + want: []byte("aGVsbG8gd29ybGQ="), + wantOk: true, + }, + { + name: "valid byte sequence with more content after", + in: ":aGVsbG8gd29ybGQ=::aGVsbG8gd29ybGQ=:", + }, + { + name: "invalid byte sequence character", + in: ":-:", + }, + { + name: "invalid byte sequence opening", + in: "aGVsbG8gd29ybGQ=:", + }, + { + name: "invalid byte sequence closing", + in: ":aGVsbG8gd29ybGQ=", + }, + } + + for _, tc := range tests { + got, ok := ParseByteSequence(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if !slices.Equal(tc.want, got) { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + func TestConsumeBoolean(t *testing.T) { tests := 
[]struct { name string @@ -833,6 +1063,46 @@ func TestConsumeBoolean(t *testing.T) { } } +func TestParseBoolean(t *testing.T) { + tests := []struct { + name string + in string + want bool + wantOk bool + }{ + { + name: "valid boolean false", + in: "?0", + want: false, + wantOk: true, + }, + { + name: "valid boolean true", + in: "?1", + want: true, + wantOk: true, + }, + { + name: "valid boolean with more content after", + in: "?1, a=1", + }, + { + name: "invalid boolean", + in: "?2", + }, + } + + for _, tc := range tests { + got, ok := ParseBoolean(tc.in) + if ok != tc.wantOk { + t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok) + } + if tc.want != got { + t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want) + } + } +} + func TestConsumeDate(t *testing.T) { tests := []struct { name string