internal/httpsfv: add functionality to walk Parameters in HTTP SFV.

This change implements the minimum set of functionality within RFC 8941
that is needed in order for us to be able to extract information out of
Parameters type.

Rather than parsing the given Structured Field Values as usual, we
instead allow users to give us functions that will be invoked as we walk
through the SFV. This allows users to still extract information out of
SFV, without incurring significant memory allocation, especially when
the input is large.

If the current API & approach is good, we will proceed further by
implementing walk functionality for the rest of the types within RFC
8941: Dictionary, List, Item, and Inner List. After that, we will also
add support for Date and Display String to fully support RFC 9651.

For golang/go#75500

Change-Id: I838a7267a54fcd64b019be0ac10fe86b1e3e2c8b
Reviewed-on: https://go-review.googlesource.com/c/net/+/706755
Auto-Submit: Nicholas Husin <nsh@golang.org>
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Nicholas S. Husin
2025-09-25 16:40:33 -04:00
committed by Gopher Robot
parent 10342476f5
commit c492e3c189
2 changed files with 744 additions and 0 deletions

284
internal/httpsfv/httpsfv.go Normal file
View File

@@ -0,0 +1,284 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package httpsfv provides functionality for dealing with HTTP Structured Field
// Values.
package httpsfv
import (
"slices"
)
// isLCAlpha reports whether b is a lowercase ASCII letter ('a' through 'z').
func isLCAlpha(b byte) bool {
	return 'a' <= b && b <= 'z'
}
// isAlpha reports whether b is an ASCII letter of either case.
func isAlpha(b byte) bool {
	if 'A' <= b && b <= 'Z' {
		return true
	}
	return isLCAlpha(b)
}
// isDigit reports whether b is an ASCII decimal digit ('0' through '9').
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}
// isVChar reports whether b is a visible (printing) ASCII character, that is,
// a byte in the range 0x21 through 0x7e inclusive.
func isVChar(b byte) bool {
	return b > ' ' && b < 0x7f
}
// isSP reports whether b is the ASCII space character (0x20).
func isSP(b byte) bool {
	return b == ' '
}
// isTChar reports whether b is an ASCII letter, an ASCII digit, or one of the
// punctuation bytes listed below.
func isTChar(b byte) bool {
	if slices.Contains([]byte("!#$%&'*+-.^_`|~"), b) {
		return true
	}
	return isAlpha(b) || isDigit(b)
}
// countLeftWhitespace returns the number of leading bytes of s that are
// either a space or a horizontal tab.
func countLeftWhitespace(s string) int {
	n := 0
	for n < len(s) && (s[n] == ' ' || s[n] == '\t') {
		n++
	}
	return n
}
// TODO(nsh): Implement other consume functions that will be needed to fully
// deal with all possible HTTP SFV, specifically:
// - consumeDictionary(s string, f func(key, val, param string)) (consumed, rest string, ok bool)
// For example, given `a=123,b;a="a", i`, ConsumeDictionary will call f() 3 times
// with the following args:
// - key: `a`, val: `123`, param: ``
// - key: `b`, val: ``, param:`;a="a"`
// - key: `i`, val: ``, param: ``
//
// - consumeList(s string, f func(member, param string)) (consumed, rest string, ok bool)
// For example, given `123.456;i, ("foo" "bar"; lvl=2); lvl=1`, ConsumeList will
// call f() 2 times with the following args:
// - member: `123.456`, param: `;i`
// - member: `("foo" "bar"; lvl=2)`, param: `; lvl=1`
//
// - consumeItem(s string, f func(bareItem, param string)) (consumed, rest string, ok bool)
// For example, given `"foo"; bar=baz;foo=bar`, ConsumeItem will call f() with
// the following args:
// - bareItem: `"foo"`, param: `; bar=baz;foo=bar`
//
// - consumeInnerList(s string, f func(bareItem, param, listParam string)) (consumed, rest string, ok bool)
// For example, given `("foo"; a=1;b=2 "bar";baz;lvl=2);lvl=1`, ConsumeInnerList
// will call f() 2 times with the following args:
// - bareItem: `"foo"`, param: `; a=1;b=2`, listParam: `;lvl=1`
// - bareItem: `"bar"`, param: `;baz;lvl=2`, listParam: `;lvl=1`
// TODO(nsh): Implement corresponding parse functions for all consume functions
// that exist.
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
func consumeParameter(s string, f func(key, val string)) (consumed, rest string, ok bool) {
rest = s
for len(rest) != 0 {
var key, val string
val = "?1" // Default value for empty val is boolean true.
if rest[0] != ';' {
break
}
rest = rest[1:]
if i := countLeftWhitespace(rest); i > 0 {
rest = rest[i:]
}
key, rest, ok = consumeKey(rest)
if !ok {
return "", s, ok
}
if len(rest) != 0 && rest[0] == '=' {
rest = rest[1:]
val, rest, ok = consumeBareItem(rest)
if !ok {
return "", s, ok
}
}
if f != nil {
f(key, val)
}
}
return s[:len(s)-len(rest)], rest, true
}
// ParseParameter walks s, which must consist solely of the parameters of an
// HTTP Structured Field Value, and invokes f with every key and value it
// encounters so that the caller can extract the information it needs.
//
// Parsing stops at the end of the string or at the first byte that cannot
// start another parameter. The returned ok is true only when all of s was
// consumed as parameters; trailing content or a malformed parameter yields
// false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
func ParseParameter(s string, f func(key, val string)) (ok bool) {
	_, remaining, parsed := consumeParameter(s, f)
	return parsed && remaining == ""
}
// consumeKey consumes a key from the start of s. A key begins with a
// lowercase letter or '*' and continues with lowercase letters, digits, '_',
// '-', '.', or '*'. On failure it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-key.
func consumeKey(s string) (consumed, rest string, ok bool) {
	if s == "" {
		return "", s, false
	}
	if first := s[0]; first != '*' && !isLCAlpha(first) {
		return "", s, false
	}
	// The first byte is already known to be valid, so scan from the second.
	end := 1
	for end < len(s) {
		c := s[end]
		if !isLCAlpha(c) && !isDigit(c) && !slices.Contains([]byte("_-.*"), c) {
			break
		}
		end++
	}
	return s[:end], s[end:], true
}
// consumeIntegerOrDecimal consumes an Integer or a Decimal from the start of
// s, including an optional leading '-'. An Integer may have at most 15
// digits. A Decimal may have at most 12 digits before the '.', at least one
// and at most 3 digits after it, and at most 16 characters overall (not
// counting the sign). On failure it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim.
func consumeIntegerOrDecimal(s string) (consumed, rest string, ok bool) {
	start := 0 // Index of the first digit, just past any leading '-'.
	if s != "" && s[0] == '-' {
		start = 1
	}
	if start >= len(s) || !isDigit(s[start]) {
		return "", s, false
	}

	end := start
	dot := -1 // Index of the '.', or -1 while the number is still an Integer.
	for ; end < len(s); end++ {
		c := s[end]
		if isDigit(c) {
			continue
		}
		if c != '.' || dot >= 0 {
			// Anything other than the first '.' ends the number.
			break
		}
		if end-start > 12 {
			return "", s, false
		}
		dot = end
	}

	if dot < 0 {
		if end-start > 15 {
			return "", s, false
		}
		return s[:end], s[end:], true
	}

	switch {
	case end-start > 16: // Too many characters overall.
		return "", s, false
	case s[end-1] == '.': // Nothing after the decimal point.
		return "", s, false
	case end-dot-1 > 3: // Fractional part too long.
		return "", s, false
	}
	return s[:end], s[end:], true
}
// consumeString consumes a double-quoted String from the start of s. The
// returned consumed value includes both quotes and any backslash escapes
// exactly as they appear in s. Only `\"` and `\\` are valid escapes, and
// every other byte must be a space or a visible ASCII character. On failure,
// including a missing closing quote, it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string.
func consumeString(s string) (consumed, rest string, ok bool) {
	if s == "" || s[0] != '"' {
		return "", s, false
	}
	pos := 1
	for pos < len(s) {
		c := s[pos]
		if c == '"' {
			return s[:pos+1], s[pos+1:], true
		}
		if c == '\\' {
			// A backslash must be followed by either '"' or another '\'.
			if pos+1 >= len(s) {
				return "", s, false
			}
			if next := s[pos+1]; next != '"' && next != '\\' {
				return "", s, false
			}
			pos += 2
			continue
		}
		if !isVChar(c) && !isSP(c) {
			return "", s, false
		}
		pos++
	}
	// Ran out of input before seeing the closing quote.
	return "", s, false
}
// consumeToken consumes a Token from the start of s. A Token begins with an
// ASCII letter or '*' and continues with any byte accepted by isTChar, ':',
// or '/'. On failure it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token
func consumeToken(s string) (consumed, rest string, ok bool) {
	if s == "" {
		return "", s, false
	}
	if s[0] != '*' && !isAlpha(s[0]) {
		return "", s, false
	}
	// The first byte is already known to be valid, so scan from the second.
	end := 1
	for ; end < len(s); end++ {
		c := s[end]
		if !slices.Contains([]byte(":/"), c) && !isTChar(c) {
			break
		}
	}
	return s[:end], s[end:], true
}
// consumeByteSequence consumes a Byte Sequence from the start of s: a run of
// base64 alphabet bytes (letters, digits, '+', '/', '=') delimited by ':' on
// both sides. The returned consumed value includes both colons. Only the
// character set is checked here; the content is not base64-decoded. On
// failure it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence.
func consumeByteSequence(s string) (consumed, rest string, ok bool) {
	if s == "" || s[0] != ':' {
		return "", s, false
	}
	for idx := 1; idx < len(s); idx++ {
		c := s[idx]
		switch {
		case c == ':':
			return s[:idx+1], s[idx+1:], true
		case isAlpha(c), isDigit(c), slices.Contains([]byte("+/="), c):
			// Valid base64 alphabet or padding byte; keep scanning.
		default:
			return "", s, false
		}
	}
	// Ran out of input before seeing the closing ':'.
	return "", s, false
}
// consumeBoolean consumes a Boolean, which is exactly "?0" or "?1", from the
// start of s. On failure it returns "", s, false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean.
func consumeBoolean(s string) (consumed, rest string, ok bool) {
	if len(s) < 2 || s[0] != '?' {
		return "", s, false
	}
	if s[1] != '0' && s[1] != '1' {
		return "", s, false
	}
	return s[:2], s[2:], true
}
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
func consumeBareItem(s string) (consumed, rest string, ok bool) {
if len(s) == 0 {
return "", s, false
}
// TODO(nsh): This is currently only up to date with RFC 8941. Implement
// Date and Display string for full feature parity with RFC 9651.
ch := s[0]
switch {
case ch == '-' || isDigit(ch):
return consumeIntegerOrDecimal(s)
case ch == '"':
return consumeString(s)
case ch == '*' || isAlpha(ch):
return consumeToken(s)
case ch == ':':
return consumeByteSequence(s)
case ch == '?':
return consumeBoolean(s)
default:
return "", s, false
}
}

View File

@@ -0,0 +1,460 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package httpsfv
import (
"strconv"
"strings"
"testing"
)
// TestConsumeParameter checks that consumeParameter reports each key/value
// pair to its callback, that the last value seen for the "want" key matches
// the expectation, and that consumed+rest always reassembles the input.
func TestConsumeParameter(t *testing.T) {
	tests := []struct {
		name   string
		in     string
		want   any
		wantOk bool
	}{
		{
			name:   "valid string",
			in:     `;parameter;want="wantvalue"`,
			want:   "wantvalue",
			wantOk: true,
		},
		{
			name:   "valid integer",
			in:     `;parameter;want=123456;something`,
			want:   123456,
			wantOk: true,
		},
		{
			name:   "valid decimal",
			in:     `;parameter;want=3.14;something`,
			want:   3.14,
			wantOk: true,
		},
		{
			name:   "valid implicit bool",
			in:     `;parameter;want;something`,
			want:   true,
			wantOk: true,
		},
		{
			name:   "valid token",
			in:     `;want=*atoken;something`,
			want:   "*atoken",
			wantOk: true,
		},
		{
			name:   "valid byte sequence",
			in:     `;want=:eWF5Cg==:;something`,
			want:   "eWF5Cg==",
			wantOk: true,
		},
		{
			name:   "valid repeated key",
			in:     `;want=:eWF5Cg==:;now;want=1;is;repeated;want="overwritten!"`,
			want:   "overwritten!",
			wantOk: true,
		},
		{
			name:   "valid parameter with content after",
			in:     `;want=:eWF5Cg==:;now;want=1;is;repeated;want="overwritten!", some=stuff`,
			want:   "overwritten!",
			wantOk: true,
		},
		{
			name: "invalid parameter",
			in:   `;UPPERCASEKEY=NOT_ACCEPTED`,
		},
	}
	// Iterate over every case; slicing the table here would silently skip
	// all of the valid-input cases.
	for _, tc := range tests {
		// got holds the decoded value of the most recent "want" key, so a
		// repeated key ends up with the last value seen.
		var got any
		f := func(key, val string) {
			if key != "want" {
				return
			}
			switch {
			case strings.HasPrefix(val, "?"): // Bool
				got = val == "?1"
			case strings.HasPrefix(val, `"`): // String
				got = val[1 : len(val)-1]
			case strings.HasPrefix(val, "*"): // Token
				got = val
			case strings.HasPrefix(val, ":"): // Byte sequence
				got = val[1 : len(val)-1]
			default:
				if valConv, err := strconv.Atoi(val); err == nil { // Integer
					got = valConv
					return
				}
				if valConv, err := strconv.ParseFloat(val, 64); err == nil { // Float
					got = valConv
					return
				}
			}
		}
		consumed, rest, ok := consumeParameter(tc.in, f)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		if got != tc.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
		}
		if consumed+rest != tc.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tc.name, consumed, rest, tc.in)
		}
	}
}
// TestParseParameter checks that ParseParameter reports success only when
// the whole input is made of parameters, while still passing every parameter
// it walked over to the callback.
func TestParseParameter(t *testing.T) {
	tests := []struct {
		name   string
		in     string
		want   any
		wantOk bool
	}{
		{
			name:   "valid parameter",
			in:     `;parameter;want="wantvalue"`,
			want:   "wantvalue",
			wantOk: true,
		},
		{
			// The callback still sees the parameters, but ok is false
			// because of the trailing content.
			name: "valid parameter with content after",
			in:   `;want=:eWF5Cg==:;now;want=1;is;repeated;want="overwritten!", some=stuff`,
			want: "overwritten!",
		},
		{
			name: "invalid parameter",
			in:   `;UPPERCASEKEY=NOT_ACCEPTED`,
		},
	}
	// Iterate over every case; slicing the table here would silently skip
	// all but the last one.
	for _, tc := range tests {
		// got holds the decoded value of the most recent "want" key.
		var got any
		f := func(key, val string) {
			if key != "want" {
				return
			}
			switch {
			case strings.HasPrefix(val, "?"): // Bool
				got = val == "?1"
			case strings.HasPrefix(val, `"`): // String
				got = val[1 : len(val)-1]
			case strings.HasPrefix(val, "*"): // Token
				got = val
			case strings.HasPrefix(val, ":"): // Byte sequence
				got = val[1 : len(val)-1]
			default:
				if valConv, err := strconv.Atoi(val); err == nil { // Integer
					got = valConv
					return
				}
				if valConv, err := strconv.ParseFloat(val, 64); err == nil { // Float
					got = valConv
					return
				}
			}
		}
		ok := ParseParameter(tc.in, f)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		if got != tc.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
		}
	}
}
// TestConsumeKey runs consumeKey over a table of inputs and verifies the ok
// flag, the consumed key, and that consumed+rest reassembles the input.
func TestConsumeKey(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid basic key", in: `fookey`, want: `fookey`, wantOk: true},
		{name: "valid basic key with more content after", in: `fookey,u=7`, want: `fookey`, wantOk: true},
		{name: "invalid key", in: `1keycannotstartwithnum`},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeKey(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}
// TestConsumeIntegerOrDecimal runs consumeIntegerOrDecimal over a table that
// covers the Integer and Decimal length limits, and verifies the ok flag,
// the consumed number, and that consumed+rest reassembles the input.
func TestConsumeIntegerOrDecimal(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid integer", in: "123456", want: "123456", wantOk: true},
		{name: "valid integer with more content after", in: "123456,12345", want: "123456", wantOk: true},
		{name: "valid max integer", in: "999999999999999", want: "999999999999999", wantOk: true},
		{name: "valid min integer", in: "-999999999999999", want: "-999999999999999", wantOk: true},
		{name: "invalid integer too high", in: "9999999999999999"},
		{name: "invalid integer too low", in: "-9999999999999999"},
		{name: "valid decimal", in: "-123456789012.123", want: "-123456789012.123", wantOk: true},
		{name: "invalid decimal integer component too long", in: "1234567890123.1"},
		{name: "invalid decimal fraction component too long", in: "1.1234"},
		{name: "invalid decimal trailing dot", in: "1."},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeIntegerOrDecimal(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}
// TestConsumeString runs consumeString over a table of quoted-string inputs
// and verifies the ok flag, the consumed string (quotes included), and that
// consumed+rest reassembles the input.
func TestConsumeString(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid basic string", in: `"foo bar"`, want: `"foo bar"`, wantOk: true},
		{name: "valid basic string with more content after", in: `"foo bar", a=3`, want: `"foo bar"`, wantOk: true},
		{name: "valid string with escaped dquote", in: `"foo bar \""`, want: `"foo bar \""`, wantOk: true},
		{name: "invalid string no starting dquote", in: `foo bar"`},
		{name: "invalid string no closing dquote", in: `"foo bar`},
		{name: "invalid string invalid character", in: string([]byte{'"', 0x00, '"'})},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeString(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}
// TestConsumeToken runs consumeToken over a table of inputs and verifies the
// ok flag, the consumed token, and that consumed+rest reassembles the input.
func TestConsumeToken(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid token", in: "*atoken", want: "*atoken", wantOk: true},
		{name: "valid token with more content after", in: "*atoken something", want: "*atoken", wantOk: true},
		{name: "invalid token", in: "0invalid"},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeToken(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}
// TestConsumeByteSequence runs consumeByteSequence over a table of inputs
// and verifies the ok flag, the consumed sequence (colons included), and
// that consumed+rest reassembles the input.
func TestConsumeByteSequence(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid byte sequence", in: ":aGVsbG8gd29ybGQ=:", want: ":aGVsbG8gd29ybGQ=:", wantOk: true},
		{name: "valid byte sequence with more content after", in: ":aGVsbG8gd29ybGQ=::aGVsbG8gd29ybGQ=:", want: ":aGVsbG8gd29ybGQ=:", wantOk: true},
		{name: "invalid byte sequence character", in: ":-:"},
		{name: "invalid byte sequence opening", in: "aGVsbG8gd29ybGQ=:"},
		{name: "invalid byte sequence closing", in: ":aGVsbG8gd29ybGQ="},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeByteSequence(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}
// TestConsumeBoolean runs consumeBoolean over a table of inputs and verifies
// the ok flag, the consumed boolean, and that consumed+rest reassembles the
// input.
func TestConsumeBoolean(t *testing.T) {
	cases := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{name: "valid boolean", in: "?0", want: "?0", wantOk: true},
		{name: "valid boolean with more content after", in: "?1, a=1", want: "?1", wantOk: true},
		{name: "invalid boolean", in: "!2"},
	}
	for _, tt := range cases {
		consumed, rest, ok := consumeBoolean(tt.in)
		if ok != tt.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tt.name, tt.wantOk, ok)
		}
		if consumed != tt.want {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tt.name, consumed, tt.want)
		}
		if consumed+rest != tt.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tt.name, consumed, rest, tt.in)
		}
	}
}