internal/httpsfv: implement parsing support for date and display string

This change adds support for parsing date and display string, meaning
this package can now fully parse any HTTP SFV that is compliant with
RFC 9651.

This package is still intended only for internal use at this point.

For golang/go#75500

Change-Id: I07626b45f01e0c5cb4e92aa3fea04cc7e2d0c814
Reviewed-on: https://go-review.googlesource.com/c/net/+/708437
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Nicholas S. Husin
2025-10-02 10:16:18 -04:00
committed by Nicholas Husin
parent edb764c229
commit b2ab3712bc
2 changed files with 211 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ import (
"slices"
"strconv"
"strings"
"time"
"unicode/utf8"
)
@@ -72,9 +73,6 @@ func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
return ch1<<4 | ch2, true
}
// TODO(nsh): Implement parse functions for date and display string to make
// this package fully support parsing RFC 9651-compliant HTTP SFV.
// ParseList parses a list from a given HTTP Structured Field Values.
//
// Given an HTTP SFV string that represents a list, it will call the given
@@ -534,6 +532,23 @@ func consumeDate(s string) (consumed, rest string, ok bool) {
return consumed, rest, ok
}
// ParseDate parses s as a single HTTP Structured Field Values date.
//
// s must consist of exactly one valid date with nothing after it. On success
// it returns the date as a time.Time, built with time.Unix from the integer
// that follows the first byte of s, together with ok set to true. On failure
// it returns the zero time.Time and false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
func ParseDate(s string) (parsed time.Time, ok bool) {
	_, rest, valid := consumeDate(s)
	if !valid || rest != "" {
		return time.Time{}, false
	}
	// Skip the first byte (the date marker, as in "@0") and read the
	// remainder as an integer number of seconds.
	secs, valid := ParseInteger(s[1:])
	if !valid {
		return time.Time{}, false
	}
	return time.Unix(secs, 0), true
}
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
func consumeDisplayString(s string) (consumed, rest string, ok bool) {
// To prevent excessive allocation, especially when input is large, we
@@ -593,6 +608,36 @@ func consumeDisplayString(s string) (consumed, rest string, ok bool) {
return "", s, false
}
// ParseDisplayString parses a display string from a given HTTP Structured
// Field Values.
//
// The entire HTTP SFV string must consist of a valid display string. It
// returns the parsed display string, with every percent-encoded octet
// replaced by the byte it encodes, and an ok boolean value, indicating
// success or not. On failure it returns the empty string and false.
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
func ParseDisplayString(s string) (parsed string, ok bool) {
	if _, rest, ok := consumeDisplayString(s); !ok || rest != "" {
		return "", false
	}
	// consumeDisplayString() already validates that we have a valid display
	// string. Therefore, we can just construct the display string, without
	// validating it again.
	//
	// Drop the two leading bytes and the single trailing byte (the `%"`
	// prefix and the closing `"`).
	s = s[2 : len(s)-1]

	// Fast path: with no percent-escapes the content is already the result,
	// so no buffer needs to be built.
	first := strings.IndexByte(s, '%')
	if first < 0 {
		return s, true
	}

	var b strings.Builder
	// Every escape shrinks three input bytes into one output byte, so len(s)
	// is an upper bound on the output size and a single allocation suffices.
	b.Grow(len(s))
	// Everything before the first escape is copied verbatim in one call.
	b.WriteString(s[:first])
	for i := first; i < len(s); {
		if s[i] == '%' {
			// The ok result is deliberately ignored: validity of the two
			// hex digits was established by consumeDisplayString above.
			decoded, _ := decOctetHex(s[i+1], s[i+2])
			b.WriteByte(decoded)
			i += 3
			continue
		}
		b.WriteByte(s[i])
		i++
	}
	return b.String(), true
}
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
func consumeBareItem(s string) (consumed, rest string, ok bool) {
if len(s) == 0 {

View File

@@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"testing"
"time"
)
func TestParseList(t *testing.T) {
@@ -1166,6 +1167,64 @@ func TestConsumeDate(t *testing.T) {
}
}
// TestParseDate checks ParseDate against valid dates (zero, positive,
// negative, and the listed minimum/maximum values) and against inputs that
// must be rejected (a fractional value and trailing content).
func TestParseDate(t *testing.T) {
	tests := []struct {
		name   string
		in     string
		want   time.Time
		wantOk bool
	}{
		{
			name:   "valid zero date",
			in:     "@0",
			want:   time.Unix(0, 0),
			wantOk: true,
		},
		{
			name:   "valid positive date",
			in:     "@1659578233",
			want:   time.Date(2022, 8, 4, 1, 57, 13, 0, time.UTC).Local(),
			wantOk: true,
		},
		{
			name:   "valid negative date",
			in:     "@-1659578233",
			want:   time.Date(1917, 5, 30, 22, 2, 47, 0, time.UTC).Local(),
			wantOk: true,
		},
		{
			name:   "valid max date required",
			in:     "@253402214400",
			want:   time.Date(9999, 12, 31, 0, 0, 0, 0, time.UTC).Local(),
			wantOk: true,
		},
		{
			name:   "valid min date required",
			in:     "@-62135596800",
			want:   time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC).Local(),
			wantOk: true,
		},
		{
			name: "invalid date with fraction",
			in:   "@0.123",
		},
		{
			name: "valid date with more content after",
			in:   "@0, @0",
		},
	}

	for _, tc := range tests {
		got, ok := ParseDate(tc.in)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		// Compare instants with Equal rather than ==/!=: the operators also
		// compare the Location and internal representation of time.Time, so
		// they can report a mismatch for two values naming the same instant.
		if !got.Equal(tc.want) {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
		}
	}
}
func TestConsumeDisplayString(t *testing.T) {
tests := []struct {
name string
@@ -1274,3 +1333,107 @@ func TestConsumeDisplayString(t *testing.T) {
}
}
}
// TestParseDisplayString checks ParseDisplayString against valid display
// strings (plain ASCII, percent-encoded UTF-8, escaped quotes, a byte order
// mark) and against inputs that must be rejected (bad quoting, control
// characters, uppercase or malformed escapes, invalid UTF-8 sequences, and
// trailing content).
func TestParseDisplayString(t *testing.T) {
	tests := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{
			name:   "valid ascii string",
			in:     "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
			want:   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
			wantOk: true,
		},
		{
			name:   "valid lowercase non-ascii string",
			in:     `%"f%c3%bc%c3%bc"`,
			want:   "füü",
			wantOk: true,
		},
		{
			name: "invalid uppercase non-ascii string",
			in:   `%"f%C3%BC%C3%BC"`,
		},
		{
			name: "invalid unquoted string",
			in:   "%foo",
		},
		{
			name: "invalid string missing initial quote",
			in:   `%foo"`,
		},
		{
			name: "invalid string missing closing quote",
			in:   `%"foo`,
		},
		{
			name: "invalid tab in string",
			in:   "%\"\t\"",
		},
		{
			name: "invalid newline in string",
			in:   "%\"\n\"",
		},
		{
			name: "invalid single quoted string",
			in:   `%'foo'`,
		},
		{
			// The input must open with a proper `%"` so that parsing gets as
			// far as the malformed escape `%a"`. A backslash before the quote
			// in a raw string would instead make this a duplicate of the
			// missing-initial-quote case.
			name: "invalid string bad escaping",
			in:   `%"foo %a"`,
		},
		{
			name:   "valid string with escaped quotes",
			in:     "%\"foo %22bar%22 \\ baz\"",
			want:   "foo \"bar\" \\ baz",
			wantOk: true,
		},
		{
			name: "invalid sequence id utf-8 string",
			in:   `%"%a0%a1"`,
		},
		{
			name: "invalid 2 bytes sequence utf-8 string",
			in:   `%"%c3%28"`,
		},
		{
			name: "invalid 3 bytes sequence utf-8 string",
			in:   `%"%e2%28%a1"`,
		},
		{
			name: "invalid 4 bytes sequence utf-8 string",
			in:   `%"%f0%28%8c%28"`,
		},
		{
			name: "invalid hex utf-8 string",
			in:   `%"%g0%1w"`,
		},
		{
			name:   "valid byte order mark in display string",
			in:     `%"BOM: %ef%bb%bf"`,
			want:   "BOM: \uFEFF",
			wantOk: true,
		},
		{
			name: "valid string with content after",
			in:   `%"foo\nbar", foo;bar`,
		},
		{
			name: "invalid unfinished 4 bytes rune",
			in:   `%"%f0%9f%98"`,
		},
	}

	for _, tc := range tests {
		got, ok := ParseDisplayString(tc.in)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		if tc.want != got {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
		}
	}
}