From b2ab3712bcebf329a84241fdd92c9210dc40e6f1 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Husin" Date: Thu, 2 Oct 2025 10:16:18 -0400 Subject: [PATCH] internal/httpsfv: implement parsing support for date and display string This change adds support for parsing date and display string, meaning this package can now fully parse any HTTP SFV that is compliant with RFC 9651. This package is still intended only for internal use at this point. For golang/go#75500 Change-Id: I07626b45f01e0c5cb4e92aa3fea04cc7e2d0c814 Reviewed-on: https://go-review.googlesource.com/c/net/+/708437 Reviewed-by: Damien Neil Reviewed-by: Carlos Amedee LUCI-TryBot-Result: Go LUCI --- internal/httpsfv/httpsfv.go | 51 +++++++++- internal/httpsfv/httpsfv_test.go | 163 +++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 3 deletions(-) diff --git a/internal/httpsfv/httpsfv.go b/internal/httpsfv/httpsfv.go index f8e2fcc0..4ae2ca5b 100644 --- a/internal/httpsfv/httpsfv.go +++ b/internal/httpsfv/httpsfv.go @@ -10,6 +10,7 @@ import ( "slices" "strconv" "strings" + "time" "unicode/utf8" ) @@ -72,9 +73,6 @@ func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) { return ch1<<4 | ch2, true } -// TODO(nsh): Implement parse functions for date and display string to make -// this package fully support parsing RFC 9651-compliant HTTP SFV. - // ParseList parses a list from a given HTTP Structured Field Values. // // Given an HTTP SFV string that represents a list, it will call the given @@ -534,6 +532,23 @@ func consumeDate(s string) (consumed, rest string, ok bool) { return consumed, rest, ok } +// ParseDate parses a date from a given HTTP Structured Field Values. +// +// The entire HTTP SFV string must consist of a valid date. It returns the +// parsed date and an ok boolean value, indicating success or not. +// +// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date. 
+func ParseDate(s string) (parsed time.Time, ok bool) {
+	if _, rest, ok := consumeDate(s); !ok || rest != "" {
+		return time.Time{}, false
+	}
+	n, ok := ParseInteger(s[1:]) // Skip the leading '@'.
+	if !ok {
+		return time.Time{}, false
+	}
+	return time.Unix(n, 0), true
+}
+
 // https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
 func consumeDisplayString(s string) (consumed, rest string, ok bool) {
 	// To prevent excessive allocation, especially when input is large, we
@@ -593,6 +608,36 @@ func consumeDisplayString(s string) (consumed, rest string, ok bool) {
 	return "", s, false
 }
 
+// ParseDisplayString parses a display string from a given HTTP Structured
+// Field Values.
+//
+// The entire HTTP SFV string must consist of a valid display string. It
+// returns the parsed display string and an ok boolean value, indicating
+// success or not.
+//
+// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
+func ParseDisplayString(s string) (parsed string, ok bool) {
+	if _, rest, ok := consumeDisplayString(s); !ok || rest != "" {
+		return "", false
+	}
+	// consumeDisplayString() already validates that we have a valid display
+	// string. Therefore, we can just construct the display string, without
+	// validating it again.
+	s = s[2 : len(s)-1]
+	var b strings.Builder
+	// A "%xx" escape decodes to a single byte, so len(s) bounds the output.
+	b.Grow(len(s))
+	for i := 0; i < len(s); i++ {
+		ch := s[i]
+		if ch == '%' {
+			ch, _ = decOctetHex(s[i+1], s[i+2])
+			i += 2
+		}
+		b.WriteByte(ch)
+	}
+	return b.String(), true
+}
+
 // https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
 func consumeBareItem(s string) (consumed, rest string, ok bool) {
 	if len(s) == 0 {
diff --git a/internal/httpsfv/httpsfv_test.go b/internal/httpsfv/httpsfv_test.go
index 9e004e09..6ccd9066 100644
--- a/internal/httpsfv/httpsfv_test.go
+++ b/internal/httpsfv/httpsfv_test.go
@@ -8,6 +8,7 @@ import (
 	"strconv"
 	"strings"
 	"testing"
+	"time"
 )
 
 func TestParseList(t *testing.T) {
@@ -1166,6 +1167,64 @@ func TestConsumeDate(t *testing.T) {
 	}
 }
 
+func TestParseDate(t *testing.T) {
+	tests := []struct {
+		name   string
+		in     string
+		want   time.Time
+		wantOk bool
+	}{
+		{
+			name:   "valid zero date",
+			in:     "@0",
+			want:   time.Unix(0, 0),
+			wantOk: true,
+		},
+		{
+			name:   "valid positive date",
+			in:     "@1659578233",
+			want:   time.Date(2022, 8, 4, 1, 57, 13, 0, time.UTC).Local(),
+			wantOk: true,
+		},
+		{
+			name:   "valid negative date",
+			in:     "@-1659578233",
+			want:   time.Date(1917, 5, 30, 22, 2, 47, 0, time.UTC).Local(),
+			wantOk: true,
+		},
+		{
+			name:   "valid max date required",
+			in:     "@253402214400",
+			want:   time.Date(9999, 12, 31, 0, 0, 0, 0, time.UTC).Local(),
+			wantOk: true,
+		},
+		{
+			name:   "valid min date required",
+			in:     "@-62135596800",
+			want:   time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC).Local(),
+			wantOk: true,
+		},
+		{
+			name: "invalid date with fraction",
+			in:   "@0.123",
+		},
+		{
+			name: "valid date with more content after",
+			in:   "@0, @0",
+		},
+	}
+
+	for _, tc := range tests {
+		got, ok := ParseDate(tc.in)
+		if ok != tc.wantOk {
+			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
+		}
+		if !got.Equal(tc.want) { // Equal compares instants, not Location pointers.
+			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
+		}
+	}
+}
+
 func TestConsumeDisplayString(t *testing.T) {
 	tests := []struct {
 		name string
@@ -1274,3 +1333,107 @@ func TestConsumeDisplayString(t *testing.T) {
 		}
 	}
 }
+
+func TestParseDisplayString(t *testing.T) {
+	tests := []struct {
+		name   string
+		in     string
+		want   string
+		wantOk bool
+	}{
+		{
+			name:   "valid ascii string",
+			in:     "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
+			want:   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
+			wantOk: true,
+		},
+		{
+			name:   "valid lowercase non-ascii string",
+			in:     `%"f%c3%bc%c3%bc"`,
+			want:   "füü",
+			wantOk: true,
+		},
+		{
+			name: "invalid uppercase non-ascii string",
+			in:   `%"f%C3%BC%C3%BC"`,
+		},
+		{
+			name: "invalid unqouted string",
+			in:   "%foo",
+		},
+		{
+			name: "invalid string missing initial quote",
+			in:   `%foo"`,
+		},
+		{
+			name: "invalid string missing closing quote",
+			in:   `%"foo`,
+		},
+		{
+			name: "invalid tab in string",
+			in:   "%\"\t\"",
+		},
+		{
+			name: "invalid newline in string",
+			in:   "%\"\n\"",
+		},
+		{
+			name: "invalid single quoted string",
+			in:   `%'foo'`,
+		},
+		{
+			name: "invalid string bad escaping",
+			in:   `%\"foo %a"`,
+		},
+		{
+			name:   "valid string with escaped quotes",
+			in:     "%\"foo %22bar%22 \\ baz\"",
+			want:   "foo \"bar\" \\ baz",
+			wantOk: true,
+		},
+		{
+			name: "invalid sequence id utf-8 string",
+			in:   `%"%a0%a1"`,
+		},
+		{
+			name: "invalid 2 bytes sequence utf-8 string",
+			in:   `%"%c3%28"`,
+		},
+		{
+			name: "invalid 3 bytes sequence utf-8 string",
+			in:   `%"%e2%28%a1"`,
+		},
+		{
+			name: "invalid 4 bytes sequence utf-8 string",
+			in:   `%"%f0%28%8c%28"`,
+		},
+		{
+			name: "invalid hex utf-8 string",
+			in:   `%"%g0%1w"`,
+		},
+		{
+			name:   "valid byte order mark in display string",
+			in:     `%"BOM: %ef%bb%bf"`,
+			want:   "BOM: \uFEFF",
+			wantOk: true,
+		},
+		{
+			name: "valid string with content after",
+			in:   `%"foo\nbar", foo;bar`,
+		},
+		{
+			name: "invalid unfinished 4 bytes rune",
+			in:   `%"%f0%9f%98"`,
+		},
+	}
+
+	for _, tc := range tests {
+		got, ok := ParseDisplayString(tc.in)
+		if ok != tc.wantOk {
+			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
+		}
+		if tc.want != got {
+			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
+		}
+	}
+}