internal/httpsfv: add support for consuming Display String and Date type

This CL adds the consumeDisplayString() and consumeDate() functions, meaning
that we can now consume all types that are defined within RFC 9651. In a
future CL, we will add the corresponding parsing functions for all the
types, so callers of this package will not have to implement their own
parsing / formatting.

For golang/go#75500

Change-Id: I90aa132d3ab1385b310d821997da13a095cd71bc
Reviewed-on: https://go-review.googlesource.com/c/net/+/708015
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
This commit is contained in:
Nicholas S. Husin
2025-09-30 13:09:40 -04:00
committed by Nicholas Husin
parent 47a241fc51
commit fbba2c22cb
2 changed files with 272 additions and 3 deletions

View File

@@ -8,6 +8,7 @@ package httpsfv
import (
"slices"
"unicode/utf8"
)
func isLCAlpha(b byte) bool {
@@ -48,6 +49,27 @@ func countLeftWhitespace(s string) int {
return i
}
// decOctetHex combines two base16 characters into the single byte they encode,
// with ch1 supplying the high four bits and ch2 the low four bits. Only
// characters accepted by isDigit and the lowercase letters 'a' through 'f' are
// treated as hex digits; if either character is anything else, ok is false and
// ch is 0.
//
// See https://www.rfc-editor.org/rfc/rfc4648#section-8.
func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
	// nibble maps one hex character to its 4-bit value.
	nibble := func(c byte) (byte, bool) {
		switch {
		case isDigit(c):
			return c - '0', true
		case c >= 'a' && c <= 'f':
			return c - 'a' + 10, true
		default:
			return 0, false
		}
	}

	hi, ok := nibble(ch1)
	if !ok {
		return 0, false
	}
	lo, ok := nibble(ch2)
	if !ok {
		return 0, false
	}
	return hi<<4 | lo, true
}
// TODO(nsh): Implement corresponding parse functions for all consume functions
// that exist.
@@ -409,14 +431,85 @@ func consumeBoolean(s string) (consumed, rest string, ok bool) {
return "", s, false
}
// consumeDate consumes a Date from the start of s. A Date is an '@' followed
// by whatever consumeIntegerOrDecimal accepts, except that the consumed text
// must not contain a '.'.
//
// On success, consumed is the Date including the leading '@', rest is the
// remainder of s, and ok is true. On failure, it returns "", s, false.
//
// See https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
func consumeDate(s string) (consumed, rest string, ok bool) {
	if s == "" || s[0] != '@' {
		return "", s, false
	}

	_, tail, numOK := consumeIntegerOrDecimal(s[1:])
	if !numOK {
		return "", s, false
	}

	// Everything in s that precedes the unconsumed tail, '@' included.
	date := s[:len(s)-len(tail)]

	// A '.' means the number was consumed as a Decimal, which is not allowed
	// for a Date.
	if slices.Contains([]byte(date), '.') {
		return "", s, false
	}
	return date, tail, true
}
// consumeDisplayString consumes a Display String from the start of s.
//
// A Display String starts with `%"` and ends at the next `"`. Every byte in
// between must be accepted by isVChar or isSP, and is either a literal
// character or a '%' followed by two hex characters accepted by decOctetHex,
// which together encode one octet. The resulting octet sequence must be valid
// UTF-8.
//
// On success, consumed is the Display String exactly as it appears in s
// (delimiters and escapes included; nothing is decoded), rest is the remainder
// of s, and ok is true. On failure, it returns "", s, false.
//
// See https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
func consumeDisplayString(s string) (consumed, rest string, ok bool) {
	// To prevent excessive allocation, especially when input is large, only
	// the bytes of the rune currently being assembled are buffered. This is
	// enough to validate that the content is UTF-8 without building the whole
	// decoded string.
	var pending [utf8.UTFMax]byte
	var pendingLen int
	acceptByte := func(b byte) bool {
		pending[pendingLen] = b
		pendingLen++
		if !utf8.FullRune(pending[:pendingLen]) {
			// Still a proper prefix of a multi-byte encoding; the verdict
			// is deferred until more bytes arrive. FullRune always reports
			// true for utf8.UTFMax bytes, so pending cannot overflow.
			return true
		}
		r, size := utf8.DecodeRune(pending[:pendingLen])
		// DecodeRune signals an invalid encoding with (RuneError, 1). A
		// RuneError with a larger size is U+FFFD itself, correctly encoded,
		// and must be accepted like any other character.
		if r == utf8.RuneError && size <= 1 {
			return false
		}
		// Bytes are fed one at a time, so a valid rune is recognized exactly
		// when its last byte arrives and nothing is left over in the buffer.
		pendingLen = 0
		return true
	}

	if len(s) < 2 || s[:2] != `%"` {
		return "", s, false
	}
	for i := 2; i < len(s); {
		ch := s[i]
		if !isVChar(ch) && !isSP(ch) {
			return "", s, false
		}
		switch ch {
		case '"':
			// A closing quote in the middle of a multi-byte sequence means
			// the decoded content ends with a truncated rune.
			if pendingLen > 0 {
				return "", s, false
			}
			return s[:i+1], s[i+1:], true
		case '%':
			// An escape needs two more characters after the '%'.
			if i+2 >= len(s) {
				return "", s, false
			}
			octet, hexOK := decOctetHex(s[i+1], s[i+2])
			if !hexOK || !acceptByte(octet) {
				return "", s, false
			}
			i += 3
		default:
			if !acceptByte(ch) {
				return "", s, false
			}
			i++
		}
	}
	// Input ended before the closing quote.
	return "", s, false
}
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
func consumeBareItem(s string) (consumed, rest string, ok bool) {
if len(s) == 0 {
return "", s, false
}
// TODO(nsh): This is currently only up to date with RFC 8941. Implement
// Date and Display string for full feature parity with RFC 9651.
ch := s[0]
switch {
case ch == '-' || isDigit(ch):
@@ -429,6 +522,10 @@ func consumeBareItem(s string) (consumed, rest string, ok bool) {
return consumeByteSequence(s)
case ch == '?':
return consumeBoolean(s)
case ch == '@':
return consumeDate(s)
case ch == '%':
return consumeDisplayString(s)
default:
return "", s, false
}

View File

@@ -832,3 +832,175 @@ func TestConsumeBoolean(t *testing.T) {
}
}
}
// TestConsumeDate runs consumeDate over a table of inputs and checks, for each
// one, the ok result, the consumed text, and that the consumed text followed
// by the returned rest reproduces the input.
func TestConsumeDate(t *testing.T) {
	type dateCase struct {
		name   string
		in     string
		want   string // expected consumed text; empty when wantOk is false
		wantOk bool
	}
	cases := []dateCase{
		{name: "valid zero date", in: "@0", want: "@0", wantOk: true},
		{name: "valid positive date", in: "@1659578233", want: "@1659578233", wantOk: true},
		{name: "valid negative date", in: "@-1659578233", want: "@-1659578233", wantOk: true},
		{name: "valid large date", in: "@25340221440", want: "@25340221440", wantOk: true},
		{name: "valid small date", in: "@-62135596800", want: "@-62135596800", wantOk: true},
		{name: "invalid decimal date", in: "@1.2"},
		{name: "valid date with more content after", in: "@1659578233, foo;bar", want: "@1659578233", wantOk: true},
	}

	for i := range cases {
		c := &cases[i]
		consumed, remainder, ok := consumeDate(c.in)
		// Each branch ends the test, so only the first mismatch is reported.
		switch {
		case ok != c.wantOk:
			t.Fatalf("test %q: want ok to be %v, got: %v", c.name, c.wantOk, ok)
		case c.want != consumed:
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", c.name, consumed, c.want)
		case consumed+remainder != c.in:
			t.Fatalf("test %q: %#v + %#v != %#v", c.name, consumed, remainder, c.in)
		}
	}
}
// TestConsumeDisplayString runs consumeDisplayString over a table of inputs
// and checks, for each one, the ok result, the consumed text, and that the
// consumed text followed by the returned rest reproduces the input. Cases
// that leave want and wantOk unset expect the input to be rejected.
func TestConsumeDisplayString(t *testing.T) {
	tests := []struct {
		name   string
		in     string
		want   string
		wantOk bool
	}{
		{
			name:   "valid ascii string",
			in:     "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
			want:   "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
			wantOk: true,
		},
		{
			name:   "valid lowercase non-ascii string",
			in:     `%"f%c3%bc%c3%bc"`,
			want:   `%"f%c3%bc%c3%bc"`,
			wantOk: true,
		},
		{
			name: "invalid uppercase non-ascii string",
			in:   `%"f%C3%BC%C3%BC"`,
		},
		{
			name: "invalid unqouted string",
			in:   "%foo",
		},
		{
			name: "invalid string missing initial quote",
			in:   `%foo"`,
		},
		{
			name: "invalid string missing closing quote",
			in:   `%"foo`,
		},
		{
			name: "invalid tab in string",
			in:   "%\"\t\"",
		},
		{
			name: "invalid newline in string",
			in:   "%\"\n\"",
		},
		{
			name: "invalid single quoted string",
			in:   `%'foo'`,
		},
		{
			// The input must open with a well-formed `%"` so that the failure
			// comes from the malformed escape "%a" followed by the closing
			// quote, not from a missing opening quote.
			name: "invalid string bad escaping",
			in:   `%"foo %a"`,
		},
		{
			name:   "valid string with escaped quotes",
			in:     `%"foo %22bar%22 \\ baz"`,
			want:   `%"foo %22bar%22 \\ baz"`,
			wantOk: true,
		},
		{
			name: "invalid sequence id utf-8 string",
			in:   `%"%a0%a1"`,
		},
		{
			name: "invalid 2 bytes sequence utf-8 string",
			in:   `%"%c3%28"`,
		},
		{
			name: "invalid 3 bytes sequence utf-8 string",
			in:   `%"%e2%28%a1"`,
		},
		{
			name: "invalid 4 bytes sequence utf-8 string",
			in:   `%"%f0%28%8c%28"`,
		},
		{
			name: "invalid hex utf-8 string",
			in:   `%"%g0%1w"`,
		},
		{
			name:   "valid byte order mark in display string",
			in:     `%"BOM: %ef%bb%bf"`,
			want:   `%"BOM: %ef%bb%bf"`,
			wantOk: true,
		},
		{
			name:   "valid string with content after",
			in:     `%"foo\nbar", foo;bar`,
			want:   `%"foo\nbar"`,
			wantOk: true,
		},
		{
			name: "invalid unfinished 4 bytes rune",
			in:   `%"%f0%9f%98"`,
		},
	}

	for _, tc := range tests {
		got, gotRest, ok := consumeDisplayString(tc.in)
		if ok != tc.wantOk {
			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
		}
		if tc.want != got {
			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
		}
		// Consumed and rest must partition the input, on success and failure.
		if got+gotRest != tc.in {
			t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
		}
	}
}