mirror of
https://github.com/golang/net.git
synced 2026-03-31 18:37:08 +09:00
internal/httpsfv: add support for consuming Display String and Date type
This CL adds the consumeDisplayString() and consumeDate() functions, meaning that we can now consume all types that are defined within RFC 9651. In a future CL, we will add the corresponding parsing functions for all the types, so callers of this package will not have to implement their own parsing / formatting. For golang/go#75500 Change-Id: I90aa132d3ab1385b310d821997da13a095cd71bc Reviewed-on: https://go-review.googlesource.com/c/net/+/708015 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Damien Neil <dneil@google.com> Reviewed-by: Nicholas Husin <husin@google.com>
This commit is contained in:
committed by
Nicholas Husin
parent
47a241fc51
commit
fbba2c22cb
@@ -8,6 +8,7 @@ package httpsfv
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func isLCAlpha(b byte) bool {
|
||||
@@ -48,6 +49,27 @@ func countLeftWhitespace(s string) int {
|
||||
return i
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc4648#section-8.
|
||||
func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
|
||||
decBase16 := func(in byte) (out byte, ok bool) {
|
||||
if !isDigit(in) && !(in >= 'a' && in <= 'f') {
|
||||
return 0, false
|
||||
}
|
||||
if isDigit(in) {
|
||||
return in - '0', true
|
||||
}
|
||||
return in - 'a' + 10, true
|
||||
}
|
||||
|
||||
if ch1, ok = decBase16(ch1); !ok {
|
||||
return 0, ok
|
||||
}
|
||||
if ch2, ok = decBase16(ch2); !ok {
|
||||
return 0, ok
|
||||
}
|
||||
return ch1<<4 | ch2, true
|
||||
}
|
||||
|
||||
// TODO(nsh): Implement corresponding parse functions for all consume functions
|
||||
// that exist.
|
||||
|
||||
@@ -409,14 +431,85 @@ func consumeBoolean(s string) (consumed, rest string, ok bool) {
|
||||
return "", s, false
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
|
||||
func consumeDate(s string) (consumed, rest string, ok bool) {
|
||||
if len(s) == 0 || s[0] != '@' {
|
||||
return "", s, false
|
||||
}
|
||||
if _, rest, ok = consumeIntegerOrDecimal(s[1:]); !ok {
|
||||
return "", s, ok
|
||||
}
|
||||
consumed = s[:len(s)-len(rest)]
|
||||
if slices.Contains([]byte(consumed), '.') {
|
||||
return "", s, false
|
||||
}
|
||||
return consumed, rest, ok
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
|
||||
func consumeDisplayString(s string) (consumed, rest string, ok bool) {
|
||||
// To prevent excessive allocation, especially when input is large, we
|
||||
// maintain a buffer of 4 bytes to keep track of the last rune we
|
||||
// encounter. This way, we can validate that the display string conforms to
|
||||
// UTF-8 without actually building the whole string.
|
||||
var lastRune [4]byte
|
||||
var runeLen int
|
||||
isPartOfValidRune := func(ch byte) bool {
|
||||
lastRune[runeLen] = ch
|
||||
runeLen++
|
||||
if utf8.FullRune(lastRune[:runeLen]) {
|
||||
r, s := utf8.DecodeRune(lastRune[:runeLen])
|
||||
if r == utf8.RuneError {
|
||||
return false
|
||||
}
|
||||
copy(lastRune[:], lastRune[s:runeLen])
|
||||
runeLen -= s
|
||||
return true
|
||||
}
|
||||
return runeLen <= 4
|
||||
}
|
||||
|
||||
if len(s) <= 1 || s[:2] != `%"` {
|
||||
return "", s, false
|
||||
}
|
||||
i := 2
|
||||
for i < len(s) {
|
||||
ch := s[i]
|
||||
if !isVChar(ch) && !isSP(ch) {
|
||||
return "", s, false
|
||||
}
|
||||
switch ch {
|
||||
case '"':
|
||||
if runeLen > 0 {
|
||||
return "", s, false
|
||||
}
|
||||
return s[:i+1], s[i+1:], true
|
||||
case '%':
|
||||
if i+2 >= len(s) {
|
||||
return "", s, false
|
||||
}
|
||||
if ch, ok = decOctetHex(s[i+1], s[i+2]); !ok {
|
||||
return "", s, ok
|
||||
}
|
||||
if ok = isPartOfValidRune(ch); !ok {
|
||||
return "", s, ok
|
||||
}
|
||||
i += 3
|
||||
default:
|
||||
if ok = isPartOfValidRune(ch); !ok {
|
||||
return "", s, ok
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
return "", s, false
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
|
||||
func consumeBareItem(s string) (consumed, rest string, ok bool) {
|
||||
if len(s) == 0 {
|
||||
return "", s, false
|
||||
}
|
||||
|
||||
// TODO(nsh): This is currently only up to date with RFC 8941. Implement
|
||||
// Date and Display string for full feature parity with RFC 9651.
|
||||
ch := s[0]
|
||||
switch {
|
||||
case ch == '-' || isDigit(ch):
|
||||
@@ -429,6 +522,10 @@ func consumeBareItem(s string) (consumed, rest string, ok bool) {
|
||||
return consumeByteSequence(s)
|
||||
case ch == '?':
|
||||
return consumeBoolean(s)
|
||||
case ch == '@':
|
||||
return consumeDate(s)
|
||||
case ch == '%':
|
||||
return consumeDisplayString(s)
|
||||
default:
|
||||
return "", s, false
|
||||
}
|
||||
|
||||
@@ -832,3 +832,175 @@ func TestConsumeBoolean(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumeDate(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
wantOk bool
|
||||
}{
|
||||
{
|
||||
name: "valid zero date",
|
||||
in: "@0",
|
||||
want: "@0",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid positive date",
|
||||
in: "@1659578233",
|
||||
want: "@1659578233",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid negative date",
|
||||
in: "@-1659578233",
|
||||
want: "@-1659578233",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid large date",
|
||||
in: "@25340221440",
|
||||
want: "@25340221440",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid small date",
|
||||
in: "@-62135596800",
|
||||
want: "@-62135596800",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "invalid decimal date",
|
||||
in: "@1.2",
|
||||
},
|
||||
{
|
||||
name: "valid date with more content after",
|
||||
in: "@1659578233, foo;bar",
|
||||
want: "@1659578233",
|
||||
wantOk: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got, gotRest, ok := consumeDate(tc.in)
|
||||
if ok != tc.wantOk {
|
||||
t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
|
||||
}
|
||||
if tc.want != got {
|
||||
t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
|
||||
}
|
||||
if got+gotRest != tc.in {
|
||||
t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumeDisplayString(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
wantOk bool
|
||||
}{
|
||||
{
|
||||
name: "valid ascii string",
|
||||
in: "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
|
||||
want: "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid lowercase non-ascii string",
|
||||
in: `%"f%c3%bc%c3%bc"`,
|
||||
want: `%"f%c3%bc%c3%bc"`,
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "invalid uppercase non-ascii string",
|
||||
in: `%"f%C3%BC%C3%BC"`,
|
||||
},
|
||||
{
|
||||
name: "invalid unqouted string",
|
||||
in: "%foo",
|
||||
},
|
||||
{
|
||||
name: "invalid string missing initial quote",
|
||||
in: `%foo"`,
|
||||
},
|
||||
{
|
||||
name: "invalid string missing closing quote",
|
||||
in: `%"foo`,
|
||||
},
|
||||
{
|
||||
name: "invalid tab in string",
|
||||
in: "%\"\t\"",
|
||||
},
|
||||
{
|
||||
name: "invalid newline in string",
|
||||
in: "%\"\n\"",
|
||||
},
|
||||
{
|
||||
name: "invalid single quoted string",
|
||||
in: `%'foo'`,
|
||||
},
|
||||
{
|
||||
name: "invalid string bad escaping",
|
||||
in: `%\"foo %a"`,
|
||||
},
|
||||
{
|
||||
name: "valid string with escaped quotes",
|
||||
in: `%"foo %22bar%22 \\ baz"`,
|
||||
want: `%"foo %22bar%22 \\ baz"`,
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "invalid sequence id utf-8 string",
|
||||
in: `%"%a0%a1"`,
|
||||
},
|
||||
{
|
||||
name: "invalid 2 bytes sequence utf-8 string",
|
||||
in: `%"%c3%28"`,
|
||||
},
|
||||
{
|
||||
name: "invalid 3 bytes sequence utf-8 string",
|
||||
in: `%"%e2%28%a1"`,
|
||||
},
|
||||
{
|
||||
name: "invalid 4 bytes sequence utf-8 string",
|
||||
in: `%"%f0%28%8c%28"`,
|
||||
},
|
||||
{
|
||||
name: "invalid hex utf-8 string",
|
||||
in: `%"%g0%1w"`,
|
||||
},
|
||||
{
|
||||
name: "valid byte order mark in display string",
|
||||
in: `%"BOM: %ef%bb%bf"`,
|
||||
want: `%"BOM: %ef%bb%bf"`,
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "valid string with content after",
|
||||
in: `%"foo\nbar", foo;bar`,
|
||||
want: `%"foo\nbar"`,
|
||||
wantOk: true,
|
||||
},
|
||||
{
|
||||
name: "invalid unfinished 4 bytes rune",
|
||||
in: `%"%f0%9f%98"`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got, gotRest, ok := consumeDisplayString(tc.in)
|
||||
if ok != tc.wantOk {
|
||||
t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
|
||||
}
|
||||
if tc.want != got {
|
||||
t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
|
||||
}
|
||||
if got+gotRest != tc.in {
|
||||
t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user