image/jpeg: add support for non-standard chroma subsampling ratios

Add "flex mode" decoding for JPEG images with non-standard YCbCr
subsampling ratios that do not match the predefined YCbCrSubsampleRatio
values. This includes cases where:

1. Cb and Cr components have different sampling factors
2. The Y component does not have the maximum sampling factors

Such images were previously rejected with "unsupported luma/chroma
subsampling ratio" but should be valid according to the JPEG
specification: https://www.w3.org/Graphics/JPEG/itu-t81.pdf

Flex mode allocates a YCbCr444 backing buffer and manually expands
pixels according to each component's sampling factors relative to the
maximum. This approach mirrors the implementation in kovidgoyal/imaging.

Fixes #2362

goos: darwin
goarch: arm64
pkg: image/jpeg
cpu: Apple M4 Max
                     │   old.txt   │              new.txt              │
                     │   sec/op    │   sec/op     vs base              │
FDCT-16                576.9n ± 1%   578.9n ± 1%        ~ (p=0.565 n=10)
IDCT-16                550.1n ± 0%   573.6n ± 3%   +4.27% (p=0.000 n=10)
DecodeBaseline-16      520.6µ ± 4%   523.8µ ± 2%        ~ (p=0.796 n=10)
DecodeProgressive-16   767.9µ ± 3%   747.0µ ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          7.869m ± 3%   8.485m ± 6%   +7.82% (p=0.001 n=10)
EncodeYCbCr-16         8.761m ± 6%   8.021m ± 2%   -8.45% (p=0.001 n=10)
geomean                143.5µ        143.8µ        +0.18%

                     │   old.txt    │              new.txt               │
                     │     B/s      │     B/s       vs base              │
DecodeBaseline-16      113.2Mi ± 4%   112.5Mi ± 2%        ~ (p=0.796 n=10)
DecodeProgressive-16   76.75Mi ± 3%   78.90Mi ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          148.9Mi ± 3%   138.1Mi ± 7%   -7.25% (p=0.001 n=10)
EncodeYCbCr-16         100.3Mi ± 7%   109.6Mi ± 2%   +9.23% (p=0.001 n=10)
geomean                106.7Mi        107.7Mi        +0.86%

                     │   old.txt    │              new.txt               │
                     │     B/op     │     B/op      vs base              │
DecodeBaseline-16      61.55Ki ± 0%   61.55Ki ± 0%       ~ (p=1.000 n=10) ¹
DecodeProgressive-16   253.6Ki ± 0%   253.6Ki ± 0%       ~ (p=0.124 n=10)
EncodeRGBA-16          4.438Ki ± 0%   4.438Ki ± 0%       ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         4.438Ki ± 0%   4.438Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean                23.55Ki        23.55Ki       +0.00%
¹ all samples are equal

                     │  old.txt   │             new.txt              │
                     │ allocs/op  │ allocs/op   vs base              │
DecodeBaseline-16      5.000 ± 0%   5.000 ± 0%       ~ (p=1.000 n=10) ¹
DecodeProgressive-16   13.00 ± 0%   13.00 ± 0%       ~ (p=1.000 n=10) ¹
EncodeRGBA-16          7.000 ± 0%   7.000 ± 0%       ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         7.000 ± 0%   7.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                7.512        7.512       +0.00%
¹ all samples are equal

Co-authored-by: Kovid Goyal <kovidgoyal@gmail.com>
Change-Id: Ic7353ce6a0b229cb6aa775bb05044d6bcded7ab2
Reviewed-on: https://go-review.googlesource.com/c/go/+/738280
Auto-Submit: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Reviewed-by: Nigel Tao <nigeltao@google.com>
2026-04-02 01:10:27 +09:00 · 2026-01-22 16:04:46 +09:00
parent 0b9bcbc58c
commit 30d873462f
7 changed files with 138 additions and 66 deletions
--- a/src/image/jpeg/reader.go
+++ b/src/image/jpeg/reader.go
@@ -28,10 +28,12 @@ var errUnsupportedSubsamplingRatio = UnsupportedError("luma/chroma subsampling r

 // Component specification, specified in section B.2.2.
 type component struct {
-	h  int   // Horizontal sampling factor.
-	v  int   // Vertical sampling factor.
-	c  uint8 // Component identifier.
-	tq uint8 // Quantization table destination selector.
+	h       int   // Horizontal sampling factor.
+	v       int   // Vertical sampling factor.
+	c       uint8 // Component identifier.
+	tq      uint8 // Quantization table destination selector.
+	expandH int   // Horizontal expansion factor (maxH / h); scales pixels in flex mode.
+	expandV int   // Vertical expansion factor (maxV / v); scales pixels in flex mode.
 }

 const (
@@ -124,6 +126,10 @@ type decoder struct {
 	blackPix    []byte
 	blackStride int

+	// For non-standard subsampling ratios (flex mode).
+	flex       bool // True if using non-standard subsampling that requires manual pixel expansion.
+	maxH, maxV int  // Maximum horizontal and vertical sampling factors across all components.
+
 	ri    int // Restart Interval.
 	nComp int

@@ -364,30 +370,11 @@ func (d *decoder) processSOF(n int) error {
 			h, v = 1, 1

 		case 3:
-			// For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
-			// 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
-			// (h, v) values for the Y component are either (1, 1), (1, 2),
-			// (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
-			// must be a multiple of the Cb and Cr component's values. We also
-			// assume that the two chroma components have the same subsampling
-			// ratio.
-			switch i {
-			case 0: // Y.
-				// We have already verified, above, that h and v are both
-				// either 1, 2 or 4, so invalid (h, v) combinations are those
-				// with v == 4.
-				if v == 4 {
-					return errUnsupportedSubsamplingRatio
-				}
-			case 1: // Cb.
-				if d.comp[0].h%h != 0 || d.comp[0].v%v != 0 {
-					return errUnsupportedSubsamplingRatio
-				}
-			case 2: // Cr.
-				if d.comp[1].h != h || d.comp[1].v != v {
-					return errUnsupportedSubsamplingRatio
-				}
-			}
+			// For YCbCr images, we support both standard subsampling ratios
+			// (4:4:4, 4:4:0, 4:2:2, 4:2:0, 4:1:1, 4:1:0) and non-standard ratios
+			// where components may have different sampling factors. The only
+			// restriction is that each component's sampling factors must evenly
+			// divide the maximum factors (validated after the loop).

 		case 4:
 			// For 4-component images (either CMYK or YCbCrK), we only support two
@@ -415,9 +402,27 @@ func (d *decoder) processSOF(n int) error {
 			}
 		}

+		d.maxH, d.maxV = max(d.maxH, h), max(d.maxV, v)
 		d.comp[i].h = h
 		d.comp[i].v = v
 	}
+
+	// For 3-component images, validate that maxH and maxV are evenly divisible
+	// by each component's sampling factors.
+	if d.nComp == 3 {
+		for i := 0; i < 3; i++ {
+			if d.maxH%d.comp[i].h != 0 || d.maxV%d.comp[i].v != 0 {
+				return errUnsupportedSubsamplingRatio
+			}
+		}
+	}
+
+	// Compute expansion factors for each component.
+	for i := 0; i < d.nComp; i++ {
+		d.comp[i].expandH = d.maxH / d.comp[i].h
+		d.comp[i].expandV = d.maxV / d.comp[i].v
+	}
+
 	return nil
 }

--- a/src/image/jpeg/reader_test.go
+++ b/src/image/jpeg/reader_test.go
@@ -546,6 +546,43 @@ func TestBadRestartMarker(t *testing.T) {
 	}
 }

+// TestDecodeFlexSubsampling checks that JPEGs with non-standard (flex) subsampling
+// ratios decode without error into a 150x103 *image.YCbCr reported as 4:4:4.
+func TestDecodeFlexSubsampling(t *testing.T) {
+	// Each case uses a non-standard subsampling ratio where either:
+	// - Cb and Cr have different sampling factors, or
+	// - Y doesn't have the maximum sampling factors (names presumably list HxV for Y, Cb, Cr).
+	testCases := []struct {
+		name     string
+		filename string
+	}{
+		{"2x2,1x1,2x2", "../testdata/video-001.q50.221122.jpeg"}, // Cb differs from Cr
+		{"2x1,1x2,1x1", "../testdata/video-001.q50.211211.jpeg"}, // All three differ
+		{"2x2,2x1,1x2", "../testdata/video-001.q50.222112.jpeg"}, // All three differ
+		{"1x2,1x1,2x1", "../testdata/video-001.q50.121121.jpeg"}, // Y not max, all differ
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			m, err := decodeFile(tc.filename)
+			if err != nil {
+				t.Fatalf("decodeFile(%q): %v", tc.filename, err)
+			}
+			// All video-001 images are 150x103, whatever their subsampling.
+			if got, want := m.Bounds(), image.Rect(0, 0, 150, 103); got != want {
+				t.Errorf("bounds: got %v, want %v", got, want)
+			}
+			// Flex mode decodes into a 4:4:4 backing buffer, so the result must be a 4:4:4 *image.YCbCr.
+			ycbcr, ok := m.(*image.YCbCr)
+			if !ok {
+				t.Fatalf("got %T, want *image.YCbCr", m)
+			}
+			if got, want := ycbcr.SubsampleRatio, image.YCbCrSubsampleRatio444; got != want {
+				t.Errorf("subsample ratio: got %v, want %v", got, want)
+			}
+		})
+	}
+}
+
 func benchmarkDecode(b *testing.B, filename string) {
 	data, err := os.ReadFile(filename)
 	if err != nil {
--- a/src/image/jpeg/scan.go
+++ b/src/image/jpeg/scan.go
@@ -16,28 +16,37 @@ func (d *decoder) makeImg(mxx, myy int) {
 		return
 	}

-	h0 := d.comp[0].h
-	v0 := d.comp[0].v
-	hRatio := h0 / d.comp[1].h
-	vRatio := v0 / d.comp[1].v
-	var subsampleRatio image.YCbCrSubsampleRatio
-	switch hRatio<<4 | vRatio {
-	case 0x11:
-		subsampleRatio = image.YCbCrSubsampleRatio444
-	case 0x12:
-		subsampleRatio = image.YCbCrSubsampleRatio440
-	case 0x21:
-		subsampleRatio = image.YCbCrSubsampleRatio422
-	case 0x22:
-		subsampleRatio = image.YCbCrSubsampleRatio420
-	case 0x41:
-		subsampleRatio = image.YCbCrSubsampleRatio411
-	case 0x42:
-		subsampleRatio = image.YCbCrSubsampleRatio410
-	default:
-		panic("unreachable")
+	// Determine if we need flex mode for non-standard subsampling.
+	// Flex mode is needed when:
+	// - Cb and Cr have different sampling factors, or
+	// - The Y component doesn't have the maximum sampling factors, or
+	// - The ratio doesn't match any standard YCbCrSubsampleRatio.
+	subsampleRatio := image.YCbCrSubsampleRatio444
+	if d.comp[1].h != d.comp[2].h || d.comp[1].v != d.comp[2].v ||
+		d.maxH != d.comp[0].h || d.maxV != d.comp[0].v {
+		d.flex = true
+	} else {
+		hRatio := d.maxH / d.comp[1].h
+		vRatio := d.maxV / d.comp[1].v
+		switch hRatio<<4 | vRatio {
+		case 0x11:
+			subsampleRatio = image.YCbCrSubsampleRatio444
+		case 0x12:
+			subsampleRatio = image.YCbCrSubsampleRatio440
+		case 0x21:
+			subsampleRatio = image.YCbCrSubsampleRatio422
+		case 0x22:
+			subsampleRatio = image.YCbCrSubsampleRatio420
+		case 0x41:
+			subsampleRatio = image.YCbCrSubsampleRatio411
+		case 0x42:
+			subsampleRatio = image.YCbCrSubsampleRatio410
+		default:
+			d.flex = true
+		}
 	}
-	m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio)
+
+	m := image.NewYCbCr(image.Rect(0, 0, 8*d.maxH*mxx, 8*d.maxV*myy), subsampleRatio)
 	d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr)

 	if d.nComp == 4 {
@@ -143,9 +152,11 @@ func (d *decoder) processSOS(n int) error {
 	}

 	// mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
-	h0, v0 := d.comp[0].h, d.comp[0].v // The h and v values from the Y components.
-	mxx := (d.width + 8*h0 - 1) / (8 * h0)
-	myy := (d.height + 8*v0 - 1) / (8 * v0)
+	// The MCU dimensions are based on the maximum sampling factors.
+	// For standard subsampling, maxH/maxV equals h0/v0 (Y's factors).
+	// For flex mode, Y may not have the maximum factors.
+	mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
+	myy := (d.height + 8*d.maxV - 1) / (8 * d.maxV)
 	if d.img1 == nil && d.img3 == nil {
 		d.makeImg(mxx, myy)
 	}
@@ -439,16 +450,15 @@ func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32
 }

 func (d *decoder) reconstructProgressiveImage() error {
-	// The h0, mxx, by and bx variables have the same meaning as in the
+	// The mxx, by and bx variables have the same meaning as in the
 	// processSOS method.
-	h0 := d.comp[0].h
-	mxx := (d.width + 8*h0 - 1) / (8 * h0)
+	mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
 	for i := 0; i < d.nComp; i++ {
 		if d.progCoeffs[i] == nil {
 			continue
 		}
-		v := 8 * d.comp[0].v / d.comp[i].v
-		h := 8 * d.comp[0].h / d.comp[i].h
+		v := 8 * d.maxV / d.comp[i].v
+		h := 8 * d.maxH / d.comp[i].h
 		stride := mxx * d.comp[i].h
 		for by := 0; by*v < d.height; by++ {
 			for bx := 0; bx*h < d.width; bx++ {
@@ -469,6 +479,15 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
 		b[unzig[zig]] *= qt[zig]
 	}
 	idct(b)
+
+	var h, v int
+	if d.flex {
+		// Flex mode: scale bx and by according to the component's sampling factors.
+		h = d.comp[compIndex].expandH
+		v = d.comp[compIndex].expandV
+		bx, by = bx*h, by*v
+	}
+
 	dst, stride := []byte(nil), 0
 	if d.nComp == 1 {
 		dst, stride = d.img1.Pix[8*(by*d.img1.Stride+bx):], d.img1.Stride
@@ -486,20 +505,31 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
 			return UnsupportedError("too many components")
 		}
 	}
+
+	if d.flex {
+		// Flex mode: expand each source pixel to h×v destination pixels.
+		for y := 0; y < 8; y++ {
+			y8 := y * 8
+			yv := y * v
+			for x := 0; x < 8; x++ {
+				val := uint8(max(0, min(255, b[y8+x]+128)))
+				xh := x * h
+				for yy := 0; yy < v; yy++ {
+					for xx := 0; xx < h; xx++ {
+						dst[(yv+yy)*stride+xh+xx] = val
+					}
+				}
+			}
+		}
+		return nil
+	}
+
 	// Level shift by +128, clip to [0, 255], and write to dst.
 	for y := 0; y < 8; y++ {
 		y8 := y * 8
 		yStride := y * stride
 		for x := 0; x < 8; x++ {
-			c := b[y8+x]
-			if c < -128 {
-				c = 0
-			} else if c > 127 {
-				c = 255
-			} else {
-				c += 128
-			}
-			dst[yStride+x] = uint8(c)
+			dst[yStride+x] = uint8(max(0, min(255, b[y8+x]+128)))
 		}
 	}
 	return nil
--- a/src/image/testdata/video-001.q50.121121.jpeg
+++ b/src/image/testdata/video-001.q50.121121.jpeg
--- a/src/image/testdata/video-001.q50.211211.jpeg
+++ b/src/image/testdata/video-001.q50.211211.jpeg
--- a/src/image/testdata/video-001.q50.221122.jpeg
+++ b/src/image/testdata/video-001.q50.221122.jpeg
--- a/src/image/testdata/video-001.q50.222112.jpeg
+++ b/src/image/testdata/video-001.q50.222112.jpeg