image/jpeg: add support for non-standard chroma subsampling ratios

Add "flex mode" decoding for JPEG images with non-standard YCbCr
subsampling ratios that do not match the predefined YCbCrSubsampleRatio
values. This includes cases where:
1. Cb and Cr components have different sampling factors
2. The Y component does not have the maximum sampling factors

Such images were previously rejected with
"unsupported luma/chroma subsampling ratio"
but should be valid according to the JPEG specification:
https://www.w3.org/Graphics/JPEG/itu-t81.pdf

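Flex mode allocates a YCbCr444 backing buffer and replicates each decoded
sample of a component over a block of (maxH/h) × (maxV/v) output pixels,
where h and v are that component's sampling factors and maxH and maxV are
the maximum factors across all components. This approach mirrors the
implementation in kovidgoyal/imaging.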

Fixes #2362

goos: darwin
goarch: arm64
pkg: image/jpeg
cpu: Apple M4 Max
                     │   old.txt   │               new.txt               │
                     │   sec/op    │    sec/op     vs base               │
FDCT-16                576.9n ± 1%   578.9n ±  1%       ~ (p=0.565 n=10)
IDCT-16                550.1n ± 0%   573.6n ±  3%  +4.27% (p=0.000 n=10)
DecodeBaseline-16      520.6µ ± 4%   523.8µ ±  2%       ~ (p=0.796 n=10)
DecodeProgressive-16   767.9µ ± 3%   747.0µ ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          7.869m ± 3%   8.485m ±  6%  +7.82% (p=0.001 n=10)
EncodeYCbCr-16         8.761m ± 6%   8.021m ±  2%  -8.45% (p=0.001 n=10)
geomean                143.5µ        143.8µ        +0.18%

                     │   old.txt    │               new.txt                │
                     │     B/s      │      B/s       vs base               │
DecodeBaseline-16      113.2Mi ± 4%   112.5Mi ±  2%       ~ (p=0.796 n=10)
DecodeProgressive-16   76.75Mi ± 3%   78.90Mi ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          148.9Mi ± 3%   138.1Mi ±  7%  -7.25% (p=0.001 n=10)
EncodeYCbCr-16         100.3Mi ± 7%   109.6Mi ±  2%  +9.23% (p=0.001 n=10)
geomean                106.7Mi        107.7Mi        +0.86%

                     │   old.txt    │                new.txt                │
                     │     B/op     │     B/op      vs base                 │
DecodeBaseline-16      61.55Ki ± 0%   61.55Ki ± 0%       ~ (p=1.000 n=10) ¹
DecodeProgressive-16   253.6Ki ± 0%   253.6Ki ± 0%       ~ (p=0.124 n=10)
EncodeRGBA-16          4.438Ki ± 0%   4.438Ki ± 0%       ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         4.438Ki ± 0%   4.438Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean                23.55Ki        23.55Ki       +0.00%
¹ all samples are equal

                     │  old.txt   │               new.txt               │
                     │ allocs/op  │ allocs/op   vs base                 │
DecodeBaseline-16      5.000 ± 0%   5.000 ± 0%       ~ (p=1.000 n=10) ¹
DecodeProgressive-16   13.00 ± 0%   13.00 ± 0%       ~ (p=1.000 n=10) ¹
EncodeRGBA-16          7.000 ± 0%   7.000 ± 0%       ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         7.000 ± 0%   7.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                7.512        7.512       +0.00%
¹ all samples are equal

Co-authored-by: Kovid Goyal <kovidgoyal@gmail.com>

Change-Id: Ic7353ce6a0b229cb6aa775bb05044d6bcded7ab2
Reviewed-on: https://go-review.googlesource.com/c/go/+/738280
Auto-Submit: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Reviewed-by: Nigel Tao <nigeltao@google.com>
This commit is contained in:
Taichi Maeda
2026-01-22 16:04:46 +09:00
committed by Gopher Robot
parent 0b9bcbc58c
commit 30d873462f
7 changed files with 138 additions and 66 deletions

View File

@@ -28,10 +28,12 @@ var errUnsupportedSubsamplingRatio = UnsupportedError("luma/chroma subsampling r
// Component specification, specified in section B.2.2.
type component struct {
h int // Horizontal sampling factor.
v int // Vertical sampling factor.
c uint8 // Component identifier.
tq uint8 // Quantization table destination selector.
h int // Horizontal sampling factor.
v int // Vertical sampling factor.
c uint8 // Component identifier.
tq uint8 // Quantization table destination selector.
expandH int // Horizontal expansion factor (maxH / h), used for non-standard subsampling.
expandV int // Vertical expansion factor (maxV / v), used for non-standard subsampling.
}
const (
@@ -124,6 +126,10 @@ type decoder struct {
blackPix []byte
blackStride int
// For non-standard subsampling ratios (flex mode).
flex bool // True if using non-standard subsampling that requires manual pixel expansion.
maxH, maxV int // Maximum horizontal and vertical sampling factors across all components.
ri int // Restart Interval.
nComp int
@@ -364,30 +370,11 @@ func (d *decoder) processSOF(n int) error {
h, v = 1, 1
case 3:
// For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
// 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
// (h, v) values for the Y component are either (1, 1), (1, 2),
// (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
// must be a multiple of the Cb and Cr component's values. We also
// assume that the two chroma components have the same subsampling
// ratio.
switch i {
case 0: // Y.
// We have already verified, above, that h and v are both
// either 1, 2 or 4, so invalid (h, v) combinations are those
// with v == 4.
if v == 4 {
return errUnsupportedSubsamplingRatio
}
case 1: // Cb.
if d.comp[0].h%h != 0 || d.comp[0].v%v != 0 {
return errUnsupportedSubsamplingRatio
}
case 2: // Cr.
if d.comp[1].h != h || d.comp[1].v != v {
return errUnsupportedSubsamplingRatio
}
}
// For YCbCr images, we support both standard subsampling ratios
// (4:4:4, 4:4:0, 4:2:2, 4:2:0, 4:1:1, 4:1:0) and non-standard ratios
// where components may have different sampling factors. The only
// restriction is that each component's sampling factors must evenly
// divide the maximum factors (validated after the loop).
case 4:
// For 4-component images (either CMYK or YCbCrK), we only support two
@@ -415,9 +402,27 @@ func (d *decoder) processSOF(n int) error {
}
}
d.maxH, d.maxV = max(d.maxH, h), max(d.maxV, v)
d.comp[i].h = h
d.comp[i].v = v
}
// For 3-component images, validate that maxH and maxV are evenly divisible
// by each component's sampling factors.
if d.nComp == 3 {
for i := 0; i < 3; i++ {
if d.maxH%d.comp[i].h != 0 || d.maxV%d.comp[i].v != 0 {
return errUnsupportedSubsamplingRatio
}
}
}
// Compute expansion factors for each component.
for i := 0; i < d.nComp; i++ {
d.comp[i].expandH = d.maxH / d.comp[i].h
d.comp[i].expandV = d.maxV / d.comp[i].v
}
return nil
}

View File

@@ -546,6 +546,43 @@ func TestBadRestartMarker(t *testing.T) {
}
}
// TestDecodeFlexSubsampling checks that JPEG files whose sampling factors do
// not correspond to a standard ("non-flex") subsampling ratio decode without
// error and come back as a 4:4:4 *image.YCbCr of the expected size.
func TestDecodeFlexSubsampling(t *testing.T) {
	// All video-001 images are 150x103.
	wantBounds := image.Rect(0, 0, 150, 103)
	// Flex subsampling should produce YCbCr images with 4:4:4 ratio.
	wantRatio := image.YCbCrSubsampleRatio444

	// Each fixture uses a non-standard combination of sampling factors:
	// either Cb and Cr disagree with each other, or Y does not carry the
	// maximum sampling factors.
	fixtures := []struct {
		name     string
		filename string
	}{
		{name: "2x2,1x1,2x2", filename: "../testdata/video-001.q50.221122.jpeg"}, // Cb differs from Cr
		{name: "2x1,1x2,1x1", filename: "../testdata/video-001.q50.211211.jpeg"}, // All three differ
		{name: "2x2,2x1,1x2", filename: "../testdata/video-001.q50.222112.jpeg"}, // All three differ
		{name: "1x2,1x1,2x1", filename: "../testdata/video-001.q50.121121.jpeg"}, // Y not max, all differ
	}

	for i := range fixtures {
		fx := fixtures[i]
		t.Run(fx.name, func(t *testing.T) {
			img, err := decodeFile(fx.filename)
			if err != nil {
				t.Fatalf("decodeFile(%q): %v", fx.filename, err)
			}

			if gotBounds := img.Bounds(); gotBounds != wantBounds {
				t.Errorf("bounds: got %v, want %v", gotBounds, wantBounds)
			}

			decoded, isYCbCr := img.(*image.YCbCr)
			if !isYCbCr {
				t.Fatalf("got %T, want *image.YCbCr", img)
			}
			if decoded.SubsampleRatio != wantRatio {
				t.Errorf("subsample ratio: got %v, want %v", decoded.SubsampleRatio, wantRatio)
			}
		})
	}
}
func benchmarkDecode(b *testing.B, filename string) {
data, err := os.ReadFile(filename)
if err != nil {

View File

@@ -16,28 +16,37 @@ func (d *decoder) makeImg(mxx, myy int) {
return
}
h0 := d.comp[0].h
v0 := d.comp[0].v
hRatio := h0 / d.comp[1].h
vRatio := v0 / d.comp[1].v
var subsampleRatio image.YCbCrSubsampleRatio
switch hRatio<<4 | vRatio {
case 0x11:
subsampleRatio = image.YCbCrSubsampleRatio444
case 0x12:
subsampleRatio = image.YCbCrSubsampleRatio440
case 0x21:
subsampleRatio = image.YCbCrSubsampleRatio422
case 0x22:
subsampleRatio = image.YCbCrSubsampleRatio420
case 0x41:
subsampleRatio = image.YCbCrSubsampleRatio411
case 0x42:
subsampleRatio = image.YCbCrSubsampleRatio410
default:
panic("unreachable")
// Determine if we need flex mode for non-standard subsampling.
// Flex mode is needed when:
// - Cb and Cr have different sampling factors, or
// - The Y component doesn't have the maximum sampling factors, or
// - The ratio doesn't match any standard YCbCrSubsampleRatio.
subsampleRatio := image.YCbCrSubsampleRatio444
if d.comp[1].h != d.comp[2].h || d.comp[1].v != d.comp[2].v ||
d.maxH != d.comp[0].h || d.maxV != d.comp[0].v {
d.flex = true
} else {
hRatio := d.maxH / d.comp[1].h
vRatio := d.maxV / d.comp[1].v
switch hRatio<<4 | vRatio {
case 0x11:
subsampleRatio = image.YCbCrSubsampleRatio444
case 0x12:
subsampleRatio = image.YCbCrSubsampleRatio440
case 0x21:
subsampleRatio = image.YCbCrSubsampleRatio422
case 0x22:
subsampleRatio = image.YCbCrSubsampleRatio420
case 0x41:
subsampleRatio = image.YCbCrSubsampleRatio411
case 0x42:
subsampleRatio = image.YCbCrSubsampleRatio410
default:
d.flex = true
}
}
m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio)
m := image.NewYCbCr(image.Rect(0, 0, 8*d.maxH*mxx, 8*d.maxV*myy), subsampleRatio)
d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr)
if d.nComp == 4 {
@@ -143,9 +152,11 @@ func (d *decoder) processSOS(n int) error {
}
// mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
h0, v0 := d.comp[0].h, d.comp[0].v // The h and v values from the Y components.
mxx := (d.width + 8*h0 - 1) / (8 * h0)
myy := (d.height + 8*v0 - 1) / (8 * v0)
// The MCU dimensions are based on the maximum sampling factors.
// For standard subsampling, maxH/maxV equals h0/v0 (Y's factors).
// For flex mode, Y may not have the maximum factors.
mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
myy := (d.height + 8*d.maxV - 1) / (8 * d.maxV)
if d.img1 == nil && d.img3 == nil {
d.makeImg(mxx, myy)
}
@@ -439,16 +450,15 @@ func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32
}
func (d *decoder) reconstructProgressiveImage() error {
// The h0, mxx, by and bx variables have the same meaning as in the
// The mxx, by and bx variables have the same meaning as in the
// processSOS method.
h0 := d.comp[0].h
mxx := (d.width + 8*h0 - 1) / (8 * h0)
mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
for i := 0; i < d.nComp; i++ {
if d.progCoeffs[i] == nil {
continue
}
v := 8 * d.comp[0].v / d.comp[i].v
h := 8 * d.comp[0].h / d.comp[i].h
v := 8 * d.maxV / d.comp[i].v
h := 8 * d.maxH / d.comp[i].h
stride := mxx * d.comp[i].h
for by := 0; by*v < d.height; by++ {
for bx := 0; bx*h < d.width; bx++ {
@@ -469,6 +479,15 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
b[unzig[zig]] *= qt[zig]
}
idct(b)
var h, v int
if d.flex {
// Flex mode: scale bx and by according to the component's sampling factors.
h = d.comp[compIndex].expandH
v = d.comp[compIndex].expandV
bx, by = bx*h, by*v
}
dst, stride := []byte(nil), 0
if d.nComp == 1 {
dst, stride = d.img1.Pix[8*(by*d.img1.Stride+bx):], d.img1.Stride
@@ -486,20 +505,31 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
return UnsupportedError("too many components")
}
}
if d.flex {
// Flex mode: expand each source pixel to h×v destination pixels.
for y := 0; y < 8; y++ {
y8 := y * 8
yv := y * v
for x := 0; x < 8; x++ {
val := uint8(max(0, min(255, b[y8+x]+128)))
xh := x * h
for yy := 0; yy < v; yy++ {
for xx := 0; xx < h; xx++ {
dst[(yv+yy)*stride+xh+xx] = val
}
}
}
}
return nil
}
// Level shift by +128, clip to [0, 255], and write to dst.
for y := 0; y < 8; y++ {
y8 := y * 8
yStride := y * stride
for x := 0; x < 8; x++ {
c := b[y8+x]
if c < -128 {
c = 0
} else if c > 127 {
c = 255
} else {
c += 128
}
dst[yStride+x] = uint8(c)
dst[yStride+x] = uint8(max(0, min(255, b[y8+x]+128)))
}
}
return nil

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB