mirror of
https://github.com/golang/go.git
synced 2026-04-02 01:10:27 +09:00
image/jpeg: add support for non-standard chroma subsampling ratios
Add "flex mode" decoding for JPEG images with non-standard YCbCr
subsampling ratios that do not match the predefined YCbCrSubsampleRatio
values. This includes cases where:

1. Cb and Cr components have different sampling factors
2. The Y component does not have the maximum sampling factors

Such images were previously rejected with "unsupported luma/chroma
subsampling ratio" but should be valid according to the JPEG
specification: https://www.w3.org/Graphics/JPEG/itu-t81.pdf

Flex mode allocates a YCbCr444 backing buffer and manually expands
pixels according to each component's sampling factors relative to the
maximum. This approach mirrors the implementation in kovidgoyal/imaging.

Fixes #2362

goos: darwin
goarch: arm64
pkg: image/jpeg
cpu: Apple M4 Max

                     │   old.txt   │              new.txt               │
                     │   sec/op    │   sec/op      vs base              │
FDCT-16                576.9n ± 1%   578.9n ± 1%        ~ (p=0.565 n=10)
IDCT-16                550.1n ± 0%   573.6n ± 3%   +4.27% (p=0.000 n=10)
DecodeBaseline-16      520.6µ ± 4%   523.8µ ± 2%        ~ (p=0.796 n=10)
DecodeProgressive-16   767.9µ ± 3%   747.0µ ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          7.869m ± 3%   8.485m ± 6%   +7.82% (p=0.001 n=10)
EncodeYCbCr-16         8.761m ± 6%   8.021m ± 2%   -8.45% (p=0.001 n=10)
geomean                143.5µ        143.8µ        +0.18%

                     │   old.txt    │              new.txt               │
                     │     B/s      │     B/s       vs base              │
DecodeBaseline-16      113.2Mi ± 4%   112.5Mi ± 2%        ~ (p=0.796 n=10)
DecodeProgressive-16   76.75Mi ± 3%   78.90Mi ± 10%       ~ (p=0.123 n=10)
EncodeRGBA-16          148.9Mi ± 3%   138.1Mi ± 7%   -7.25% (p=0.001 n=10)
EncodeYCbCr-16         100.3Mi ± 7%   109.6Mi ± 2%   +9.23% (p=0.001 n=10)
geomean                106.7Mi        107.7Mi        +0.86%

                     │   old.txt    │              new.txt                │
                     │     B/op     │     B/op      vs base               │
DecodeBaseline-16      61.55Ki ± 0%   61.55Ki ± 0%        ~ (p=1.000 n=10) ¹
DecodeProgressive-16   253.6Ki ± 0%   253.6Ki ± 0%        ~ (p=0.124 n=10)
EncodeRGBA-16          4.438Ki ± 0%   4.438Ki ± 0%        ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         4.438Ki ± 0%   4.438Ki ± 0%        ~ (p=1.000 n=10) ¹
geomean                23.55Ki        23.55Ki        +0.00%
¹ all samples are equal

                     │  old.txt   │             new.txt               │
                     │ allocs/op  │ allocs/op   vs base               │
DecodeBaseline-16      5.000 ± 0%   5.000 ± 0%        ~ (p=1.000 n=10) ¹
DecodeProgressive-16   13.00 ± 0%   13.00 ± 0%        ~ (p=1.000 n=10) ¹
EncodeRGBA-16          7.000 ± 0%   7.000 ± 0%        ~ (p=1.000 n=10) ¹
EncodeYCbCr-16         7.000 ± 0%   7.000 ± 0%        ~ (p=1.000 n=10) ¹
geomean                7.512        7.512        +0.00%
¹ all samples are equal

Co-authored-by: Kovid Goyal <kovidgoyal@gmail.com>
Change-Id: Ic7353ce6a0b229cb6aa775bb05044d6bcded7ab2
Reviewed-on: https://go-review.googlesource.com/c/go/+/738280
Auto-Submit: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Reviewed-by: Nigel Tao <nigeltao@google.com>
This commit is contained in:
committed by
Gopher Robot
parent
0b9bcbc58c
commit
30d873462f
@@ -28,10 +28,12 @@ var errUnsupportedSubsamplingRatio = UnsupportedError("luma/chroma subsampling r
|
||||
|
||||
// Component specification, specified in section B.2.2.
//
// h, v, c and tq hold the per-component values described by that section.
// expandH and expandV are derived values: processSOF sets them to
// maxH / h and maxV / v, where maxH and maxV are the largest sampling
// factors across all components. reconstructBlock uses them in flex mode to
// replicate each decoded sample into an expandH×expandV group of pixels.
type component struct {
	h       int   // Horizontal sampling factor.
	v       int   // Vertical sampling factor.
	c       uint8 // Component identifier.
	tq      uint8 // Quantization table destination selector.
	expandH int   // Horizontal expansion factor for non-standard subsampling.
	expandV int   // Vertical expansion factor for non-standard subsampling.
}
|
||||
|
||||
const (
|
||||
@@ -124,6 +126,10 @@ type decoder struct {
|
||||
blackPix []byte
|
||||
blackStride int
|
||||
|
||||
// For non-standard subsampling ratios (flex mode).
|
||||
flex bool // True if using non-standard subsampling that requires manual pixel expansion.
|
||||
maxH, maxV int // Maximum horizontal and vertical sampling factors across all components.
|
||||
|
||||
ri int // Restart Interval.
|
||||
nComp int
|
||||
|
||||
@@ -364,30 +370,11 @@ func (d *decoder) processSOF(n int) error {
|
||||
h, v = 1, 1
|
||||
|
||||
case 3:
|
||||
// For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
|
||||
// 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
|
||||
// (h, v) values for the Y component are either (1, 1), (1, 2),
|
||||
// (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
|
||||
// must be a multiple of the Cb and Cr component's values. We also
|
||||
// assume that the two chroma components have the same subsampling
|
||||
// ratio.
|
||||
switch i {
|
||||
case 0: // Y.
|
||||
// We have already verified, above, that h and v are both
|
||||
// either 1, 2 or 4, so invalid (h, v) combinations are those
|
||||
// with v == 4.
|
||||
if v == 4 {
|
||||
return errUnsupportedSubsamplingRatio
|
||||
}
|
||||
case 1: // Cb.
|
||||
if d.comp[0].h%h != 0 || d.comp[0].v%v != 0 {
|
||||
return errUnsupportedSubsamplingRatio
|
||||
}
|
||||
case 2: // Cr.
|
||||
if d.comp[1].h != h || d.comp[1].v != v {
|
||||
return errUnsupportedSubsamplingRatio
|
||||
}
|
||||
}
|
||||
// For YCbCr images, we support both standard subsampling ratios
|
||||
// (4:4:4, 4:4:0, 4:2:2, 4:2:0, 4:1:1, 4:1:0) and non-standard ratios
|
||||
// where components may have different sampling factors. The only
|
||||
// restriction is that each component's sampling factors must evenly
|
||||
// divide the maximum factors (validated after the loop).
|
||||
|
||||
case 4:
|
||||
// For 4-component images (either CMYK or YCbCrK), we only support two
|
||||
@@ -415,9 +402,27 @@ func (d *decoder) processSOF(n int) error {
|
||||
}
|
||||
}
|
||||
|
||||
d.maxH, d.maxV = max(d.maxH, h), max(d.maxV, v)
|
||||
d.comp[i].h = h
|
||||
d.comp[i].v = v
|
||||
}
|
||||
|
||||
// For 3-component images, validate that maxH and maxV are evenly divisible
|
||||
// by each component's sampling factors.
|
||||
if d.nComp == 3 {
|
||||
for i := 0; i < 3; i++ {
|
||||
if d.maxH%d.comp[i].h != 0 || d.maxV%d.comp[i].v != 0 {
|
||||
return errUnsupportedSubsamplingRatio
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute expansion factors for each component.
|
||||
for i := 0; i < d.nComp; i++ {
|
||||
d.comp[i].expandH = d.maxH / d.comp[i].h
|
||||
d.comp[i].expandV = d.maxV / d.comp[i].v
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -546,6 +546,43 @@ func TestBadRestartMarker(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestDecodeFlexSubsampling tests that decoding images with non-standard
|
||||
// (flex) subsampling ratios works correctly.
|
||||
func TestDecodeFlexSubsampling(t *testing.T) {
|
||||
// These test cases have non-standard subsampling ratios where either:
|
||||
// - Cb and Cr have different sampling factors, or
|
||||
// - Y doesn't have the maximum sampling factors.
|
||||
testCases := []struct {
|
||||
name string
|
||||
filename string
|
||||
}{
|
||||
{"2x2,1x1,2x2", "../testdata/video-001.q50.221122.jpeg"}, // Cb differs from Cr
|
||||
{"2x1,1x2,1x1", "../testdata/video-001.q50.211211.jpeg"}, // All three differ
|
||||
{"2x2,2x1,1x2", "../testdata/video-001.q50.222112.jpeg"}, // All three differ
|
||||
{"1x2,1x1,2x1", "../testdata/video-001.q50.121121.jpeg"}, // Y not max, all differ
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
m, err := decodeFile(tc.filename)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeFile(%q): %v", tc.filename, err)
|
||||
}
|
||||
// All video-001 images are 150x103.
|
||||
if got, want := m.Bounds(), image.Rect(0, 0, 150, 103); got != want {
|
||||
t.Errorf("bounds: got %v, want %v", got, want)
|
||||
}
|
||||
// Flex subsampling should produce YCbCr images with 4:4:4 ratio.
|
||||
ycbcr, ok := m.(*image.YCbCr)
|
||||
if !ok {
|
||||
t.Fatalf("got %T, want *image.YCbCr", m)
|
||||
}
|
||||
if got, want := ycbcr.SubsampleRatio, image.YCbCrSubsampleRatio444; got != want {
|
||||
t.Errorf("subsample ratio: got %v, want %v", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkDecode(b *testing.B, filename string) {
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
|
||||
@@ -16,28 +16,37 @@ func (d *decoder) makeImg(mxx, myy int) {
|
||||
return
|
||||
}
|
||||
|
||||
h0 := d.comp[0].h
|
||||
v0 := d.comp[0].v
|
||||
hRatio := h0 / d.comp[1].h
|
||||
vRatio := v0 / d.comp[1].v
|
||||
var subsampleRatio image.YCbCrSubsampleRatio
|
||||
switch hRatio<<4 | vRatio {
|
||||
case 0x11:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio444
|
||||
case 0x12:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio440
|
||||
case 0x21:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio422
|
||||
case 0x22:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio420
|
||||
case 0x41:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio411
|
||||
case 0x42:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio410
|
||||
default:
|
||||
panic("unreachable")
|
||||
// Determine if we need flex mode for non-standard subsampling.
|
||||
// Flex mode is needed when:
|
||||
// - Cb and Cr have different sampling factors, or
|
||||
// - The Y component doesn't have the maximum sampling factors, or
|
||||
// - The ratio doesn't match any standard YCbCrSubsampleRatio.
|
||||
subsampleRatio := image.YCbCrSubsampleRatio444
|
||||
if d.comp[1].h != d.comp[2].h || d.comp[1].v != d.comp[2].v ||
|
||||
d.maxH != d.comp[0].h || d.maxV != d.comp[0].v {
|
||||
d.flex = true
|
||||
} else {
|
||||
hRatio := d.maxH / d.comp[1].h
|
||||
vRatio := d.maxV / d.comp[1].v
|
||||
switch hRatio<<4 | vRatio {
|
||||
case 0x11:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio444
|
||||
case 0x12:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio440
|
||||
case 0x21:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio422
|
||||
case 0x22:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio420
|
||||
case 0x41:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio411
|
||||
case 0x42:
|
||||
subsampleRatio = image.YCbCrSubsampleRatio410
|
||||
default:
|
||||
d.flex = true
|
||||
}
|
||||
}
|
||||
m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio)
|
||||
|
||||
m := image.NewYCbCr(image.Rect(0, 0, 8*d.maxH*mxx, 8*d.maxV*myy), subsampleRatio)
|
||||
d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr)
|
||||
|
||||
if d.nComp == 4 {
|
||||
@@ -143,9 +152,11 @@ func (d *decoder) processSOS(n int) error {
|
||||
}
|
||||
|
||||
// mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
|
||||
h0, v0 := d.comp[0].h, d.comp[0].v // The h and v values from the Y components.
|
||||
mxx := (d.width + 8*h0 - 1) / (8 * h0)
|
||||
myy := (d.height + 8*v0 - 1) / (8 * v0)
|
||||
// The MCU dimensions are based on the maximum sampling factors.
|
||||
// For standard subsampling, maxH/maxV equals h0/v0 (Y's factors).
|
||||
// For flex mode, Y may not have the maximum factors.
|
||||
mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
|
||||
myy := (d.height + 8*d.maxV - 1) / (8 * d.maxV)
|
||||
if d.img1 == nil && d.img3 == nil {
|
||||
d.makeImg(mxx, myy)
|
||||
}
|
||||
@@ -439,16 +450,15 @@ func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32
|
||||
}
|
||||
|
||||
func (d *decoder) reconstructProgressiveImage() error {
|
||||
// The h0, mxx, by and bx variables have the same meaning as in the
|
||||
// The mxx, by and bx variables have the same meaning as in the
|
||||
// processSOS method.
|
||||
h0 := d.comp[0].h
|
||||
mxx := (d.width + 8*h0 - 1) / (8 * h0)
|
||||
mxx := (d.width + 8*d.maxH - 1) / (8 * d.maxH)
|
||||
for i := 0; i < d.nComp; i++ {
|
||||
if d.progCoeffs[i] == nil {
|
||||
continue
|
||||
}
|
||||
v := 8 * d.comp[0].v / d.comp[i].v
|
||||
h := 8 * d.comp[0].h / d.comp[i].h
|
||||
v := 8 * d.maxV / d.comp[i].v
|
||||
h := 8 * d.maxH / d.comp[i].h
|
||||
stride := mxx * d.comp[i].h
|
||||
for by := 0; by*v < d.height; by++ {
|
||||
for bx := 0; bx*h < d.width; bx++ {
|
||||
@@ -469,6 +479,15 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
|
||||
b[unzig[zig]] *= qt[zig]
|
||||
}
|
||||
idct(b)
|
||||
|
||||
var h, v int
|
||||
if d.flex {
|
||||
// Flex mode: scale bx and by according to the component's sampling factors.
|
||||
h = d.comp[compIndex].expandH
|
||||
v = d.comp[compIndex].expandV
|
||||
bx, by = bx*h, by*v
|
||||
}
|
||||
|
||||
dst, stride := []byte(nil), 0
|
||||
if d.nComp == 1 {
|
||||
dst, stride = d.img1.Pix[8*(by*d.img1.Stride+bx):], d.img1.Stride
|
||||
@@ -486,20 +505,31 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
|
||||
return UnsupportedError("too many components")
|
||||
}
|
||||
}
|
||||
|
||||
if d.flex {
|
||||
// Flex mode: expand each source pixel to h×v destination pixels.
|
||||
for y := 0; y < 8; y++ {
|
||||
y8 := y * 8
|
||||
yv := y * v
|
||||
for x := 0; x < 8; x++ {
|
||||
val := uint8(max(0, min(255, b[y8+x]+128)))
|
||||
xh := x * h
|
||||
for yy := 0; yy < v; yy++ {
|
||||
for xx := 0; xx < h; xx++ {
|
||||
dst[(yv+yy)*stride+xh+xx] = val
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Level shift by +128, clip to [0, 255], and write to dst.
|
||||
for y := 0; y < 8; y++ {
|
||||
y8 := y * 8
|
||||
yStride := y * stride
|
||||
for x := 0; x < 8; x++ {
|
||||
c := b[y8+x]
|
||||
if c < -128 {
|
||||
c = 0
|
||||
} else if c > 127 {
|
||||
c = 255
|
||||
} else {
|
||||
c += 128
|
||||
}
|
||||
dst[yStride+x] = uint8(c)
|
||||
dst[yStride+x] = uint8(max(0, min(255, b[y8+x]+128)))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
BIN
src/image/testdata/video-001.q50.121121.jpeg
vendored
Normal file
BIN
src/image/testdata/video-001.q50.121121.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 KiB |
BIN
src/image/testdata/video-001.q50.211211.jpeg
vendored
Normal file
BIN
src/image/testdata/video-001.q50.211211.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 KiB |
BIN
src/image/testdata/video-001.q50.221122.jpeg
vendored
Normal file
BIN
src/image/testdata/video-001.q50.221122.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.4 KiB |
BIN
src/image/testdata/video-001.q50.222112.jpeg
vendored
Normal file
BIN
src/image/testdata/video-001.q50.222112.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.3 KiB |
Reference in New Issue
Block a user