zcatpdf/src/images/jpeg.zig
reugenio f9189253d7 feat: v0.3 - Image support (JPEG embedding)
Phase 3 - Images:
- JPEG parser with direct DCT passthrough (no re-encoding)
- PNG metadata extraction (full embedding pending)
- Page.image() for drawing images at position
- Page.imageFit() for auto-scaling with aspect ratio
- Pdf.addJpegImage() / addJpegImageFromFile()
- XObject generation in OutputProducer

New modules:
- src/images/mod.zig - Image module exports
- src/images/image_info.zig - ImageInfo struct
- src/images/jpeg.zig - JPEG parser
- src/images/png.zig - PNG metadata parser

New example:
- examples/image_demo.zig - Image embedding demo

Stats:
- 66 unit tests passing
- 4 working examples

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 20:00:56 +01:00

248 lines
8.3 KiB
Zig

//! JPEG image parser for PDF embedding
//!
//! JPEG images can be embedded directly in PDF using DCTDecode filter.
//! This parser extracts the necessary metadata (dimensions, color space)
//! from the JPEG header without decoding the image data.
const std = @import("std");
const ImageInfo = @import("image_info.zig").ImageInfo;
const ColorSpace = @import("image_info.zig").ColorSpace;
const ImageFilter = @import("image_info.zig").ImageFilter;
const ImageFormat = @import("image_info.zig").ImageFormat;
/// Second byte of each JPEG marker (every marker is encoded as `0xFF` followed
/// by one of these codes). Grouped by role rather than by numeric value.
const JPEG_MARKERS = struct {
    // --- Image delimiters and scan start ---
    const SOI: u8 = 0xD8; // Start of Image
    const EOI: u8 = 0xD9; // End of Image
    const SOS: u8 = 0xDA; // Start of Scan

    // --- Start of Frame markers (these carry the image dimensions) ---
    // Huffman-coded, non-differential
    const SOF0: u8 = 0xC0; // Baseline DCT
    const SOF1: u8 = 0xC1; // Extended sequential DCT
    const SOF2: u8 = 0xC2; // Progressive DCT
    const SOF3: u8 = 0xC3; // Lossless
    // Huffman-coded, differential
    const SOF5: u8 = 0xC5; // Differential sequential DCT
    const SOF6: u8 = 0xC6; // Differential progressive DCT
    const SOF7: u8 = 0xC7; // Differential lossless
    // Arithmetic-coded, non-differential
    const SOF9: u8 = 0xC9; // Extended sequential DCT, arithmetic
    const SOF10: u8 = 0xCA; // Progressive DCT, arithmetic
    const SOF11: u8 = 0xCB; // Lossless, arithmetic
    // Arithmetic-coded, differential
    const SOF13: u8 = 0xCD; // Differential sequential DCT, arithmetic
    const SOF14: u8 = 0xCE; // Differential progressive DCT, arithmetic
    const SOF15: u8 = 0xCF; // Differential lossless, arithmetic

    // --- Table and miscellaneous definitions ---
    const DHT: u8 = 0xC4; // Define Huffman Table
    const DAC: u8 = 0xCC; // Define Arithmetic Coding
    const DQT: u8 = 0xDB; // Define Quantization Table
    const DNL: u8 = 0xDC; // Define Number of Lines
    const DRI: u8 = 0xDD; // Define Restart Interval

    // --- Application segments and comment ---
    const APP0: u8 = 0xE0; // Application-specific marker 0 (JFIF)
    const APP1: u8 = 0xE1; // Application-specific marker 1 (EXIF)
    const APP2: u8 = 0xE2; // Application-specific marker 2 (ICC)
    const APP14: u8 = 0xEE; // Application-specific marker 14 (Adobe)
    const COM: u8 = 0xFE; // Comment
};
/// Errors that `parse` can return while scanning a JPEG byte stream.
pub const JpegError = error{
/// Input is shorter than 4 bytes or does not begin with `FF D8 FF`.
InvalidSignature,
/// A marker segment header extends past the end of the input.
UnexpectedEndOfData,
/// The input was scanned without encountering any Start-of-Frame marker.
NoFrameFound,
/// The frame declares a component count other than 1, 3 or 4.
UnsupportedColorSpace,
};
/// Parse JPEG image data and extract the metadata needed for PDF embedding.
///
/// Only marker segments up to and including the first Start-of-Frame (SOF)
/// header are inspected; the compressed image data is never decoded.
///
/// The returned `ImageInfo.data` is the `data` slice itself (no copy is made
/// and `owns_data` is `false`), so the caller must keep `data` alive for as
/// long as the result is in use.
///
/// Errors:
/// - `InvalidSignature`: `data` is shorter than 4 bytes or does not start
///   with `FF D8 FF`.
/// - `UnexpectedEndOfData`: a segment header runs past the end of `data`.
/// - `NoFrameFound`: no SOF marker was encountered.
/// - `UnsupportedColorSpace`: the frame's component count is not 1, 3 or 4.
pub fn parse(data: []const u8) JpegError!ImageInfo {
    // Validate JPEG signature: FF D8 FF
    if (data.len < 4) return JpegError.InvalidSignature;
    if (data[0] != 0xFF or data[1] != JPEG_MARKERS.SOI or data[2] != 0xFF) {
        return JpegError.InvalidSignature;
    }

    var width: u32 = 0;
    var height: u32 = 0;
    var components: u8 = 0;
    var bits_per_component: u8 = 8;
    var found_frame = false;
    var is_adobe_cmyk = false;

    // Walk the marker segments, starting right after SOI.
    // `data.len - 1` cannot underflow: data.len >= 4 was checked above.
    var pos: usize = 2;
    while (pos < data.len - 1) {
        // Find marker (FF xx)
        if (data[pos] != 0xFF) {
            pos += 1;
            continue;
        }

        // Skip padding FF bytes; the first non-FF byte is the marker code.
        while (pos < data.len and data[pos] == 0xFF) {
            pos += 1;
        }
        if (pos >= data.len) break;

        const marker = data[pos];
        pos += 1; // `pos` now points at the segment's 2-byte length field

        // Check for SOF (Start of Frame) markers
        if (isSOFMarker(marker)) {
            // Frame header layout following the marker code:
            //   length(2) precision(1) height(2) width(2) components(1)
            // All 8 bytes are read below, so all 8 must be present.
            if (pos + 8 > data.len) return JpegError.UnexpectedEndOfData;
            const header = data[pos .. pos + 8];

            // header[0..2] is the segment length, which is not needed here.
            bits_per_component = header[2];
            height = (@as(u32, header[3]) << 8) | @as(u32, header[4]);
            width = (@as(u32, header[5]) << 8) | @as(u32, header[6]);
            components = header[7];
            found_frame = true;
            break;
        }

        // Check for Adobe APP14 marker (indicates CMYK handling)
        if (marker == JPEG_MARKERS.APP14) {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            const len = (@as(u16, data[pos]) << 8) | @as(u16, data[pos + 1]);

            // Check for "Adobe" string right after the length field
            if (len >= 12 and pos + 12 <= data.len) {
                if (std.mem.eql(u8, data[pos + 2 .. pos + 7], "Adobe")) {
                    is_adobe_cmyk = true;
                }
            }

            // The length field counts itself, so this lands on the next marker.
            pos += len;
            continue;
        }

        // Skip other markers with length. SOI, EOI, RST0-RST7 (0xD0-0xD7) and
        // the stuffed byte 0x00 carry no length field and are stepped over as-is.
        if (marker != JPEG_MARKERS.SOI and marker != JPEG_MARKERS.EOI and
            marker != 0x00 and (marker < 0xD0 or marker > 0xD7))
        {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            const len = (@as(u16, data[pos]) << 8) | @as(u16, data[pos + 1]);
            pos += len;
        }
    }

    if (!found_frame) return JpegError.NoFrameFound;

    // Determine color space from component count
    const color_space: ColorSpace = switch (components) {
        1 => .device_gray,
        3 => .device_rgb,
        4 => .device_cmyk,
        else => return JpegError.UnsupportedColorSpace,
    };

    return ImageInfo{
        .width = width,
        .height = height,
        .color_space = color_space,
        .bits_per_component = bits_per_component,
        .filter = .dct_decode,
        .data = data, // Direct passthrough - JPEG data is used as-is
        .soft_mask = null, // JPEG doesn't support alpha
        .owns_data = false, // We don't allocate, caller owns the data
        .invert_cmyk = is_adobe_cmyk and color_space == .device_cmyk,
        .format = .jpeg,
    };
}
/// Report whether `marker` is one of the Start of Frame codes (SOF0-SOF3,
/// SOF5-SOF7, SOF9-SOF11, SOF13-SOF15).
///
/// The codes 0xC4 (DHT), 0xC8 and 0xCC (DAC) sit inside the same numeric span
/// but are not frame markers, hence the four separate ranges.
fn isSOFMarker(marker: u8) bool {
    return switch (marker) {
        JPEG_MARKERS.SOF0...JPEG_MARKERS.SOF3,
        JPEG_MARKERS.SOF5...JPEG_MARKERS.SOF7,
        JPEG_MARKERS.SOF9...JPEG_MARKERS.SOF11,
        JPEG_MARKERS.SOF13...JPEG_MARKERS.SOF15,
        => true,
        else => false,
    };
}
// =============================================================================
// Tests
// =============================================================================
test "parse valid JPEG header" {
    // Minimal JPEG prefix: SOI, a JFIF APP0 segment, then a 3-component SOF0.
    // FF D8 FF E0 [JFIF APP0] FF C0 [SOF0 frame]
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xE0, // APP0
        0x00, 0x10, // Length 16
        'J', 'F', 'I', 'F', 0x00, // JFIF identifier
        0x01, 0x01, // Version
        0x00, // Units
        0x00, 0x01, // X density
        0x00, 0x01, // Y density
        0x00, 0x00, // Thumbnail
        0xFF, 0xC0, // SOF0
        0x00, 0x11, // Length 17 (8 + 3 bytes per component * 3 components)
        0x08, // Bits per component
        0x00, 0x64, // Height: 100
        0x00, 0xC8, // Width: 200
        0x03, // Components: 3 (reported as DeviceRGB by the parser)
        0x01, 0x22, 0x00, // Component 1
        0x02, 0x11, 0x01, // Component 2
        0x03, 0x11, 0x01, // Component 3
    };

    const info = try parse(&jpeg_data);

    try std.testing.expectEqual(@as(u32, 200), info.width);
    try std.testing.expectEqual(@as(u32, 100), info.height);
    try std.testing.expectEqual(ColorSpace.device_rgb, info.color_space);
    try std.testing.expectEqual(@as(u8, 8), info.bits_per_component);
    try std.testing.expectEqual(ImageFilter.dct_decode, info.filter);
    try std.testing.expect(info.soft_mask == null);
    try std.testing.expectEqual(false, info.owns_data);
}
test "parse grayscale JPEG" {
    // SOF0 immediately follows SOI; a single component maps to DeviceGray.
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xC0, // SOF0 (directly, no APP0)
        0x00, 0x0B, // Length 11 (8 + 3 bytes per component * 1 component)
        0x08, // Bits per component
        0x00, 0x32, // Height: 50
        0x00, 0x50, // Width: 80
        0x01, // Components: 1 (Grayscale)
        0x01, 0x11, 0x00, // Component 1
    };

    const info = try parse(&jpeg_data);

    try std.testing.expectEqual(@as(u32, 80), info.width);
    try std.testing.expectEqual(@as(u32, 50), info.height);
    try std.testing.expectEqual(ColorSpace.device_gray, info.color_space);
}
test "invalid JPEG signature" {
    // The first four bytes of a PNG file can never satisfy the FF D8 FF check.
    const png_magic = [_]u8{ 0x89, 0x50, 0x4E, 0x47 };
    try std.testing.expectError(JpegError.InvalidSignature, parse(&png_magic));
}
test "JPEG too short" {
    // A lone SOI marker is below the 4-byte minimum the parser requires.
    const soi_only = [_]u8{ 0xFF, 0xD8 };
    try std.testing.expectError(JpegError.InvalidSignature, parse(&soi_only));
}