Phase 3 - Images: - JPEG parser with direct DCT passthrough (no re-encoding) - PNG metadata extraction (full embedding pending) - Page.image() for drawing images at position - Page.imageFit() for auto-scaling with aspect ratio - Pdf.addJpegImage() / addJpegImageFromFile() - XObject generation in OutputProducer New modules: - src/images/mod.zig - Image module exports - src/images/image_info.zig - ImageInfo struct - src/images/jpeg.zig - JPEG parser - src/images/png.zig - PNG metadata parser New example: - examples/image_demo.zig - Image embedding demo Stats: - 66 unit tests passing - 4 working examples 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
248 lines
8.3 KiB
Zig
248 lines
8.3 KiB
Zig
//! JPEG image parser for PDF embedding
//!
//! JPEG images can be embedded directly in PDF using the DCTDecode filter.
//! This parser extracts the necessary metadata (dimensions, color space)
//! from the JPEG header without decoding the image data.

const std = @import("std");
|
|
const ImageInfo = @import("image_info.zig").ImageInfo;
|
|
const ColorSpace = @import("image_info.zig").ColorSpace;
|
|
const ImageFilter = @import("image_info.zig").ImageFilter;
|
|
const ImageFormat = @import("image_info.zig").ImageFormat;
|
|
|
|
/// JPEG marker codes.
///
/// Each value is the byte that follows the `0xFF` prefix in the stream;
/// `parse` compares the byte after an `0xFF` against these constants.
const JPEG_MARKERS = struct {
    // -- Start of Frame markers (carry the image dimensions) --
    const SOF0: u8 = 0xC0; // Baseline DCT
    const SOF1: u8 = 0xC1; // Extended sequential DCT
    const SOF2: u8 = 0xC2; // Progressive DCT
    const SOF3: u8 = 0xC3; // Lossless
    const SOF5: u8 = 0xC5; // Differential sequential DCT
    const SOF6: u8 = 0xC6; // Differential progressive DCT
    const SOF7: u8 = 0xC7; // Differential lossless
    const SOF9: u8 = 0xC9; // Extended sequential DCT, arithmetic
    const SOF10: u8 = 0xCA; // Progressive DCT, arithmetic
    const SOF11: u8 = 0xCB; // Lossless, arithmetic
    const SOF13: u8 = 0xCD; // Differential sequential DCT, arithmetic
    const SOF14: u8 = 0xCE; // Differential progressive DCT, arithmetic
    const SOF15: u8 = 0xCF; // Differential lossless, arithmetic

    // -- Table and coding definitions --
    const DHT: u8 = 0xC4; // Define Huffman Table
    const DAC: u8 = 0xCC; // Define Arithmetic Coding
    const DQT: u8 = 0xDB; // Define Quantization Table
    const DNL: u8 = 0xDC; // Define Number of Lines
    const DRI: u8 = 0xDD; // Define Restart Interval

    // -- Image and scan delimiters --
    const SOI: u8 = 0xD8; // Start of Image
    const EOI: u8 = 0xD9; // End of Image
    const SOS: u8 = 0xDA; // Start of Scan

    // -- Application-specific segments and comments --
    const APP0: u8 = 0xE0; // Application-specific marker 0 (JFIF)
    const APP1: u8 = 0xE1; // Application-specific marker 1 (EXIF)
    const APP2: u8 = 0xE2; // Application-specific marker 2 (ICC)
    const APP14: u8 = 0xEE; // Application-specific marker 14 (Adobe)
    const COM: u8 = 0xFE; // Comment
};
|
|
|
|
/// Errors reported by `parse`.
pub const JpegError = error{
    /// Input is shorter than 4 bytes or does not begin with `FF D8 FF`.
    InvalidSignature,
    /// A marker segment header extends past the end of the input.
    UnexpectedEndOfData,
    /// No Start of Frame (SOF) marker was found in the input.
    NoFrameFound,
    /// The frame declares a component count other than 1, 3, or 4.
    UnsupportedColorSpace,
};
|
|
|
|
/// Parse JPEG image data and extract the metadata needed for PDF embedding.
///
/// Marker segments are walked from the start of `data` until the first
/// Start of Frame (SOF) marker, from which the sample precision, height,
/// width, and component count are read. The image data itself is never
/// decoded or copied: the returned `ImageInfo.data` is the `data` slice
/// that was passed in (`owns_data` is false), so the caller must keep that
/// memory alive for as long as the result is used.
///
/// Errors:
/// - `InvalidSignature`: `data` is shorter than 4 bytes or does not start
///   with `FF D8 FF`.
/// - `UnexpectedEndOfData`: a segment header (or the SOF frame header) runs
///   past the end of `data`.
/// - `NoFrameFound`: no SOF marker was encountered.
/// - `UnsupportedColorSpace`: the component count is not 1, 3, or 4.
pub fn parse(data: []const u8) JpegError!ImageInfo {
    // Validate JPEG signature: FF D8 FF (SOI followed by the start of the next marker).
    if (data.len < 4) return JpegError.InvalidSignature;
    if (data[0] != 0xFF or data[1] != JPEG_MARKERS.SOI or data[2] != 0xFF) {
        return JpegError.InvalidSignature;
    }

    var width: u32 = 0;
    var height: u32 = 0;
    var components: u8 = 0;
    var bits_per_component: u8 = 8;
    var found_frame = false;
    var is_adobe_cmyk = false;

    // Walk the marker segments. `data.len >= 4` here, so `data.len - 1` cannot underflow.
    var pos: usize = 2;
    while (pos < data.len - 1) {
        // Find the next marker prefix (FF xx).
        if (data[pos] != 0xFF) {
            pos += 1;
            continue;
        }

        // Skip the prefix and any padding FF bytes.
        while (pos < data.len and data[pos] == 0xFF) {
            pos += 1;
        }
        if (pos >= data.len) break;

        const marker = data[pos];
        pos += 1;

        // Start of Frame: read the frame header and stop scanning.
        if (isSOFMarker(marker)) {
            // The fields read below span 8 bytes after the marker:
            // length (2) + precision (1) + height (2) + width (2) + component count (1).
            if (pos + 8 > data.len) return JpegError.UnexpectedEndOfData;

            bits_per_component = data[pos + 2];
            height = readU16BigEndian(data, pos + 3);
            width = readU16BigEndian(data, pos + 5);
            components = data[pos + 7];
            found_frame = true;
            break;
        }

        // Adobe APP14 segment: remember it so CMYK data can be flagged for inversion.
        if (marker == JPEG_MARKERS.APP14) {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            const len = readU16BigEndian(data, pos);

            // The identifier "Adobe" directly follows the two length bytes.
            if (len >= 12 and pos + 12 <= data.len) {
                if (std.mem.eql(u8, data[pos + 2 .. pos + 7], "Adobe")) {
                    is_adobe_cmyk = true;
                }
            }

            pos += len;
            continue;
        }

        // Markers without a length field: SOI, EOI, RST0-RST7 (D0-D7), TEM (01),
        // and the stuffed byte sequence FF 00. Every other marker is followed by
        // a two-byte big-endian length that includes the length bytes themselves.
        const is_standalone = marker == JPEG_MARKERS.SOI or
            marker == JPEG_MARKERS.EOI or
            marker == 0x00 or
            marker == 0x01 or
            (marker >= 0xD0 and marker <= 0xD7);
        if (!is_standalone) {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            pos += readU16BigEndian(data, pos);
        }
    }

    if (!found_frame) return JpegError.NoFrameFound;

    // Determine color space from component count.
    const color_space: ColorSpace = switch (components) {
        1 => .device_gray,
        3 => .device_rgb,
        4 => .device_cmyk,
        else => return JpegError.UnsupportedColorSpace,
    };

    return ImageInfo{
        .width = width,
        .height = height,
        .color_space = color_space,
        .bits_per_component = bits_per_component,
        .filter = .dct_decode,
        .data = data, // Direct passthrough - JPEG data is used as-is
        .soft_mask = null, // JPEG doesn't support alpha
        .owns_data = false, // We don't allocate, caller owns the data
        .invert_cmyk = is_adobe_cmyk and color_space == .device_cmyk,
        .format = .jpeg,
    };
}

/// Read a big-endian u16 from `bytes[offset]` and `bytes[offset + 1]`.
/// The caller must have checked that both indices are in bounds.
fn readU16BigEndian(bytes: []const u8, offset: usize) u16 {
    return (@as(u16, bytes[offset]) << 8) | @as(u16, bytes[offset + 1]);
}

test "parse rejects SOF header truncated before the component count" {
    const truncated = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xC0, // SOF0
        0x00, 0x0B, // Length
        0x08, // Bits per component
        0x00, 0x64, // Height: 100
        0x00, 0xC8, // Width: 200
        // Component count byte is missing.
    };

    try std.testing.expectError(JpegError.UnexpectedEndOfData, parse(&truncated));
}
|
|
|
|
/// Report whether `marker` is one of the Start of Frame (SOFn) marker codes.
///
/// The SOF codes sit in 0xC0-0xCF; the ranges below leave out 0xC4 (DHT),
/// 0xC8, and 0xCC (DAC), which are not Start of Frame markers.
fn isSOFMarker(marker: u8) bool {
    switch (marker) {
        JPEG_MARKERS.SOF0...JPEG_MARKERS.SOF3,
        JPEG_MARKERS.SOF5...JPEG_MARKERS.SOF7,
        JPEG_MARKERS.SOF9...JPEG_MARKERS.SOF11,
        JPEG_MARKERS.SOF13...JPEG_MARKERS.SOF15,
        => return true,
        else => return false,
    }
}
|
|
|
|
// =============================================================================
|
|
// Tests
|
|
// =============================================================================
|
|
|
|
test "parse valid JPEG header" {
    // Minimal JPEG header: SOI, a JFIF APP0 segment, then an SOF0 frame header.
    // FF D8 FF E0 [JFIF APP0] FF C0 [SOF0 frame]
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xE0, // APP0
        0x00, 0x10, // Length 16
        'J', 'F', 'I', 'F', 0x00, // JFIF identifier
        0x01, 0x01, // Version
        0x00, // Units
        0x00, 0x01, // X density
        0x00, 0x01, // Y density
        0x00, 0x00, // Thumbnail
        0xFF, 0xC0, // SOF0
        0x00, 0x11, // Length 17 = 8 + 3 bytes per component * 3 components
        0x08, // Bits per component
        0x00, 0x64, // Height: 100
        0x00, 0xC8, // Width: 200
        0x03, // Components: 3 (RGB)
        0x01, 0x22, 0x00, // Component 1
        0x02, 0x11, 0x01, // Component 2
        0x03, 0x11, 0x01, // Component 3
    };

    const info = try parse(&jpeg_data);

    try std.testing.expectEqual(@as(u32, 200), info.width);
    try std.testing.expectEqual(@as(u32, 100), info.height);
    try std.testing.expectEqual(ColorSpace.device_rgb, info.color_space);
    try std.testing.expectEqual(@as(u8, 8), info.bits_per_component);
    try std.testing.expectEqual(ImageFilter.dct_decode, info.filter);
    try std.testing.expect(info.soft_mask == null);
    try std.testing.expectEqual(false, info.owns_data);
    // No Adobe APP14 segment and not CMYK, so no inversion is requested.
    try std.testing.expect(!info.invert_cmyk);
}
|
|
|
|
test "parse grayscale JPEG" {
    // SOF0 directly after SOI, with a single component.
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xC0, // SOF0 (directly, no APP0)
        0x00, 0x0B, // Length 11 = 8 + 3 bytes per component * 1 component
        0x08, // Bits per component
        0x00, 0x32, // Height: 50
        0x00, 0x50, // Width: 80
        0x01, // Components: 1 (Grayscale)
        0x01, 0x11, 0x00, // Component 1
    };

    const info = try parse(&jpeg_data);

    try std.testing.expectEqual(@as(u32, 80), info.width);
    try std.testing.expectEqual(@as(u32, 50), info.height);
    try std.testing.expectEqual(ColorSpace.device_gray, info.color_space);
}
|
|
|
|
test "invalid JPEG signature" {
    // The first four bytes of a PNG file are not a JPEG signature.
    const png_magic = [_]u8{ 0x89, 0x50, 0x4E, 0x47 };

    try std.testing.expectError(JpegError.InvalidSignature, parse(&png_magic));
}
|
|
|
|
test "JPEG too short" {
    // A bare SOI marker is below the 4-byte minimum that parse requires.
    const soi_only = [_]u8{ 0xFF, 0xD8 };

    try std.testing.expectError(JpegError.InvalidSignature, parse(&soi_only));
}
|