//! JPEG image parser for PDF embedding
//!
//! JPEG images can be embedded directly in PDF using the DCTDecode filter.
//! This parser extracts the necessary metadata (dimensions, color space)
//! from the JPEG header without decoding the image data.

const std = @import("std");
const ImageInfo = @import("image_info.zig").ImageInfo;
const ColorSpace = @import("image_info.zig").ColorSpace;
const ImageFilter = @import("image_info.zig").ImageFilter;
const ImageFormat = @import("image_info.zig").ImageFormat;

/// JPEG marker bytes (the byte that follows the 0xFF marker prefix).
const JPEG_MARKERS = struct {
    const SOI: u8 = 0xD8; // Start of Image
    const EOI: u8 = 0xD9; // End of Image
    const SOS: u8 = 0xDA; // Start of Scan
    const DQT: u8 = 0xDB; // Define Quantization Table
    const DNL: u8 = 0xDC; // Define Number of Lines
    const DRI: u8 = 0xDD; // Define Restart Interval
    const DHT: u8 = 0xC4; // Define Huffman Table
    const DAC: u8 = 0xCC; // Define Arithmetic Coding
    const APP0: u8 = 0xE0; // Application-specific marker 0 (JFIF)
    const APP1: u8 = 0xE1; // Application-specific marker 1 (EXIF)
    const APP2: u8 = 0xE2; // Application-specific marker 2 (ICC)
    const APP14: u8 = 0xEE; // Application-specific marker 14 (Adobe)
    const COM: u8 = 0xFE; // Comment

    // Start of Frame markers (we need these for image dimensions)
    const SOF0: u8 = 0xC0; // Baseline DCT
    const SOF1: u8 = 0xC1; // Extended sequential DCT
    const SOF2: u8 = 0xC2; // Progressive DCT
    const SOF3: u8 = 0xC3; // Lossless
    const SOF5: u8 = 0xC5; // Differential sequential DCT
    const SOF6: u8 = 0xC6; // Differential progressive DCT
    const SOF7: u8 = 0xC7; // Differential lossless
    const SOF9: u8 = 0xC9; // Extended sequential DCT, arithmetic
    const SOF10: u8 = 0xCA; // Progressive DCT, arithmetic
    const SOF11: u8 = 0xCB; // Lossless, arithmetic
    const SOF13: u8 = 0xCD; // Differential sequential DCT, arithmetic
    const SOF14: u8 = 0xCE; // Differential progressive DCT, arithmetic
    const SOF15: u8 = 0xCF; // Differential lossless, arithmetic
};

/// Errors returned by `parse`.
pub const JpegError = error{
    /// Input is shorter than 4 bytes or does not start with FF D8 FF.
    InvalidSignature,
    /// A segment header extends past the end of the input.
    UnexpectedEndOfData,
    /// No Start of Frame marker was found in the input.
    NoFrameFound,
    /// The frame declares a component count other than 1, 3 or 4.
    UnsupportedColorSpace,
};

/// Parse JPEG image data and extract metadata for PDF embedding.
///
/// Walks the marker segments until the first Start of Frame marker and reads
/// the sample precision, height, width and component count from it. The
/// component count selects the color space (1 = gray, 3 = RGB, 4 = CMYK).
/// `invert_cmyk` is set when a 4-component image was preceded by an APP14
/// segment carrying the "Adobe" identifier.
///
/// The returned `ImageInfo.data` is the `data` slice itself (no copy, no
/// allocation), so `data` must outlive the returned value.
///
/// Errors:
/// - `InvalidSignature`: fewer than 4 bytes, or the input does not start with FF D8 FF.
/// - `UnexpectedEndOfData`: a segment header is cut off by the end of the input.
/// - `NoFrameFound`: the input ended without a Start of Frame marker.
/// - `UnsupportedColorSpace`: component count is not 1, 3 or 4.
pub fn parse(data: []const u8) JpegError!ImageInfo {
    // Validate JPEG signature: FF D8 FF
    if (data.len < 4) return JpegError.InvalidSignature;
    if (data[0] != 0xFF or data[1] != JPEG_MARKERS.SOI or data[2] != 0xFF) {
        return JpegError.InvalidSignature;
    }

    // Bytes of the frame header consumed after the marker byte:
    // length (2) + precision (1) + height (2) + width (2) + component count (1).
    const sof_header_len = 8;

    var width: u32 = 0;
    var height: u32 = 0;
    var components: u8 = 0;
    var bits_per_component: u8 = 8;
    var found_frame = false;
    var is_adobe_cmyk = false;

    // Parse JPEG markers. `data.len >= 4` here, so `data.len - 1` cannot underflow.
    var pos: usize = 2;
    while (pos < data.len - 1) {
        // Find marker (FF xx)
        if (data[pos] != 0xFF) {
            pos += 1;
            continue;
        }

        // Skip padding FF bytes
        while (pos < data.len and data[pos] == 0xFF) {
            pos += 1;
        }
        if (pos >= data.len) break;

        const marker = data[pos];
        pos += 1;

        // Check for SOF (Start of Frame) markers
        if (isSOFMarker(marker)) {
            // All eight header bytes are read below, so all eight must be present.
            if (pos + sof_header_len > data.len) return JpegError.UnexpectedEndOfData;

            const header = data[pos .. pos + sof_header_len];
            // header[0..2] is the segment length, which is not needed here.
            bits_per_component = header[2];
            height = readU16Be(header, 3);
            width = readU16Be(header, 5);
            components = header[7];
            found_frame = true;
            break;
        }

        // Check for Adobe APP14 marker (indicates CMYK handling)
        if (marker == JPEG_MARKERS.APP14) {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            const len = readU16Be(data, pos);

            // Check for "Adobe" string, which directly follows the length field
            if (len >= 12 and pos + 12 <= data.len) {
                if (std.mem.eql(u8, data[pos + 2 .. pos + 7], "Adobe")) {
                    is_adobe_cmyk = true;
                }
            }
            // The length field counts itself, so this lands on the next marker.
            pos += len;
            continue;
        }

        // Skip other markers with length. SOI, EOI, RST0-RST7 (D0-D7) and the
        // stuffed byte 0x00 are not followed by a length field.
        if (marker != JPEG_MARKERS.SOI and marker != JPEG_MARKERS.EOI and
            marker != 0x00 and (marker < 0xD0 or marker > 0xD7))
        {
            if (pos + 2 > data.len) return JpegError.UnexpectedEndOfData;
            pos += readU16Be(data, pos);
        }
    }

    if (!found_frame) return JpegError.NoFrameFound;

    // Determine color space from component count
    const color_space: ColorSpace = switch (components) {
        1 => .device_gray,
        3 => .device_rgb,
        4 => .device_cmyk,
        else => return JpegError.UnsupportedColorSpace,
    };

    return ImageInfo{
        .width = width,
        .height = height,
        .color_space = color_space,
        .bits_per_component = bits_per_component,
        .filter = .dct_decode,
        .data = data, // Direct passthrough - JPEG data is used as-is
        .soft_mask = null, // JPEG doesn't support alpha
        .owns_data = false, // We don't allocate, caller owns the data
        .invert_cmyk = is_adobe_cmyk and color_space == .device_cmyk,
        .format = .jpeg,
    };
}

/// Read a big-endian 16-bit value from `bytes[offset]` and `bytes[offset + 1]`.
/// The caller must ensure both indices are in bounds.
fn readU16Be(bytes: []const u8, offset: usize) u16 {
    return (@as(u16, bytes[offset]) << 8) | @as(u16, bytes[offset + 1]);
}

/// Check if marker is a Start of Frame marker.
/// Covers 0xC0-0xCF except 0xC4 (DHT), 0xC8 and 0xCC (DAC).
fn isSOFMarker(marker: u8) bool {
    return switch (marker) {
        JPEG_MARKERS.SOF0,
        JPEG_MARKERS.SOF1,
        JPEG_MARKERS.SOF2,
        JPEG_MARKERS.SOF3,
        JPEG_MARKERS.SOF5,
        JPEG_MARKERS.SOF6,
        JPEG_MARKERS.SOF7,
        JPEG_MARKERS.SOF9,
        JPEG_MARKERS.SOF10,
        JPEG_MARKERS.SOF11,
        JPEG_MARKERS.SOF13,
        JPEG_MARKERS.SOF14,
        JPEG_MARKERS.SOF15,
        => true,
        else => false,
    };
}

// =============================================================================
// Tests
// =============================================================================

test "parse valid JPEG header" {
    // Minimal valid JPEG with SOF0 marker
    // FF D8 FF E0 [JFIF APP0] FF C0 [SOF0 frame]
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xE0, // APP0
        0x00, 0x10, // Length 16
        'J', 'F', 'I', 'F', 0x00, // JFIF identifier
        0x01, 0x01, // Version
        0x00, // Units
        0x00, 0x01, // X density
        0x00, 0x01, // Y density
        0x00, 0x00, // Thumbnail
        0xFF, 0xC0, // SOF0
        0x00, 0x0B, // Length 11
        0x08, // Bits per component
        0x00, 0x64, // Height: 100
        0x00, 0xC8, // Width: 200
        0x03, // Components: 3 (RGB)
        0x01, 0x22, 0x00, // Component 1
        0x02, 0x11, 0x01, // Component 2
        0x03, 0x11, 0x01, // Component 3
    };

    const info = try parse(&jpeg_data);
    try std.testing.expectEqual(@as(u32, 200), info.width);
    try std.testing.expectEqual(@as(u32, 100), info.height);
    try std.testing.expectEqual(ColorSpace.device_rgb, info.color_space);
    try std.testing.expectEqual(@as(u8, 8), info.bits_per_component);
    try std.testing.expectEqual(ImageFilter.dct_decode, info.filter);
    try std.testing.expect(info.soft_mask == null);
    try std.testing.expectEqual(false, info.owns_data);
}

test "parse grayscale JPEG" {
    const jpeg_data = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xC0, // SOF0 (directly, no APP0)
        0x00, 0x0B, // Length 11
        0x08, // Bits per component
        0x00, 0x32, // Height: 50
        0x00, 0x50, // Width: 80
        0x01, // Components: 1 (Grayscale)
        0x01, 0x11, 0x00, // Component 1
    };

    const info = try parse(&jpeg_data);
    try std.testing.expectEqual(@as(u32, 80), info.width);
    try std.testing.expectEqual(@as(u32, 50), info.height);
    try std.testing.expectEqual(ColorSpace.device_gray, info.color_space);
}

test "invalid JPEG signature" {
    const invalid_data = [_]u8{ 0x89, 0x50, 0x4E, 0x47 }; // PNG signature
    const result = parse(&invalid_data);
    try std.testing.expectError(JpegError.InvalidSignature, result);
}

test "JPEG too short" {
    const short_data = [_]u8{ 0xFF, 0xD8 };
    const result = parse(&short_data);
    try std.testing.expectError(JpegError.InvalidSignature, result);
}

test "truncated SOF header" {
    // Input ends right after the width field: the component count is missing.
    const truncated = [_]u8{
        0xFF, 0xD8, // SOI
        0xFF, 0xC0, // SOF0
        0x00, 0x0B, // Length 11
        0x08, // Bits per component
        0x00, 0x64, // Height: 100
        0x00, 0xC8, // Width: 200
    };
    const result = parse(&truncated);
    try std.testing.expectError(JpegError.UnexpectedEndOfData, result);
}