From a928fc55fd6d5d69b3958e00a7c11e7cdb6a4b27 Mon Sep 17 00:00:00 2001 From: reugenio Date: Mon, 8 Dec 2025 18:57:20 +0100 Subject: [PATCH] feat: Add Unicode width calculation and terminal capability detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unicode width (src/unicode.zig): - charWidth(): Returns display width of Unicode codepoints (0, 1, or 2) - stringWidth(): Calculates display width of UTF-8 strings - truncateToWidth(): Truncates strings to fit display columns - stringWidthBounded(): Width calculation with byte tracking - Handles CJK (double-width), combining marks (zero-width), control characters, and emoji Terminal capabilities (src/termcap.zig): - detect(): Auto-detects terminal features from environment - ColorSupport: none, basic(8), extended(16), 256, true_color - Detects: hyperlinks, images, clipboard, mouse, unicode - Known terminals: kitty, WezTerm, iTerm, VSCode, Alacritty, etc. - Respects NO_COLOR and FORCE_COLOR env vars - rgbTo256(): Converts RGB to 256-color palette - rgbToBasic(): Converts RGB to 8/16 ANSI colors - adaptColor(): Downgrades colors based on terminal support 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/root.zig | 14 ++ src/termcap.zig | 405 ++++++++++++++++++++++++++++++++++++++++++++++++ src/unicode.zig | 330 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 749 insertions(+) create mode 100644 src/termcap.zig create mode 100644 src/unicode.zig diff --git a/src/root.zig b/src/root.zig index b6732e1..8b92743 100644 --- a/src/root.zig +++ b/src/root.zig @@ -282,6 +282,18 @@ pub const SimpleFocusable = focus.SimpleFocusable; pub const theme = @import("theme.zig"); pub const Theme = theme.Theme; +// Unicode width calculation (wcwidth) +pub const unicode = @import("unicode.zig"); +pub const charWidth = unicode.charWidth; +pub const stringWidth = unicode.stringWidth; +pub const truncateToWidth = 
unicode.truncateToWidth; + +// Terminal capability detection +pub const termcap = @import("termcap.zig"); +pub const Capabilities = termcap.Capabilities; +pub const ColorSupport = termcap.ColorSupport; +pub const detectCapabilities = termcap.detect; + // ============================================================================ // Tests // ============================================================================ @@ -300,6 +312,8 @@ test { _ = @import("cursor.zig"); _ = @import("focus.zig"); _ = @import("theme.zig"); + _ = @import("unicode.zig"); + _ = @import("termcap.zig"); // Comprehensive test suite _ = @import("tests/tests.zig"); diff --git a/src/termcap.zig b/src/termcap.zig new file mode 100644 index 0000000..ac6d5d0 --- /dev/null +++ b/src/termcap.zig @@ -0,0 +1,405 @@ +//! Terminal capability detection. +//! +//! This module detects the color and feature support of the current terminal +//! by examining environment variables and terminal responses. +//! +//! ## Color Support Levels +//! +//! - **No color**: Monochrome terminal +//! - **Basic (8)**: 8 standard colors (black, red, green, yellow, blue, magenta, cyan, white) +//! - **Extended (16)**: 8 colors + 8 bright variants +//! - **256**: 256 color palette (6x6x6 cube + 24 grayscale) +//! - **TrueColor (24-bit)**: Full RGB support (16 million colors) +//! +//! ## Example +//! +//! ```zig +//! const termcap = @import("termcap.zig"); +//! +//! const caps = termcap.detect(); +//! +//! if (caps.color_support.hasTrueColor()) { +//! // Use RGB colors +//! } else if (caps.color_support.has256()) { +//! // Fall back to 256 colors +//! } else { +//! // Use basic colors +//! } +//! ``` + +const std = @import("std"); +const builtin = @import("builtin"); + +/// Level of color support. +pub const ColorSupport = enum(u8) { + /// No color support (monochrome). + none = 0, + /// Basic 8 colors. + basic = 8, + /// 16 colors (8 + bright variants). + extended = 16, + /// 256 color palette. 
+palette_256 = 255, + /// 24-bit true color (16M colors). + true_color = 254, + + /// Returns true if the terminal supports at least 256 colors. + pub fn has256(self: ColorSupport) bool { + return self == .palette_256 or self == .true_color; + } + + /// Returns true if the terminal supports true color (24-bit RGB). + pub fn hasTrueColor(self: ColorSupport) bool { + return self == .true_color; + } + + /// Returns true if the terminal supports any colors. + pub fn hasColor(self: ColorSupport) bool { + return self != .none; + } + + /// Returns the maximum number of colors supported. + pub fn maxColors(self: ColorSupport) u32 { + return switch (self) { + .none => 1, + .basic => 8, + .extended => 16, + .palette_256 => 256, + .true_color => 16777216, + }; + } +}; + +/// Terminal capabilities. +pub const Capabilities = struct { + /// Color support level. + color_support: ColorSupport = .basic, + /// Terminal name (from TERM). + term_name: ?[]const u8 = null, + /// Terminal program (from TERM_PROGRAM). + term_program: ?[]const u8 = null, + /// Whether the terminal supports Unicode. + unicode: bool = true, + /// Whether the terminal supports hyperlinks (OSC 8). + hyperlinks: bool = false, + /// Whether the terminal supports images (Kitty/Sixel/iTerm2). + images: bool = false, + /// Whether the terminal supports OSC 52 clipboard. + clipboard: bool = false, + /// Whether the terminal supports bracketed paste. + bracketed_paste: bool = true, + /// Whether the terminal supports mouse reporting. + mouse: bool = true, + /// Whether the terminal supports alternate screen buffer. + alternate_screen: bool = true, + /// Whether the terminal supports styled underlines. + styled_underline: bool = false, + + /// Returns true if this is a known modern terminal with good support. + pub fn isModern(self: Capabilities) bool { + return self.color_support.hasTrueColor() and + self.hyperlinks and + self.styled_underline; + } +}; + +/// Known terminal programs and their capabilities. 
+const KnownTerminal = struct {
+    /// Value compared against TERM_PROGRAM (exact match) or TERM (prefix match).
+    name: []const u8,
+    /// Color level assumed for this terminal.
+    color: ColorSupport,
+    hyperlinks: bool = false,
+    images: bool = false,
+    clipboard: bool = false,
+    styled_underline: bool = false,
+};
+
+/// Lookup table used by `detect()`. Order matters for the TERM prefix match:
+/// the first entry whose name is a prefix of TERM wins.
+const known_terminals = [_]KnownTerminal{
+    // Modern terminals with full support
+    .{ .name = "kitty", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
+    .{ .name = "WezTerm", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
+    .{ .name = "iTerm.app", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
+    .{ .name = "vscode", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
+    .{ .name = "Hyper", .color = .true_color, .hyperlinks = true, .clipboard = true },
+    .{ .name = "Alacritty", .color = .true_color, .hyperlinks = true, .clipboard = true },
+    .{ .name = "foot", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
+    .{ .name = "contour", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
+
+    // Good terminals
+    .{ .name = "gnome-terminal", .color = .true_color, .hyperlinks = true },
+    .{ .name = "konsole", .color = .true_color, .hyperlinks = true },
+    .{ .name = "xfce4-terminal", .color = .true_color, .hyperlinks = true },
+    .{ .name = "terminator", .color = .true_color, .hyperlinks = true },
+    .{ .name = "tilix", .color = .true_color, .hyperlinks = true },
+    .{ .name = "rio", .color = .true_color, .hyperlinks = true, .images = true },
+
+    // Apple Terminal
+    .{ .name = "Apple_Terminal", .color = .palette_256 },
+
+    // tmux/screen (pass-through)
+    .{ .name = "tmux", .color = .true_color, .clipboard = true },
+    .{ .name = "screen", .color = .palette_256 },
+
+    // Basic terminals
+    .{ .name = "linux", .color = .basic }, // Linux console
+    .{ .name = "xterm", .color = .palette_256 },
+    .{ .name = "rxvt", .color = .palette_256 },
+};
+
+/// Detects terminal capabilities from environment variables.
+///
+/// Detection order:
+/// 1. A `TERM_PROGRAM` that exactly matches an entry of `known_terminals` is
+///    authoritative for the color level and feature flags.
+/// 2. Otherwise `COLORTERM`, `TERM` and the terminal-specific variables
+///    `KITTY_WINDOW_ID`, `WEZTERM_PANE`, `ITERM_SESSION_ID` and
+///    `VSCODE_INJECTION` are consulted.
+/// 3. The user overrides `NO_COLOR` (non-empty) and `FORCE_COLOR`, and the
+///    locale-based Unicode check, are always applied last, whichever of the
+///    two paths above was taken.
+///
+/// `term_name` and `term_program` in the result are the slices returned by
+/// `std.posix.getenv`; they are not copied.
+pub fn detect() Capabilities {
+    var caps = Capabilities{};
+
+    caps.term_name = std.posix.getenv("TERM");
+    caps.term_program = std.posix.getenv("TERM_PROGRAM");
+
+    // Step 1: a recognized TERM_PROGRAM supplies the whole feature set.
+    const program_known = blk: {
+        const prog = caps.term_program orelse break :blk false;
+        for (known_terminals) |kt| {
+            if (std.mem.eql(u8, prog, kt.name)) {
+                caps.color_support = kt.color;
+                caps.hyperlinks = kt.hyperlinks;
+                caps.images = kt.images;
+                caps.clipboard = kt.clipboard;
+                caps.styled_underline = kt.styled_underline;
+                break :blk true;
+            }
+        }
+        break :blk false;
+    };
+
+    // Step 2: heuristics, only when TERM_PROGRAM did not identify the terminal.
+    if (!program_known) {
+        // Check COLORTERM for true color
+        if (std.posix.getenv("COLORTERM")) |colorterm| {
+            if (std.mem.eql(u8, colorterm, "truecolor") or std.mem.eql(u8, colorterm, "24bit")) {
+                caps.color_support = .true_color;
+            }
+        }
+
+        // Check TERM for color hints
+        if (caps.term_name) |term| {
+            if (std.mem.indexOf(u8, term, "truecolor") != null or
+                std.mem.indexOf(u8, term, "24bit") != null or
+                std.mem.indexOf(u8, term, "direct") != null)
+            {
+                caps.color_support = .true_color;
+            } else if (std.mem.indexOf(u8, term, "256") != null) {
+                // "256" also matches every "...256color" name.
+                if (caps.color_support != .true_color) {
+                    caps.color_support = .palette_256;
+                }
+            } else {
+                for (known_terminals) |kt| {
+                    if (std.mem.startsWith(u8, term, kt.name)) {
+                        // Compare by color count, not by enum tag value: the
+                        // tags are not ordered by capability (palette_256 is
+                        // 255, true_color is 254), so a tag comparison would
+                        // downgrade a COLORTERM-detected true color terminal.
+                        if (kt.color.maxColors() > caps.color_support.maxColors()) {
+                            caps.color_support = kt.color;
+                        }
+                        caps.hyperlinks = caps.hyperlinks or kt.hyperlinks;
+                        caps.images = caps.images or kt.images;
+                        caps.clipboard = caps.clipboard or kt.clipboard;
+                        caps.styled_underline = caps.styled_underline or kt.styled_underline;
+                        break;
+                    }
+                }
+            }
+        }
+
+        // Terminal-specific environment variables
+        if (std.posix.getenv("KITTY_WINDOW_ID") != null) {
+            caps.color_support = .true_color;
+            caps.hyperlinks = true;
+            caps.images = true;
+            caps.clipboard = true;
+            caps.styled_underline = true;
+        }
+
+        if (std.posix.getenv("WEZTERM_PANE") != null) {
+            caps.color_support = .true_color;
+            caps.hyperlinks = true;
+            caps.images = true;
+            caps.clipboard = true;
+            caps.styled_underline = true;
+        }
+
+        if (std.posix.getenv("ITERM_SESSION_ID") != null) {
+            caps.color_support = .true_color;
+            caps.hyperlinks = true;
+            caps.images = true;
+            caps.clipboard = true;
+        }
+
+        // TERM_PROGRAM == "vscode" is already handled by the table lookup in
+        // step 1, so only the injection marker needs checking here.
+        if (std.posix.getenv("VSCODE_INJECTION") != null) {
+            caps.color_support = .true_color;
+            caps.hyperlinks = true;
+            caps.clipboard = true;
+        }
+    }
+
+    // Step 3: user overrides.
+    // NO_COLOR (https://no-color.org/) only counts when set to a non-empty value.
+    if (std.posix.getenv("NO_COLOR")) |no_color| {
+        if (no_color.len > 0) {
+            caps.color_support = .none;
+        }
+    }
+
+    // FORCE_COLOR: ""/"1"/"true" re-enable at least basic color,
+    // "2" forces the 256 palette, "3" forces true color.
+    if (std.posix.getenv("FORCE_COLOR")) |force| {
+        if (force.len == 0 or std.mem.eql(u8, force, "1") or std.mem.eql(u8, force, "true")) {
+            if (caps.color_support == .none) {
+                caps.color_support = .basic;
+            }
+        } else if (std.mem.eql(u8, force, "2")) {
+            caps.color_support = .palette_256;
+        } else if (std.mem.eql(u8, force, "3")) {
+            caps.color_support = .true_color;
+        }
+    }
+
+    // Unicode support: the first of LC_ALL, LC_CTYPE, LANG that is set decides.
+    // When none is set the default from `Capabilities` is kept.
+    const lang = std.posix.getenv("LC_ALL") orelse std.posix.getenv("LC_CTYPE") orelse std.posix.getenv("LANG");
+    if (lang) |l| {
+        caps.unicode = std.mem.indexOf(u8, l, "UTF-8") != null or
+            std.mem.indexOf(u8, l, "utf-8") != null or
+            std.mem.indexOf(u8, l, "UTF8") != null or
+            std.mem.indexOf(u8, l, "utf8") != null;
+    }
+
+    return caps;
+}
+
+/// Returns a color value appropriate for the terminal's color support level.
+/// If the terminal doesn't support the given color depth, it will be
+/// downgraded to a supported format.
+pub fn adaptColor(caps: Capabilities, r: u8, g: u8, b: u8) union(enum) {
+    rgb: struct { r: u8, g: u8, b: u8 },
+    palette: u8,
+    basic: u8,
+    none: void,
+} {
+    // One arm per ColorSupport level; the switch is exhaustive on purpose.
+    return switch (caps.color_support) {
+        .true_color => .{ .rgb = .{ .r = r, .g = g, .b = b } },
+        .palette_256 => .{ .palette = rgbTo256(r, g, b) },
+        .extended => .{ .basic = rgbToBasic(r, g, b, true) },
+        .basic => .{ .basic = rgbToBasic(r, g, b, false) },
+        .none => .{ .none = {} },
+    };
+}
+
+/// Converts RGB to the closest 256-color palette index.
+///
+/// Grays (r == g == b) map to index 16 when darker than 8, to index 231 when
+/// 248 or brighter, and otherwise to one of the 24 ramp entries 232..255.
+/// All other colors map into the 6x6x6 cube at indices 16..231.
+pub fn rgbTo256(r: u8, g: u8, b: u8) u8 {
+    // Check if it's a grayscale
+    if (r == g and g == b) {
+        if (r < 8) return 16; // black
+        // 248 must take this branch too: the ramp formula below yields
+        // 232 + 24 = 256 for r == 248, which does not fit in a u8.
+        if (r >= 248) return 231; // white
+        // r is in 8..247 here, so the ramp step is 0..23 and the sum is <= 255.
+        return @intCast((((@as(u16, r) - 8) * 24) / 240) + 232);
+    }
+
+    // Convert to 6x6x6 cube: each channel is scaled to 0..5.
+    const r6: u8 = @intCast(((@as(u16, r) * 6) / 256));
+    const g6: u8 = @intCast(((@as(u16, g) * 6) / 256));
+    const b6: u8 = @intCast(((@as(u16, b) * 6) / 256));
+
+    return 16 + 36 * r6 + 6 * g6 + b6;
+}
+
+/// Converts RGB to basic 8/16 color index.
+pub fn rgbToBasic(r: u8, g: u8, b: u8, bright_support: bool) u8 { + // Determine "brightness" based on max channel value + const max_channel = @max(r, @max(g, b)); + const is_bright = bright_support and max_channel > 170; + + // Threshold for color detection + const threshold: u8 = 85; + + var color: u8 = 0; + if (r >= threshold) color |= 1; // red + if (g >= threshold) color |= 2; // green + if (b >= threshold) color |= 4; // blue + + // Calculate luminance for black/white decision + const lum: u32 = (@as(u32, r) * 299 + @as(u32, g) * 587 + @as(u32, b) * 114) / 1000; + + // Map to ANSI colors + const base: u8 = switch (color) { + 0 => if (lum > 64) 7 else 0, // black/white based on luminance + 1 => 1, // red + 2 => 2, // green + 3 => 3, // yellow + 4 => 4, // blue + 5 => 5, // magenta + 6 => 6, // cyan + 7 => 7, // white + else => 7, + }; + + return if (is_bright) base + 8 else base; +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "ColorSupport methods" { + try std.testing.expect(ColorSupport.true_color.hasTrueColor()); + try std.testing.expect(ColorSupport.true_color.has256()); + try std.testing.expect(ColorSupport.true_color.hasColor()); + + try std.testing.expect(!ColorSupport.palette_256.hasTrueColor()); + try std.testing.expect(ColorSupport.palette_256.has256()); + try std.testing.expect(ColorSupport.palette_256.hasColor()); + + try std.testing.expect(!ColorSupport.basic.hasTrueColor()); + try std.testing.expect(!ColorSupport.basic.has256()); + try std.testing.expect(ColorSupport.basic.hasColor()); + + try std.testing.expect(!ColorSupport.none.hasColor()); +} + +test "maxColors" { + try std.testing.expectEqual(@as(u32, 16777216), ColorSupport.true_color.maxColors()); + try std.testing.expectEqual(@as(u32, 256), ColorSupport.palette_256.maxColors()); + try std.testing.expectEqual(@as(u32, 16), 
ColorSupport.extended.maxColors()); + try std.testing.expectEqual(@as(u32, 8), ColorSupport.basic.maxColors()); + try std.testing.expectEqual(@as(u32, 1), ColorSupport.none.maxColors()); +} + +test "rgbTo256 grayscale" { + try std.testing.expectEqual(@as(u8, 16), rgbTo256(0, 0, 0)); + try std.testing.expectEqual(@as(u8, 231), rgbTo256(255, 255, 255)); +} + +test "rgbTo256 colors" { + // Pure red should be in the cube + const red = rgbTo256(255, 0, 0); + try std.testing.expect(red >= 16 and red <= 231); + + // Pure green + const green = rgbTo256(0, 255, 0); + try std.testing.expect(green >= 16 and green <= 231); + + // Pure blue + const blue = rgbTo256(0, 0, 255); + try std.testing.expect(blue >= 16 and blue <= 231); +} + +test "rgbToBasic" { + // Black + try std.testing.expectEqual(@as(u8, 0), rgbToBasic(0, 0, 0, false)); + // White + try std.testing.expectEqual(@as(u8, 7), rgbToBasic(255, 255, 255, false)); + // Red + try std.testing.expectEqual(@as(u8, 1), rgbToBasic(255, 0, 0, false)); + // Bright red + try std.testing.expectEqual(@as(u8, 9), rgbToBasic(255, 0, 0, true)); +} + +test "detect returns valid capabilities" { + const caps = detect(); + // Just verify it doesn't crash and returns something valid + try std.testing.expect(@intFromEnum(caps.color_support) <= 255); +} diff --git a/src/unicode.zig b/src/unicode.zig new file mode 100644 index 0000000..6ce0d7e --- /dev/null +++ b/src/unicode.zig @@ -0,0 +1,330 @@ +//! Unicode width calculation for TUI rendering. +//! +//! This module provides functions to calculate the display width of Unicode +//! characters and strings, essential for proper text alignment in terminal UIs. +//! +//! Most characters are single-width (1 cell), but: +//! - CJK characters are double-width (2 cells) +//! - Combining characters are zero-width (0 cells) +//! - Control characters are zero-width (0 cells) +//! - Some emojis are double-width (2 cells) +//! +//! ## Example +//! +//! ```zig +//! 
const unicode = @import("unicode.zig");
+//!
+//! // Single-width ASCII
+//! try testing.expectEqual(@as(usize, 5), unicode.stringWidth("Hello"));
+//!
+//! // Double-width CJK
+//! try testing.expectEqual(@as(usize, 4), unicode.stringWidth("日本"));
+//!
+//! // Mixed content
+//! try testing.expectEqual(@as(usize, 9), unicode.stringWidth("Hello日本"));
+//! ```
+
+const std = @import("std");
+
+/// Returns the display width of a Unicode codepoint.
+///
+/// - Returns 0 for control characters (C0, DEL, C1), combining marks and
+///   other zero-width characters
+/// - Returns 2 for wide characters (CJK, some emojis)
+/// - Returns 1 for everything else (ASCII, Latin, etc.)
+///
+/// The return type is signed, but this implementation only ever returns
+/// 0, 1 or 2; it never returns -1.
+pub fn charWidth(codepoint: u21) i8 {
+    // Control characters (C0 and DEL)
+    if (codepoint < 0x20 or codepoint == 0x7F) {
+        return 0;
+    }
+
+    // C1 control characters
+    if (codepoint >= 0x80 and codepoint < 0xA0) {
+        return 0;
+    }
+
+    // Combining characters (zero-width)
+    if (isCombining(codepoint)) {
+        return 0;
+    }
+
+    // Zero-width characters. Checked before isWide so a zero-width result
+    // takes precedence for any codepoint that both helpers would accept.
+    if (isZeroWidth(codepoint)) {
+        return 0;
+    }
+
+    // Wide characters (CJK, etc.)
+    if (isWide(codepoint)) {
+        return 2;
+    }
+
+    // Default: single width
+    return 1;
+}
+
+/// Returns the display width of a Unicode codepoint as usize.
+/// A negative result from `charWidth` would be mapped to 0; `charWidth`
+/// currently never produces one, so that branch is purely defensive.
+pub fn charWidthUnsigned(codepoint: u21) usize {
+    const w = charWidth(codepoint);
+    return if (w < 0) 0 else @intCast(w);
+}
+
+/// Calculates the display width of a UTF-8 encoded string.
+/// Sums `charWidthUnsigned` over every codepoint; an empty string yields 0.
+/// NOTE(review): `std.unicode.Utf8Iterator` appears to assume `str` is valid
+/// UTF-8 - confirm callers validate untrusted input first.
+pub fn stringWidth(str: []const u8) usize {
+    var width: usize = 0;
+    var iter = std.unicode.Utf8Iterator{ .bytes = str, .i = 0 };
+
+    while (iter.nextCodepoint()) |cp| {
+        width += charWidthUnsigned(cp);
+    }
+
+    return width;
+}
+
+/// Calculates the display width of a UTF-8 string, stopping at max_width.
+/// Returns the number of bytes consumed and the display width.
+pub fn stringWidthBounded(str: []const u8, max_width: usize) struct { bytes: usize, width: usize } {
+    var width: usize = 0;
+    var byte_pos: usize = 0;
+    var iter = std.unicode.Utf8Iterator{ .bytes = str, .i = 0 };
+
+    while (iter.nextCodepoint()) |cp| {
+        const cw = charWidthUnsigned(cp);
+        // Stop before the first codepoint that would exceed the budget, so a
+        // wide character is never split and `width` may end below max_width.
+        if (width + cw > max_width) break;
+        width += cw;
+        // Only advance past codepoints that were accepted in full.
+        byte_pos = iter.i;
+    }
+
+    return .{ .bytes = byte_pos, .width = width };
+}
+
+/// Truncates a string to fit within max_width display columns.
+/// Returns a slice of the original string (no allocation); the slice always
+/// ends on a codepoint boundary and may be narrower than max_width when the
+/// next codepoint is wide.
+pub fn truncateToWidth(str: []const u8, max_width: usize) []const u8 {
+    const result = stringWidthBounded(str, max_width);
+    return str[0..result.bytes];
+}
+
+/// Pads a string with spaces to exactly the specified display width.
+/// If the string is wider, it is truncated first; when truncation stops short
+/// of `target_width` (because the next codepoint is double-width), the gap is
+/// filled with spaces so the result is still exactly `target_width` columns.
+///
+/// Always allocates: the caller owns the returned slice and must free it with
+/// `allocator`. Fails only if the allocation fails.
+pub fn padToWidth(allocator: std.mem.Allocator, str: []const u8, target_width: usize) ![]u8 {
+    // The longest prefix that fits; covers both the "fits" and "too wide" cases.
+    const fitted = stringWidthBounded(str, target_width);
+    const padding = target_width - fitted.width;
+
+    const result = try allocator.alloc(u8, fitted.bytes + padding);
+    @memcpy(result[0..fitted.bytes], str[0..fitted.bytes]);
+    @memset(result[fitted.bytes..], ' ');
+    return result;
+}
+
+/// Checks if a codepoint is a combining character (zero-width).
+fn isCombining(cp: u21) bool {
+    // Inclusive codepoint ranges treated as zero-width combining marks.
+    // Single codepoints are written as ranges with first == last.
+    const Span = struct { first: u21, last: u21 };
+    const spans = [_]Span{
+        .{ .first = 0x0300, .last = 0x036F }, // Combining Diacritical Marks
+        .{ .first = 0x1AB0, .last = 0x1AFF }, // Combining Diacritical Marks Extended
+        .{ .first = 0x1DC0, .last = 0x1DFF }, // Combining Diacritical Marks Supplement
+        .{ .first = 0x20D0, .last = 0x20FF }, // Combining Diacritical Marks for Symbols
+        .{ .first = 0xFE20, .last = 0xFE2F }, // Combining Half Marks
+
+        // Thai combining marks
+        .{ .first = 0x0E31, .last = 0x0E3A },
+        .{ .first = 0x0E47, .last = 0x0E4E },
+
+        // Hebrew combining marks
+        .{ .first = 0x0591, .last = 0x05BD },
+        .{ .first = 0x05BF, .last = 0x05BF },
+        .{ .first = 0x05C1, .last = 0x05C2 },
+        .{ .first = 0x05C4, .last = 0x05C5 },
+        .{ .first = 0x05C7, .last = 0x05C7 },
+
+        // Arabic combining marks
+        .{ .first = 0x0610, .last = 0x061A },
+        .{ .first = 0x064B, .last = 0x065F },
+        .{ .first = 0x0670, .last = 0x0670 },
+        .{ .first = 0x06D6, .last = 0x06DC },
+        .{ .first = 0x06DF, .last = 0x06E4 },
+        .{ .first = 0x06E7, .last = 0x06E8 },
+        .{ .first = 0x06EA, .last = 0x06ED },
+
+        // Variation selectors
+        .{ .first = 0xFE00, .last = 0xFE0F },
+        .{ .first = 0xE0100, .last = 0xE01EF },
+    };
+
+    for (spans) |span| {
+        if (cp >= span.first and cp <= span.last) return true;
+    }
+    return false;
+}
+
+/// Checks if a codepoint is zero-width (but not combining).
+fn isZeroWidth(cp: u21) bool { + // Soft hyphen + if (cp == 0x00AD) return true; + // Zero-width space + if (cp == 0x200B) return true; + // Zero-width non-joiner + if (cp == 0x200C) return true; + // Zero-width joiner + if (cp == 0x200D) return true; + // Word joiner + if (cp == 0x2060) return true; + // Zero-width no-break space (BOM when not at start) + if (cp == 0xFEFF) return true; + + // Default ignorables + if (cp >= 0x2060 and cp <= 0x206F) return true; + + // Hangul fillers + if (cp == 0x115F or cp == 0x1160) return true; + if (cp >= 0x3164 and cp <= 0x3164) return true; + if (cp == 0xFFA0) return true; + + return false; +} + +/// Checks if a codepoint is a wide character (2 cells). +fn isWide(cp: u21) bool { + // CJK Radicals Supplement + if (cp >= 0x2E80 and cp <= 0x2EFF) return true; + // Kangxi Radicals + if (cp >= 0x2F00 and cp <= 0x2FDF) return true; + // CJK Symbols and Punctuation + if (cp >= 0x3000 and cp <= 0x303F) return true; + // Hiragana + if (cp >= 0x3040 and cp <= 0x309F) return true; + // Katakana + if (cp >= 0x30A0 and cp <= 0x30FF) return true; + // Bopomofo + if (cp >= 0x3100 and cp <= 0x312F) return true; + // Hangul Compatibility Jamo + if (cp >= 0x3130 and cp <= 0x318F) return true; + // Kanbun + if (cp >= 0x3190 and cp <= 0x319F) return true; + // Bopomofo Extended + if (cp >= 0x31A0 and cp <= 0x31BF) return true; + // CJK Strokes + if (cp >= 0x31C0 and cp <= 0x31EF) return true; + // Katakana Phonetic Extensions + if (cp >= 0x31F0 and cp <= 0x31FF) return true; + // Enclosed CJK Letters and Months + if (cp >= 0x3200 and cp <= 0x32FF) return true; + // CJK Compatibility + if (cp >= 0x3300 and cp <= 0x33FF) return true; + // CJK Unified Ideographs Extension A + if (cp >= 0x3400 and cp <= 0x4DBF) return true; + // CJK Unified Ideographs + if (cp >= 0x4E00 and cp <= 0x9FFF) return true; + // Yi Syllables + if (cp >= 0xA000 and cp <= 0xA48F) return true; + // Yi Radicals + if (cp >= 0xA490 and cp <= 0xA4CF) return true; + // Hangul 
Syllables + if (cp >= 0xAC00 and cp <= 0xD7AF) return true; + // CJK Compatibility Ideographs + if (cp >= 0xF900 and cp <= 0xFAFF) return true; + // Halfwidth and Fullwidth Forms (fullwidth only) + if (cp >= 0xFF00 and cp <= 0xFF60) return true; + if (cp >= 0xFFE0 and cp <= 0xFFE6) return true; + // CJK Unified Ideographs Extension B-F + if (cp >= 0x20000 and cp <= 0x2A6DF) return true; + if (cp >= 0x2A700 and cp <= 0x2B73F) return true; + if (cp >= 0x2B740 and cp <= 0x2B81F) return true; + if (cp >= 0x2B820 and cp <= 0x2CEAF) return true; + if (cp >= 0x2CEB0 and cp <= 0x2EBEF) return true; + if (cp >= 0x30000 and cp <= 0x3134F) return true; + + // Some emoji are wide + // Emoji modifiers and ZWJ sequences handled separately + // Basic wide emoji ranges + if (cp >= 0x1F300 and cp <= 0x1F64F) return true; // Misc Symbols and Pictographs + Emoticons + if (cp >= 0x1F680 and cp <= 0x1F6FF) return true; // Transport and Map Symbols + if (cp >= 0x1F900 and cp <= 0x1F9FF) return true; // Supplemental Symbols and Pictographs + if (cp >= 0x1FA00 and cp <= 0x1FA6F) return true; // Chess Symbols + if (cp >= 0x1FA70 and cp <= 0x1FAFF) return true; // Symbols and Pictographs Extended-A + + return false; +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "ASCII characters are single-width" { + try std.testing.expectEqual(@as(i8, 1), charWidth('a')); + try std.testing.expectEqual(@as(i8, 1), charWidth('Z')); + try std.testing.expectEqual(@as(i8, 1), charWidth('0')); + try std.testing.expectEqual(@as(i8, 1), charWidth('!')); + try std.testing.expectEqual(@as(i8, 1), charWidth(' ')); +} + +test "Control characters are zero-width" { + try std.testing.expectEqual(@as(i8, 0), charWidth(0x00)); // NUL + try std.testing.expectEqual(@as(i8, 0), charWidth(0x0A)); // LF + try std.testing.expectEqual(@as(i8, 0), charWidth(0x0D)); // CR + try 
std.testing.expectEqual(@as(i8, 0), charWidth(0x1B)); // ESC + try std.testing.expectEqual(@as(i8, 0), charWidth(0x7F)); // DEL +} + +test "CJK characters are double-width" { + try std.testing.expectEqual(@as(i8, 2), charWidth(0x4E2D)); // 中 + try std.testing.expectEqual(@as(i8, 2), charWidth(0x6587)); // 文 + try std.testing.expectEqual(@as(i8, 2), charWidth(0x65E5)); // 日 + try std.testing.expectEqual(@as(i8, 2), charWidth(0x672C)); // 本 +} + +test "Hiragana/Katakana are double-width" { + try std.testing.expectEqual(@as(i8, 2), charWidth(0x3042)); // あ + try std.testing.expectEqual(@as(i8, 2), charWidth(0x30A2)); // ア +} + +test "Combining characters are zero-width" { + try std.testing.expectEqual(@as(i8, 0), charWidth(0x0301)); // combining acute + try std.testing.expectEqual(@as(i8, 0), charWidth(0x0308)); // combining diaeresis +} + +test "Zero-width characters" { + try std.testing.expectEqual(@as(i8, 0), charWidth(0x200B)); // ZWSP + try std.testing.expectEqual(@as(i8, 0), charWidth(0x200D)); // ZWJ + try std.testing.expectEqual(@as(i8, 0), charWidth(0xFEFF)); // BOM +} + +test "stringWidth for ASCII" { + try std.testing.expectEqual(@as(usize, 5), stringWidth("Hello")); + try std.testing.expectEqual(@as(usize, 0), stringWidth("")); + try std.testing.expectEqual(@as(usize, 1), stringWidth("a")); +} + +test "stringWidth for CJK" { + try std.testing.expectEqual(@as(usize, 4), stringWidth("日本")); + try std.testing.expectEqual(@as(usize, 6), stringWidth("中文字")); +} + +test "stringWidth for mixed content" { + try std.testing.expectEqual(@as(usize, 9), stringWidth("Hello日本")); + try std.testing.expectEqual(@as(usize, 7), stringWidth("a日b本c")); +} + +test "truncateToWidth" { + const result = truncateToWidth("Hello World", 5); + try std.testing.expectEqualStrings("Hello", result); +} + +test "truncateToWidth with CJK" { + // "日本" = 4 width, truncate to 3 should give "日" (2 width) + const result = truncateToWidth("日本語", 3); + try std.testing.expectEqual(@as(usize, 3), 
result.len); // 日 is 3 bytes +} + +test "stringWidthBounded" { + const result = stringWidthBounded("Hello World", 5); + try std.testing.expectEqual(@as(usize, 5), result.bytes); + try std.testing.expectEqual(@as(usize, 5), result.width); +}