feat: Add Unicode width calculation and terminal capability detection

Unicode width (src/unicode.zig):
- charWidth(): Returns display width of Unicode codepoints (0, 1, or 2)
- stringWidth(): Calculates display width of UTF-8 strings
- truncateToWidth(): Truncates strings to fit display columns
- stringWidthBounded(): Width calculation with byte tracking
- Handles CJK (double-width), combining marks (zero-width),
  control characters, and emoji

Terminal capabilities (src/termcap.zig):
- detect(): Auto-detects terminal features from environment
- ColorSupport: none, basic(8), extended(16), 256, true_color
- Detects: hyperlinks, images, clipboard, mouse, unicode
- Known terminals: kitty, WezTerm, iTerm, VSCode, Alacritty, etc.
- Respects NO_COLOR and FORCE_COLOR env vars
- rgbTo256(): Converts RGB to 256-color palette
- rgbToBasic(): Converts RGB to 8/16 ANSI colors
- adaptColor(): Downgrades colors based on terminal support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
reugenio 2025-12-08 18:57:20 +01:00
parent 96810d80ea
commit a928fc55fd
3 changed files with 749 additions and 0 deletions

View file

@ -282,6 +282,18 @@ pub const SimpleFocusable = focus.SimpleFocusable;
pub const theme = @import("theme.zig");
pub const Theme = theme.Theme;
// Unicode width calculation (wcwidth)
pub const unicode = @import("unicode.zig");
pub const charWidth = unicode.charWidth;
pub const stringWidth = unicode.stringWidth;
pub const truncateToWidth = unicode.truncateToWidth;
// Terminal capability detection
pub const termcap = @import("termcap.zig");
pub const Capabilities = termcap.Capabilities;
pub const ColorSupport = termcap.ColorSupport;
pub const detectCapabilities = termcap.detect;
// ============================================================================
// Tests
// ============================================================================
@ -300,6 +312,8 @@ test {
_ = @import("cursor.zig");
_ = @import("focus.zig");
_ = @import("theme.zig");
_ = @import("unicode.zig");
_ = @import("termcap.zig");
// Comprehensive test suite
_ = @import("tests/tests.zig");

405
src/termcap.zig Normal file
View file

@ -0,0 +1,405 @@
//! Terminal capability detection.
//!
//! This module detects the color and feature support of the current terminal
//! by examining environment variables. It does not query the terminal itself.
//!
//! ## Color Support Levels
//!
//! - **No color**: Monochrome terminal
//! - **Basic (8)**: 8 standard colors (black, red, green, yellow, blue, magenta, cyan, white)
//! - **Extended (16)**: 8 colors + 8 bright variants
//! - **256**: 256 color palette (16 ANSI colors + 6x6x6 cube + 24 grayscale)
//! - **TrueColor (24-bit)**: Full RGB support (16 million colors)
//!
//! ## Example
//!
//! ```zig
//! const termcap = @import("termcap.zig");
//!
//! const caps = termcap.detect();
//!
//! if (caps.color_support.hasTrueColor()) {
//! // Use RGB colors
//! } else if (caps.color_support.has256()) {
//! // Fall back to 256 colors
//! } else {
//! // Use basic colors
//! }
//! ```
const std = @import("std");
const builtin = @import("builtin");
/// Color depth a terminal is able to render.
///
/// The explicit tag values are identifiers, not a capability ranking:
/// `palette_256` is 255 while `true_color` is 254, so comparing
/// `@intFromEnum` results does not order the levels by color depth.
/// Compare `maxColors()` results or use the `has*` predicates instead.
pub const ColorSupport = enum(u8) {
    /// Monochrome: no color output.
    none = 0,
    /// The 8 standard colors.
    basic = 8,
    /// 16 colors: the 8 standard colors plus their bright variants.
    extended = 16,
    /// The 256-entry indexed palette.
    palette_256 = 255,
    /// 24-bit RGB (about 16 million colors).
    true_color = 254,

    /// Reports whether at least the 256-color palette is available.
    pub fn has256(self: ColorSupport) bool {
        return switch (self) {
            .palette_256, .true_color => true,
            .none, .basic, .extended => false,
        };
    }

    /// Reports whether 24-bit RGB output is available.
    pub fn hasTrueColor(self: ColorSupport) bool {
        return switch (self) {
            .true_color => true,
            .none, .basic, .extended, .palette_256 => false,
        };
    }

    /// Reports whether any color output is available at all.
    pub fn hasColor(self: ColorSupport) bool {
        return switch (self) {
            .none => false,
            .basic, .extended, .palette_256, .true_color => true,
        };
    }

    /// Number of distinct colors at this level (1 for monochrome).
    pub fn maxColors(self: ColorSupport) u32 {
        return switch (self) {
            .true_color => 1 << 24,
            .palette_256 => 256,
            .extended => 16,
            .basic => 8,
            .none => 1,
        };
    }
};
/// Feature set of a terminal, as filled in by `detect()`.
///
/// The field defaults describe a conservative terminal: 8 colors, Unicode,
/// bracketed paste, mouse and alternate screen enabled, everything else off.
pub const Capabilities = struct {
    /// Color depth the terminal can render.
    color_support: ColorSupport = .basic,
    /// Value of the `TERM` environment variable, if set.
    term_name: ?[]const u8 = null,
    /// Value of the `TERM_PROGRAM` environment variable, if set.
    term_program: ?[]const u8 = null,
    /// Unicode output is supported.
    unicode: bool = true,
    /// OSC 8 hyperlinks are supported.
    hyperlinks: bool = false,
    /// Inline images are supported (Kitty/Sixel/iTerm2).
    images: bool = false,
    /// OSC 52 clipboard access is supported.
    clipboard: bool = false,
    /// Bracketed paste is supported.
    bracketed_paste: bool = true,
    /// Mouse reporting is supported.
    mouse: bool = true,
    /// The alternate screen buffer is supported.
    alternate_screen: bool = true,
    /// Styled underlines are supported.
    styled_underline: bool = false,

    /// Reports whether true color, hyperlinks and styled underlines are all
    /// available at once.
    pub fn isModern(self: Capabilities) bool {
        if (!self.color_support.hasTrueColor()) return false;
        if (!self.hyperlinks) return false;
        return self.styled_underline;
    }
};
/// One row of the `known_terminals` table: a terminal identifier and the
/// capabilities assumed for it. Feature flags left out of a row default to
/// `false`.
const KnownTerminal = struct {
/// Identifier compared against `TERM_PROGRAM` (exact match) and against the
/// start of `TERM` (prefix match) by `detect()`.
name: []const u8,
/// Color depth assumed for this terminal.
color: ColorSupport,
/// OSC 8 hyperlinks.
hyperlinks: bool = false,
/// Inline image support.
images: bool = false,
/// OSC 52 clipboard.
clipboard: bool = false,
/// Styled underlines.
styled_underline: bool = false,
};
/// Table of recognized terminals consulted by `detect()`.
///
/// Names are compared case-sensitively. `detect()` scans the table from top to
/// bottom and stops at the first hit, so for the `TERM` prefix match the order
/// of rows decides which entry wins.
const known_terminals = [_]KnownTerminal{
// Modern terminals with full support
.{ .name = "kitty", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
.{ .name = "WezTerm", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
.{ .name = "iTerm.app", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
.{ .name = "vscode", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
.{ .name = "Hyper", .color = .true_color, .hyperlinks = true, .clipboard = true },
.{ .name = "Alacritty", .color = .true_color, .hyperlinks = true, .clipboard = true },
.{ .name = "foot", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
.{ .name = "contour", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
// Good terminals (true color and hyperlinks; other features not assumed)
.{ .name = "gnome-terminal", .color = .true_color, .hyperlinks = true },
.{ .name = "konsole", .color = .true_color, .hyperlinks = true },
.{ .name = "xfce4-terminal", .color = .true_color, .hyperlinks = true },
.{ .name = "terminator", .color = .true_color, .hyperlinks = true },
.{ .name = "tilix", .color = .true_color, .hyperlinks = true },
.{ .name = "rio", .color = .true_color, .hyperlinks = true, .images = true },
// Apple Terminal
.{ .name = "Apple_Terminal", .color = .palette_256 },
// tmux/screen (pass-through)
.{ .name = "tmux", .color = .true_color, .clipboard = true },
.{ .name = "screen", .color = .palette_256 },
// Basic terminals; these short names also act as TERM prefixes
// (e.g. a TERM value beginning with "xterm" hits the "xterm" row).
.{ .name = "linux", .color = .basic }, // Linux console
.{ .name = "xterm", .color = .palette_256 },
.{ .name = "rxvt", .color = .palette_256 },
};
/// Detects terminal capabilities from environment variables.
///
/// Detection proceeds in three stages:
///
/// 1. An exact `TERM_PROGRAM` match in `known_terminals` supplies the whole
///    feature set and skips stage 2.
/// 2. Otherwise `COLORTERM`, `TERM` and terminal-specific variables
///    (`KITTY_WINDOW_ID`, `WEZTERM_PANE`, `ITERM_SESSION_ID`,
///    `VSCODE_INJECTION`) are used as hints.
/// 3. User-level settings are applied on every path: `NO_COLOR` disables
///    color, `FORCE_COLOR` then raises or forces the level, and
///    `LC_ALL` / `LC_CTYPE` / `LANG` decide `unicode`.
///
/// `term_name` and `term_program` in the result are the slices returned by
/// `std.posix.getenv`; they are not copied.
pub fn detect() Capabilities {
    var caps = Capabilities{};
    caps.term_name = std.posix.getenv("TERM");
    caps.term_program = std.posix.getenv("TERM_PROGRAM");

    // Stage 1: an exact TERM_PROGRAM match is authoritative for features.
    const program_known = blk: {
        const prog = caps.term_program orelse break :blk false;
        for (known_terminals) |kt| {
            if (!std.mem.eql(u8, prog, kt.name)) continue;
            caps.color_support = kt.color;
            caps.hyperlinks = kt.hyperlinks;
            caps.images = kt.images;
            caps.clipboard = kt.clipboard;
            caps.styled_underline = kt.styled_underline;
            break :blk true;
        }
        break :blk false;
    };

    // Stage 2: heuristics, only when the program was not recognized.
    if (!program_known) {
        if (std.posix.getenv("COLORTERM")) |colorterm| {
            if (std.mem.eql(u8, colorterm, "truecolor") or std.mem.eql(u8, colorterm, "24bit")) {
                caps.color_support = .true_color;
            }
        }

        if (caps.term_name) |term| {
            if (std.mem.indexOf(u8, term, "truecolor") != null or
                std.mem.indexOf(u8, term, "24bit") != null or
                std.mem.indexOf(u8, term, "direct") != null)
            {
                caps.color_support = .true_color;
            } else if (std.mem.indexOf(u8, term, "256") != null) {
                // "256" also covers the common "-256color" suffix.
                if (caps.color_support != .true_color) {
                    caps.color_support = .palette_256;
                }
            } else {
                for (known_terminals) |kt| {
                    if (!std.mem.startsWith(u8, term, kt.name)) continue;
                    // Rank by color count, not by enum tag value: the tag of
                    // palette_256 (255) is larger than that of true_color
                    // (254), so comparing tags would downgrade true color.
                    if (kt.color.maxColors() > caps.color_support.maxColors()) {
                        caps.color_support = kt.color;
                    }
                    caps.hyperlinks = caps.hyperlinks or kt.hyperlinks;
                    caps.images = caps.images or kt.images;
                    caps.clipboard = caps.clipboard or kt.clipboard;
                    caps.styled_underline = caps.styled_underline or kt.styled_underline;
                    break;
                }
            }
        }

        // Terminal-specific marker variables.
        if (std.posix.getenv("KITTY_WINDOW_ID") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
            caps.styled_underline = true;
        }
        if (std.posix.getenv("WEZTERM_PANE") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
            caps.styled_underline = true;
        }
        if (std.posix.getenv("ITERM_SESSION_ID") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
        }
        // `and` binds tighter than `or`: VSCODE_INJECTION alone is enough.
        if (std.posix.getenv("VSCODE_INJECTION") != null or
            std.posix.getenv("TERM_PROGRAM_VERSION") != null and caps.term_program != null and
            std.mem.eql(u8, caps.term_program.?, "vscode"))
        {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.clipboard = true;
        }
    }

    // Stage 3: user overrides. These must run even for a recognized
    // TERM_PROGRAM, otherwise NO_COLOR / FORCE_COLOR would be ignored there.

    // NO_COLOR (https://no-color.org/)
    if (std.posix.getenv("NO_COLOR") != null) {
        caps.color_support = .none;
    }

    // FORCE_COLOR: "", "1" or "true" guarantee at least basic color;
    // "2" and "3" force the 256-color palette and true color respectively.
    if (std.posix.getenv("FORCE_COLOR")) |force| {
        if (force.len == 0 or std.mem.eql(u8, force, "1") or std.mem.eql(u8, force, "true")) {
            if (caps.color_support == .none) {
                caps.color_support = .basic;
            }
        } else if (std.mem.eql(u8, force, "2")) {
            caps.color_support = .palette_256;
        } else if (std.mem.eql(u8, force, "3")) {
            caps.color_support = .true_color;
        }
    }

    // Unicode: first locale variable that is set wins; when none is set the
    // default from `Capabilities` is kept.
    const lang = std.posix.getenv("LC_ALL") orelse std.posix.getenv("LC_CTYPE") orelse std.posix.getenv("LANG");
    if (lang) |l| {
        caps.unicode = std.mem.indexOf(u8, l, "UTF-8") != null or
            std.mem.indexOf(u8, l, "utf-8") != null or
            std.mem.indexOf(u8, l, "UTF8") != null or
            std.mem.indexOf(u8, l, "utf8") != null;
    }

    return caps;
}
/// Expresses an RGB color in the richest form the terminal supports.
///
/// The active tag of the result follows `caps.color_support`:
/// - `rgb`: the input unchanged (true color)
/// - `palette`: index from `rgbTo256` (256-color palette)
/// - `basic`: index from `rgbToBasic`; bright variants are only produced at
///   the `extended` level
/// - `none`: the terminal has no color support
pub fn adaptColor(caps: Capabilities, r: u8, g: u8, b: u8) union(enum) {
    rgb: struct { r: u8, g: u8, b: u8 },
    palette: u8,
    basic: u8,
    none: void,
} {
    const level = caps.color_support;
    switch (level) {
        .none => return .{ .none = {} },
        .basic, .extended => return .{ .basic = rgbToBasic(r, g, b, level == .extended) },
        .palette_256 => return .{ .palette = rgbTo256(r, g, b) },
        .true_color => return .{ .rgb = .{ .r = r, .g = g, .b = b } },
    }
}
/// Converts an RGB color to an index in the 256-color palette.
///
/// - Grays (`r == g == b`) map to the 24-step grayscale ramp (232..255), with
///   the darkest values sent to cube black (16) and the lightest to cube
///   white (231).
/// - Every other color maps into the 6x6x6 cube (16..231) by scaling each
///   channel to 0..5.
pub fn rgbTo256(r: u8, g: u8, b: u8) u8 {
    if (r == g and g == b) {
        if (r < 8) return 16; // black
        // `>=`, not `>`: gray 248 would otherwise compute ramp step 24, i.e.
        // index 256, which does not fit in the u8 result.
        if (r >= 248) return 231; // white
        // r is 8..247 here, so the step is 0..23 and the index 232..255.
        return @intCast((((@as(u16, r) - 8) * 24) / 240) + 232);
    }

    // Scale each channel to a cube coordinate 0..5 (widened to avoid overflow).
    const r6: u8 = @intCast(((@as(u16, r) * 6) / 256));
    const g6: u8 = @intCast(((@as(u16, g) * 6) / 256));
    const b6: u8 = @intCast(((@as(u16, b) * 6) / 256));
    return 16 + 36 * r6 + 6 * g6 + b6;
}
/// Converts an RGB color to a basic color index.
///
/// The result is 0..7 (bit 0 = red, bit 1 = green, bit 2 = blue). When
/// `bright_support` is true and the strongest channel exceeds 170, 8 is added
/// to select the bright variant (8..15).
pub fn rgbToBasic(r: u8, g: u8, b: u8, bright_support: bool) u8 {
    const on_threshold: u8 = 85;
    const bright_threshold: u8 = 170;

    // One bit per channel that is "on".
    const red_bit: u8 = @intFromBool(r >= on_threshold);
    const green_bit: u8 = @intFromBool(g >= on_threshold);
    const blue_bit: u8 = @intFromBool(b >= on_threshold);
    const mask: u8 = red_bit | (green_bit << 1) | (blue_bit << 2);

    // A non-zero mask already is the color index. With no channel on, the
    // weighted luminance decides between black (0) and white (7).
    const base: u8 = if (mask != 0) mask else blk: {
        const luminance: u32 = (@as(u32, r) * 299 + @as(u32, g) * 587 + @as(u32, b) * 114) / 1000;
        break :blk @as(u8, if (luminance > 64) 7 else 0);
    };

    const peak = @max(@max(r, g), b);
    const brighten = bright_support and peak > bright_threshold;
    return if (brighten) base + 8 else base;
}
// ============================================================================
// Tests
// ============================================================================
test "ColorSupport methods" {
    const expect = std.testing.expect;

    // True color implies every lower capability.
    const full = ColorSupport.true_color;
    try expect(full.hasTrueColor());
    try expect(full.has256());
    try expect(full.hasColor());

    // The 256-color palette stops short of true color.
    const indexed = ColorSupport.palette_256;
    try expect(!indexed.hasTrueColor());
    try expect(indexed.has256());
    try expect(indexed.hasColor());

    // Basic color is color, but nothing more.
    const plain = ColorSupport.basic;
    try expect(!plain.hasTrueColor());
    try expect(!plain.has256());
    try expect(plain.hasColor());

    try expect(!ColorSupport.none.hasColor());
}
test "maxColors" {
    const Case = struct { level: ColorSupport, colors: u32 };
    const cases = [_]Case{
        .{ .level = .true_color, .colors = 16777216 },
        .{ .level = .palette_256, .colors = 256 },
        .{ .level = .extended, .colors = 16 },
        .{ .level = .basic, .colors = 8 },
        .{ .level = .none, .colors = 1 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.colors, case.level.maxColors());
    }
}
test "rgbTo256 grayscale" {
    // Both ends of the gray axis land on the cube corners, not on the ramp.
    const black = rgbTo256(0, 0, 0);
    const white = rgbTo256(255, 255, 255);
    try std.testing.expectEqual(@as(u8, 16), black);
    try std.testing.expectEqual(@as(u8, 231), white);
}
test "rgbTo256 colors" {
    // Pure red, green and blue must each land inside the color cube (16..231).
    const primaries = [_][3]u8{
        .{ 255, 0, 0 },
        .{ 0, 255, 0 },
        .{ 0, 0, 255 },
    };
    for (primaries) |rgb| {
        const index = rgbTo256(rgb[0], rgb[1], rgb[2]);
        try std.testing.expect(index >= 16 and index <= 231);
    }
}
test "rgbToBasic" {
    const Case = struct { rgb: [3]u8, bright: bool, want: u8 };
    const cases = [_]Case{
        .{ .rgb = .{ 0, 0, 0 }, .bright = false, .want = 0 }, // black
        .{ .rgb = .{ 255, 255, 255 }, .bright = false, .want = 7 }, // white
        .{ .rgb = .{ 255, 0, 0 }, .bright = false, .want = 1 }, // red
        .{ .rgb = .{ 255, 0, 0 }, .bright = true, .want = 9 }, // bright red
    };
    for (cases) |case| {
        const got = rgbToBasic(case.rgb[0], case.rgb[1], case.rgb[2], case.bright);
        try std.testing.expectEqual(case.want, got);
    }
}
test "detect returns valid capabilities" {
    // Smoke test: detect() must run to completion in whatever environment the
    // tests execute in. The tag is a u8, so the bound check itself cannot fail.
    const level: u8 = @intFromEnum(detect().color_support);
    try std.testing.expect(level <= 255);
}

330
src/unicode.zig Normal file
View file

@ -0,0 +1,330 @@
//! Unicode width calculation for TUI rendering.
//!
//! This module provides functions to calculate the display width of Unicode
//! characters and strings, essential for proper text alignment in terminal UIs.
//!
//! Most characters are single-width (1 cell), but:
//! - CJK characters are double-width (2 cells)
//! - Combining characters are zero-width (0 cells)
//! - Control characters are zero-width (0 cells)
//! - Some emojis are double-width (2 cells)
//!
//! ## Example
//!
//! ```zig
//! const unicode = @import("unicode.zig");
//!
//! // Single-width ASCII
//! try testing.expectEqual(@as(usize, 5), unicode.stringWidth("Hello"));
//!
//! // Double-width CJK
//! try testing.expectEqual(@as(usize, 4), unicode.stringWidth("日本"));
//!
//! // Mixed content
//! try testing.expectEqual(@as(usize, 9), unicode.stringWidth("Hello日本"));
//! ```
const std = @import("std");
/// Returns the number of terminal cells a Unicode codepoint occupies.
///
/// - 0 for C0/C1 control characters, DEL, combining marks and other
///   zero-width characters
/// - 2 for wide characters (CJK and the emoji ranges listed in `isWide`)
/// - 1 for everything else
///
/// The return type is signed, but this implementation never returns a
/// negative value.
pub fn charWidth(codepoint: u21) i8 {
    // C0 controls, DEL, and C1 controls.
    const is_control = codepoint < 0x20 or
        codepoint == 0x7F or
        (codepoint >= 0x80 and codepoint < 0xA0);

    // Zero-width classes are tested before the wide ranges, so a codepoint
    // that is in both is reported as 0.
    if (is_control or isCombining(codepoint) or isZeroWidth(codepoint)) return 0;

    return if (isWide(codepoint)) 2 else 1;
}
/// Same as `charWidth`, but as a `usize` suitable for column arithmetic.
/// A negative width is reported as 0.
pub fn charWidthUnsigned(codepoint: u21) usize {
    const cells = charWidth(codepoint);
    if (cells <= 0) return 0;
    return @intCast(cells);
}
/// Calculates the display width, in terminal cells, of a UTF-8 string.
///
/// Each decoded codepoint contributes `charWidthUnsigned(cp)` cells. Bytes
/// that do not form a valid UTF-8 sequence (bad lead byte, truncated or
/// malformed sequence) are counted as one cell per byte instead of being fed
/// to `Utf8Iterator`, which treats invalid input as unreachable.
pub fn stringWidth(str: []const u8) usize {
    var width: usize = 0;
    var i: usize = 0;
    while (i < str.len) {
        // Fallback for anything undecodable: one byte, one cell.
        var unit_len: usize = 1;
        var unit_width: usize = 1;
        decode: {
            const seq_len = std.unicode.utf8ByteSequenceLength(str[i]) catch break :decode;
            if (seq_len > str.len - i) break :decode; // sequence cut off by end of input
            const cp = std.unicode.utf8Decode(str[i..][0..seq_len]) catch break :decode;
            unit_len = seq_len;
            unit_width = charWidthUnsigned(cp);
        }
        width += unit_width;
        i += unit_len;
    }
    return width;
}
/// Measures the longest prefix of a UTF-8 string that fits in `max_width`
/// terminal cells.
///
/// Returns the byte length of that prefix (`bytes`) and its display width
/// (`width`). The prefix always ends on a codepoint boundary; a wide
/// character that would cross the limit is left out, so `width` may be less
/// than `max_width`. Zero-width codepoints directly after the last fitting
/// character are included.
///
/// Bytes that do not form a valid UTF-8 sequence are counted as one cell per
/// byte instead of being fed to `Utf8Iterator`, which treats invalid input as
/// unreachable.
pub fn stringWidthBounded(str: []const u8, max_width: usize) struct { bytes: usize, width: usize } {
    var width: usize = 0;
    var i: usize = 0;
    while (i < str.len) {
        // Fallback for anything undecodable: one byte, one cell.
        var unit_len: usize = 1;
        var unit_width: usize = 1;
        decode: {
            const seq_len = std.unicode.utf8ByteSequenceLength(str[i]) catch break :decode;
            if (seq_len > str.len - i) break :decode; // sequence cut off by end of input
            const cp = std.unicode.utf8Decode(str[i..][0..seq_len]) catch break :decode;
            unit_len = seq_len;
            unit_width = charWidthUnsigned(cp);
        }
        if (width + unit_width > max_width) break;
        width += unit_width;
        i += unit_len;
    }
    return .{ .bytes = i, .width = width };
}
/// Returns the longest prefix of `str` that fits in `max_width` display
/// columns. The result is a sub-slice of `str`; nothing is allocated.
pub fn truncateToWidth(str: []const u8, max_width: usize) []const u8 {
    const keep = stringWidthBounded(str, max_width).bytes;
    return str[0..keep];
}
/// Produces a copy of `str` that is exactly `target_width` columns wide.
///
/// A string that is too wide is truncated on a codepoint boundary; a string
/// that is too narrow is padded on the right with spaces. If truncation has
/// to drop a wide character that straddles the limit, the freed column is
/// filled with a space so the result still has the requested width.
///
/// Always allocates: the caller owns the returned slice and must free it
/// with `allocator`. Returns an error only if the allocation fails.
pub fn padToWidth(allocator: std.mem.Allocator, str: []const u8, target_width: usize) ![]u8 {
    // `kept` is all of `str` when it already fits.
    const kept = truncateToWidth(str, target_width);
    // truncateToWidth never exceeds target_width, so this cannot underflow.
    const padding = target_width - stringWidth(kept);

    const result = try allocator.alloc(u8, kept.len + padding);
    @memcpy(result[0..kept.len], kept);
    @memset(result[kept.len..], ' ');
    return result;
}
/// Reports whether a codepoint is a combining mark or variation selector,
/// i.e. it is drawn on top of the preceding character and takes no cell of
/// its own. Only the ranges listed below are recognized.
fn isCombining(cp: u21) bool {
    return switch (cp) {
        // Combining Diacritical Marks
        0x0300...0x036F,
        // Hebrew combining marks
        0x0591...0x05BD,
        0x05BF,
        0x05C1...0x05C2,
        0x05C4...0x05C5,
        0x05C7,
        // Arabic combining marks
        0x0610...0x061A,
        0x064B...0x065F,
        0x0670,
        0x06D6...0x06DC,
        0x06DF...0x06E4,
        0x06E7...0x06E8,
        0x06EA...0x06ED,
        // Thai combining marks. U+0E32 (SARA AA) and U+0E33 (SARA AM) are
        // spacing vowels that occupy a cell, so they are deliberately excluded.
        0x0E31,
        0x0E34...0x0E3A,
        0x0E47...0x0E4E,
        // Combining Diacritical Marks Extended
        0x1AB0...0x1AFF,
        // Combining Diacritical Marks Supplement
        0x1DC0...0x1DFF,
        // Combining Diacritical Marks for Symbols
        0x20D0...0x20FF,
        // Variation selectors
        0xFE00...0xFE0F,
        // Combining Half Marks
        0xFE20...0xFE2F,
        // Variation selectors supplement
        0xE0100...0xE01EF,
        => true,
        else => false,
    };
}
/// Reports whether a codepoint is one of the non-combining characters that
/// this module treats as occupying no cell.
fn isZeroWidth(cp: u21) bool {
    return switch (cp) {
        // Soft hyphen
        0x00AD,
        // Hangul choseong / jungseong fillers
        0x115F,
        0x1160,
        // Zero-width space, non-joiner, joiner
        0x200B...0x200D,
        // Word joiner and the other default ignorables in this block
        0x2060...0x206F,
        // Hangul filler
        0x3164,
        // Zero-width no-break space (BOM when not at start)
        0xFEFF,
        // Halfwidth Hangul filler
        0xFFA0,
        => true,
        else => false,
    };
}
/// Reports whether a codepoint falls in one of the ranges this module renders
/// as two cells: CJK scripts and symbols, Hangul syllables, fullwidth forms
/// and several emoji blocks. Emoji modifiers and ZWJ sequences are not
/// interpreted here; each codepoint is classified on its own.
fn isWide(cp: u21) bool {
    return switch (cp) {
        0x2E80...0x2EFF, // CJK Radicals Supplement
        0x2F00...0x2FDF, // Kangxi Radicals
        0x3000...0x303F, // CJK Symbols and Punctuation
        0x3040...0x309F, // Hiragana
        0x30A0...0x30FF, // Katakana
        0x3100...0x312F, // Bopomofo
        0x3130...0x318F, // Hangul Compatibility Jamo
        0x3190...0x319F, // Kanbun
        0x31A0...0x31BF, // Bopomofo Extended
        0x31C0...0x31EF, // CJK Strokes
        0x31F0...0x31FF, // Katakana Phonetic Extensions
        0x3200...0x32FF, // Enclosed CJK Letters and Months
        0x3300...0x33FF, // CJK Compatibility
        0x3400...0x4DBF, // CJK Unified Ideographs Extension A
        0x4E00...0x9FFF, // CJK Unified Ideographs
        0xA000...0xA48F, // Yi Syllables
        0xA490...0xA4CF, // Yi Radicals
        0xAC00...0xD7AF, // Hangul Syllables
        0xF900...0xFAFF, // CJK Compatibility Ideographs
        0xFF00...0xFF60, // Fullwidth forms
        0xFFE0...0xFFE6, // Fullwidth signs
        0x1F300...0x1F64F, // Misc Symbols and Pictographs + Emoticons
        0x1F680...0x1F6FF, // Transport and Map Symbols
        0x1F900...0x1F9FF, // Supplemental Symbols and Pictographs
        0x1FA00...0x1FA6F, // Chess Symbols
        0x1FA70...0x1FAFF, // Symbols and Pictographs Extended-A
        // CJK Unified Ideographs Extension B and later
        0x20000...0x2A6DF,
        0x2A700...0x2B73F,
        0x2B740...0x2B81F,
        0x2B820...0x2CEAF,
        0x2CEB0...0x2EBEF,
        0x30000...0x3134F,
        => true,
        else => false,
    };
}
// ============================================================================
// Tests
// ============================================================================
test "ASCII characters are single-width" {
    // A lowercase letter, an uppercase letter, a digit, punctuation, a space.
    for ("aZ0! ") |ch| {
        try std.testing.expectEqual(@as(i8, 1), charWidth(ch));
    }
}
test "Control characters are zero-width" {
    const controls = [_]u21{
        0x00, // NUL
        0x0A, // LF
        0x0D, // CR
        0x1B, // ESC
        0x7F, // DEL
    };
    for (controls) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}
test "CJK characters are double-width" {
    const ideographs = [_]u21{
        0x4E2D, // 中
        0x6587, // 文
        0x65E5, // 日
        0x672C, // 本
    };
    for (ideographs) |cp| {
        try std.testing.expectEqual(@as(i8, 2), charWidth(cp));
    }
}
test "Hiragana/Katakana are double-width" {
    const kana = [_]u21{
        0x3042, // あ (hiragana A)
        0x30A2, // ア (katakana A)
    };
    for (kana) |cp| {
        try std.testing.expectEqual(@as(i8, 2), charWidth(cp));
    }
}
test "Combining characters are zero-width" {
    const marks = [_]u21{
        0x0301, // combining acute
        0x0308, // combining diaeresis
    };
    for (marks) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}
test "Zero-width characters" {
    const invisibles = [_]u21{
        0x200B, // ZWSP
        0x200D, // ZWJ
        0xFEFF, // BOM
    };
    for (invisibles) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}
test "stringWidth for ASCII" {
    const Case = struct { text: []const u8, columns: usize };
    const cases = [_]Case{
        .{ .text = "Hello", .columns = 5 },
        .{ .text = "", .columns = 0 },
        .{ .text = "a", .columns = 1 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.columns, stringWidth(case.text));
    }
}
test "stringWidth for CJK" {
    // Every ideograph counts as two columns.
    const two_chars = stringWidth("日本");
    const three_chars = stringWidth("中文字");
    try std.testing.expectEqual(@as(usize, 4), two_chars);
    try std.testing.expectEqual(@as(usize, 6), three_chars);
}
test "stringWidth for mixed content" {
    // 5 ASCII columns + 2 wide characters.
    const suffix_wide = stringWidth("Hello日本");
    // 3 ASCII columns interleaved with 2 wide characters.
    const interleaved = stringWidth("a日b本c");
    try std.testing.expectEqual(@as(usize, 9), suffix_wide);
    try std.testing.expectEqual(@as(usize, 7), interleaved);
}
test "truncateToWidth" {
    // Five columns of ASCII are exactly the first five bytes.
    try std.testing.expectEqualStrings("Hello", truncateToWidth("Hello World", 5));
}
test "truncateToWidth with CJK" {
    // "日本語" is three 2-column characters. Only the first one fits in
    // 3 columns, and it is encoded as 3 bytes in UTF-8.
    const kept = truncateToWidth("日本語", 3);
    try std.testing.expectEqual(@as(usize, 3), kept.len);
}
test "stringWidthBounded" {
    // For ASCII the byte count and the column count coincide.
    const fit = stringWidthBounded("Hello World", 5);
    try std.testing.expectEqual(@as(usize, 5), fit.width);
    try std.testing.expectEqual(@as(usize, 5), fit.bytes);
}