feat: Add Unicode width calculation and terminal capability detection
Unicode width (src/unicode.zig): - charWidth(): Returns display width of Unicode codepoints (0, 1, or 2) - stringWidth(): Calculates display width of UTF-8 strings - truncateToWidth(): Truncates strings to fit display columns - stringWidthBounded(): Width calculation with byte tracking - Handles CJK (double-width), combining marks (zero-width), control characters, and emoji Terminal capabilities (src/termcap.zig): - detect(): Auto-detects terminal features from environment - ColorSupport: none, basic(8), extended(16), 256, true_color - Detects: hyperlinks, images, clipboard, mouse, unicode - Known terminals: kitty, WezTerm, iTerm, VSCode, Alacritty, etc. - Respects NO_COLOR and FORCE_COLOR env vars - rgbTo256(): Converts RGB to 256-color palette - rgbToBasic(): Converts RGB to 8/16 ANSI colors - adaptColor(): Downgrades colors based on terminal support 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
96810d80ea
commit
a928fc55fd
3 changed files with 749 additions and 0 deletions
14
src/root.zig
14
src/root.zig
|
|
@ -282,6 +282,18 @@ pub const SimpleFocusable = focus.SimpleFocusable;
|
|||
pub const theme = @import("theme.zig");
|
||||
pub const Theme = theme.Theme;
|
||||
|
||||
// Unicode width calculation (wcwidth)
|
||||
pub const unicode = @import("unicode.zig");
|
||||
pub const charWidth = unicode.charWidth;
|
||||
pub const stringWidth = unicode.stringWidth;
|
||||
pub const truncateToWidth = unicode.truncateToWidth;
|
||||
|
||||
// Terminal capability detection
|
||||
pub const termcap = @import("termcap.zig");
|
||||
pub const Capabilities = termcap.Capabilities;
|
||||
pub const ColorSupport = termcap.ColorSupport;
|
||||
pub const detectCapabilities = termcap.detect;
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
|
@ -300,6 +312,8 @@ test {
|
|||
_ = @import("cursor.zig");
|
||||
_ = @import("focus.zig");
|
||||
_ = @import("theme.zig");
|
||||
_ = @import("unicode.zig");
|
||||
_ = @import("termcap.zig");
|
||||
|
||||
// Comprehensive test suite
|
||||
_ = @import("tests/tests.zig");
|
||||
|
|
|
|||
405
src/termcap.zig
Normal file
405
src/termcap.zig
Normal file
|
|
@ -0,0 +1,405 @@
|
|||
//! Terminal capability detection.
|
||||
//!
|
||||
//! This module detects the color and feature support of the current terminal
|
||||
//! by examining environment variables.
|
||||
//!
|
||||
//! ## Color Support Levels
|
||||
//!
|
||||
//! - **No color**: Monochrome terminal
|
||||
//! - **Basic (8)**: 8 standard colors (black, red, green, yellow, blue, magenta, cyan, white)
|
||||
//! - **Extended (16)**: 8 colors + 8 bright variants
|
||||
//! - **256**: 256 color palette (6x6x6 cube + 24 grayscale)
|
||||
//! - **TrueColor (24-bit)**: Full RGB support (16 million colors)
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```zig
|
||||
//! const termcap = @import("termcap.zig");
|
||||
//!
|
||||
//! const caps = termcap.detect();
|
||||
//!
|
||||
//! if (caps.color_support.hasTrueColor()) {
|
||||
//! // Use RGB colors
|
||||
//! } else if (caps.color_support.has256()) {
|
||||
//! // Fall back to 256 colors
|
||||
//! } else {
|
||||
//! // Use basic colors
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// How many colors the terminal is able to display.
///
/// The raw `u8` tag values are identifiers only: `palette_256` is 255 and
/// `true_color` is 254, so they are NOT ordered by capability. Compare levels
/// through the predicate methods or `maxColors()`, never through
/// `@intFromEnum`.
pub const ColorSupport = enum(u8) {
    /// Monochrome terminal, no color at all.
    none = 0,
    /// The 8 standard colors.
    basic = 8,
    /// 16 colors: the 8 standard ones plus their bright variants.
    extended = 16,
    /// The 256-entry indexed palette.
    palette_256 = 255,
    /// 24-bit RGB (16M colors).
    true_color = 254,

    /// Reports whether at least the 256-color palette is available.
    pub fn has256(self: ColorSupport) bool {
        return switch (self) {
            .palette_256, .true_color => true,
            .none, .basic, .extended => false,
        };
    }

    /// Reports whether 24-bit RGB colors are available.
    pub fn hasTrueColor(self: ColorSupport) bool {
        return switch (self) {
            .true_color => true,
            else => false,
        };
    }

    /// Reports whether the terminal can show any color at all.
    pub fn hasColor(self: ColorSupport) bool {
        return switch (self) {
            .none => false,
            else => true,
        };
    }

    /// Number of distinct colors available at this level
    /// (1 for a monochrome terminal).
    pub fn maxColors(self: ColorSupport) u32 {
        return switch (self) {
            .true_color => 1 << 24,
            .palette_256 => 256,
            .extended => 16,
            .basic => 8,
            .none => 1,
        };
    }
};
|
||||
|
||||
/// Feature set of a terminal. The field defaults are the values used when
/// nothing more specific is known.
pub const Capabilities = struct {
    /// Color depth the terminal can render.
    color_support: ColorSupport = .basic,
    /// Value of the TERM environment variable, if set.
    term_name: ?[]const u8 = null,
    /// Value of the TERM_PROGRAM environment variable, if set.
    term_program: ?[]const u8 = null,
    /// Unicode output is supported.
    unicode: bool = true,
    /// OSC 8 hyperlinks are supported.
    hyperlinks: bool = false,
    /// Inline images (Kitty/Sixel/iTerm2) are supported.
    images: bool = false,
    /// OSC 52 clipboard access is supported.
    clipboard: bool = false,
    /// Bracketed paste is supported.
    bracketed_paste: bool = true,
    /// Mouse reporting is supported.
    mouse: bool = true,
    /// The alternate screen buffer is supported.
    alternate_screen: bool = true,
    /// Styled underlines are supported.
    styled_underline: bool = false,

    /// A terminal counts as "modern" when it has true color, hyperlinks and
    /// styled underlines all at once.
    pub fn isModern(self: Capabilities) bool {
        if (!self.color_support.hasTrueColor()) return false;
        if (!self.hyperlinks) return false;
        return self.styled_underline;
    }
};
|
||||
|
||||
/// One row of the known-terminal table: a name to match against
/// TERM_PROGRAM / TERM and the capabilities that name implies.
const KnownTerminal = struct {
    name: []const u8,
    color: ColorSupport,
    hyperlinks: bool = false,
    images: bool = false,
    clipboard: bool = false,
    styled_underline: bool = false,
};

/// Lookup table consulted by `detect()`. Lookups take the first matching
/// row, so the order of the rows is significant.
const known_terminals = [_]KnownTerminal{
    // --- Full-featured modern terminals ---
    .{ .name = "kitty", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
    .{ .name = "WezTerm", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
    .{ .name = "iTerm.app", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },
    .{ .name = "vscode", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
    .{ .name = "Hyper", .color = .true_color, .hyperlinks = true, .clipboard = true },
    .{ .name = "Alacritty", .color = .true_color, .hyperlinks = true, .clipboard = true },
    .{ .name = "foot", .color = .true_color, .hyperlinks = true, .clipboard = true, .styled_underline = true },
    .{ .name = "contour", .color = .true_color, .hyperlinks = true, .images = true, .clipboard = true, .styled_underline = true },

    // --- True color with hyperlinks ---
    .{ .name = "gnome-terminal", .color = .true_color, .hyperlinks = true },
    .{ .name = "konsole", .color = .true_color, .hyperlinks = true },
    .{ .name = "xfce4-terminal", .color = .true_color, .hyperlinks = true },
    .{ .name = "terminator", .color = .true_color, .hyperlinks = true },
    .{ .name = "tilix", .color = .true_color, .hyperlinks = true },
    .{ .name = "rio", .color = .true_color, .hyperlinks = true, .images = true },

    // --- macOS Terminal.app ---
    .{ .name = "Apple_Terminal", .color = .palette_256 },

    // --- Multiplexers (pass-through) ---
    .{ .name = "tmux", .color = .true_color, .clipboard = true },
    .{ .name = "screen", .color = .palette_256 },

    // --- Basic terminal types ---
    .{ .name = "linux", .color = .basic }, // Linux console
    .{ .name = "xterm", .color = .palette_256 },
    .{ .name = "rxvt", .color = .palette_256 },
};
|
||||
|
||||
/// Detects terminal capabilities from environment variables.
///
/// Detection proceeds in this order:
/// 1. An exact TERM_PROGRAM match in `known_terminals` fixes the color level
///    and feature flags; the heuristics of step 2 are then skipped.
/// 2. Otherwise COLORTERM, substrings/prefixes of TERM, and the
///    terminal-specific variables KITTY_WINDOW_ID, WEZTERM_PANE,
///    ITERM_SESSION_ID and VSCODE_INJECTION are consulted.
/// 3. The user overrides NO_COLOR and FORCE_COLOR are always applied, with
///    FORCE_COLOR evaluated last.
/// 4. `unicode` is always derived from LC_ALL / LC_CTYPE / LANG (first one
///    that is set); it keeps its default when none is set.
///
/// `term_name` and `term_program` hold the slices returned by
/// `std.posix.getenv` directly; nothing is copied or allocated.
pub fn detect() Capabilities {
    var caps = Capabilities{};

    caps.term_name = std.posix.getenv("TERM");
    caps.term_program = std.posix.getenv("TERM_PROGRAM");

    // Step 1: exact TERM_PROGRAM match. This deliberately does not return
    // early: the NO_COLOR / FORCE_COLOR overrides and the locale check below
    // must still run for known terminal programs.
    const program_is_known = blk: {
        const prog = caps.term_program orelse break :blk false;
        for (known_terminals) |kt| {
            if (!std.mem.eql(u8, prog, kt.name)) continue;
            caps.color_support = kt.color;
            caps.hyperlinks = kt.hyperlinks;
            caps.images = kt.images;
            caps.clipboard = kt.clipboard;
            caps.styled_underline = kt.styled_underline;
            break :blk true;
        }
        break :blk false;
    };

    // Step 2: heuristics, only when TERM_PROGRAM did not identify the terminal.
    if (!program_is_known) {
        // COLORTERM advertises true color.
        if (std.posix.getenv("COLORTERM")) |colorterm| {
            if (std.mem.eql(u8, colorterm, "truecolor") or std.mem.eql(u8, colorterm, "24bit")) {
                caps.color_support = .true_color;
            }
        }

        // Color hints embedded in TERM.
        if (caps.term_name) |term| {
            if (std.mem.indexOf(u8, term, "truecolor") != null or
                std.mem.indexOf(u8, term, "24bit") != null or
                std.mem.indexOf(u8, term, "direct") != null)
            {
                caps.color_support = .true_color;
            } else if (std.mem.indexOf(u8, term, "256") != null) {
                // "256" also covers the "256color" suffix. Never downgrade a
                // true-color result obtained from COLORTERM.
                if (caps.color_support != .true_color) {
                    caps.color_support = .palette_256;
                }
            } else {
                // Fall back to matching TERM by prefix against the table.
                for (known_terminals) |kt| {
                    if (!std.mem.startsWith(u8, term, kt.name)) continue;
                    // Compare by color count, not by enum ordinal: the raw
                    // value of palette_256 (255) is larger than that of
                    // true_color (254), so an ordinal comparison would
                    // downgrade true color to 256 colors.
                    if (kt.color.maxColors() > caps.color_support.maxColors()) {
                        caps.color_support = kt.color;
                    }
                    caps.hyperlinks = caps.hyperlinks or kt.hyperlinks;
                    caps.images = caps.images or kt.images;
                    caps.clipboard = caps.clipboard or kt.clipboard;
                    // Merged like the other flags, matching the TERM_PROGRAM path.
                    caps.styled_underline = caps.styled_underline or kt.styled_underline;
                    break;
                }
            }
        }

        // Terminal-specific marker variables.
        if (std.posix.getenv("KITTY_WINDOW_ID") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
            caps.styled_underline = true;
        }

        if (std.posix.getenv("WEZTERM_PANE") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
            caps.styled_underline = true;
        }

        if (std.posix.getenv("ITERM_SESSION_ID") != null) {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.images = true;
            caps.clipboard = true;
        }

        const program_is_vscode = if (caps.term_program) |prog|
            std.mem.eql(u8, prog, "vscode")
        else
            false;
        if (std.posix.getenv("VSCODE_INJECTION") != null or
            (std.posix.getenv("TERM_PROGRAM_VERSION") != null and program_is_vscode))
        {
            caps.color_support = .true_color;
            caps.hyperlinks = true;
            caps.clipboard = true;
        }
    }

    // Step 3a: NO_COLOR (https://no-color.org/) disables color when it is
    // present and not an empty string, regardless of its value.
    if (std.posix.getenv("NO_COLOR")) |no_color| {
        if (no_color.len > 0) {
            caps.color_support = .none;
        }
    }

    // Step 3b: FORCE_COLOR is evaluated after NO_COLOR and can re-enable color.
    if (std.posix.getenv("FORCE_COLOR")) |force| {
        if (force.len == 0 or std.mem.eql(u8, force, "1") or std.mem.eql(u8, force, "true")) {
            if (caps.color_support == .none) {
                caps.color_support = .basic;
            }
        } else if (std.mem.eql(u8, force, "2")) {
            caps.color_support = .palette_256;
        } else if (std.mem.eql(u8, force, "3")) {
            caps.color_support = .true_color;
        }
    }

    // Step 4: Unicode support, judged by a UTF-8 marker in the locale.
    const locale = std.posix.getenv("LC_ALL") orelse
        std.posix.getenv("LC_CTYPE") orelse
        std.posix.getenv("LANG");
    if (locale) |l| {
        caps.unicode = std.mem.indexOf(u8, l, "UTF-8") != null or
            std.mem.indexOf(u8, l, "utf-8") != null or
            std.mem.indexOf(u8, l, "UTF8") != null or
            std.mem.indexOf(u8, l, "utf8") != null;
    }

    return caps;
}
|
||||
|
||||
/// Maps an RGB color onto whatever the terminal described by `caps` can show.
///
/// The active tag of the result follows `caps.color_support`:
/// - `.rgb`: the input unchanged (true color)
/// - `.palette`: index produced by `rgbTo256` (256-color palette)
/// - `.basic`: index produced by `rgbToBasic`, with bright variants enabled
///   only for the 16-color level
/// - `.none`: the terminal has no color support
pub fn adaptColor(caps: Capabilities, r: u8, g: u8, b: u8) union(enum) {
    rgb: struct { r: u8, g: u8, b: u8 },
    palette: u8,
    basic: u8,
    none: void,
} {
    return switch (caps.color_support) {
        .none => .{ .none = {} },
        .basic => .{ .basic = rgbToBasic(r, g, b, false) },
        .extended => .{ .basic = rgbToBasic(r, g, b, true) },
        .palette_256 => .{ .palette = rgbTo256(r, g, b) },
        .true_color => .{ .rgb = .{ .r = r, .g = g, .b = b } },
    };
}
|
||||
|
||||
/// Converts an RGB color to an index in the 256-color palette.
///
/// Pure grays (r == g == b) map to palette black (16), palette white (231)
/// or one of the 24 grayscale ramp entries (232..255). Every other color
/// maps into the 6x6x6 color cube (16..231) by scaling each channel to 0..5.
pub fn rgbTo256(r: u8, g: u8, b: u8) u8 {
    if (r == g and g == b) {
        if (r < 8) return 16; // black
        // Level 248 must be handled here as well: the ramp formula below
        // yields step 24 for it, i.e. index 256, which does not fit in a u8
        // and would make the @intCast fail.
        if (r >= 248) return 231; // white
        const step = ((@as(u16, r) - 8) * 24) / 240; // 0..23 for r in 8..247
        return @intCast(232 + step);
    }

    // Scale each channel from 0..255 down to a cube coordinate in 0..5.
    const r6: u8 = @intCast((@as(u16, r) * 6) / 256);
    const g6: u8 = @intCast((@as(u16, g) * 6) / 256);
    const b6: u8 = @intCast((@as(u16, b) * 6) / 256);

    return 16 + 36 * r6 + 6 * g6 + b6;
}
|
||||
|
||||
/// Converts an RGB color to a basic ANSI color index.
///
/// Each channel at or above 85 contributes one bit (red = 1, green = 2,
/// blue = 4), which yields the base index 0..7. When no channel reaches the
/// threshold, luminance decides between black (0) and white (7).
/// If `bright_support` is true and the strongest channel exceeds 170, the
/// bright variant (base + 8) is returned.
pub fn rgbToBasic(r: u8, g: u8, b: u8, bright_support: bool) u8 {
    const channel_on: u8 = 85;

    const red_bit: u8 = if (r >= channel_on) 1 else 0;
    const green_bit: u8 = if (g >= channel_on) 2 else 0;
    const blue_bit: u8 = if (b >= channel_on) 4 else 0;
    const mask: u8 = red_bit | green_bit | blue_bit;

    // A non-zero mask already is the ANSI index; an all-dark color is
    // resolved to black or white through its luminance.
    const base: u8 = if (mask != 0) mask else blk: {
        const luminance: u32 = (@as(u32, r) * 299 + @as(u32, g) * 587 + @as(u32, b) * 114) / 1000;
        break :blk @as(u8, if (luminance > 64) 7 else 0);
    };

    const strongest = @max(r, @max(g, b));
    if (bright_support and strongest > 170) return base + 8;
    return base;
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
test "ColorSupport methods" {
    const Case = struct { level: ColorSupport, true_color: bool, has_256: bool, any_color: bool };
    const cases = [_]Case{
        .{ .level = .true_color, .true_color = true, .has_256 = true, .any_color = true },
        .{ .level = .palette_256, .true_color = false, .has_256 = true, .any_color = true },
        .{ .level = .basic, .true_color = false, .has_256 = false, .any_color = true },
        .{ .level = .none, .true_color = false, .has_256 = false, .any_color = false },
    };

    for (cases) |case| {
        try std.testing.expectEqual(case.true_color, case.level.hasTrueColor());
        try std.testing.expectEqual(case.has_256, case.level.has256());
        try std.testing.expectEqual(case.any_color, case.level.hasColor());
    }
}

test "maxColors" {
    const Case = struct { level: ColorSupport, count: u32 };
    const cases = [_]Case{
        .{ .level = .true_color, .count = 16777216 },
        .{ .level = .palette_256, .count = 256 },
        .{ .level = .extended, .count = 16 },
        .{ .level = .basic, .count = 8 },
        .{ .level = .none, .count = 1 },
    };

    for (cases) |case| {
        try std.testing.expectEqual(case.count, case.level.maxColors());
    }
}

test "rgbTo256 grayscale" {
    // Pure black and pure white map to the cube corners, not the gray ramp.
    try std.testing.expectEqual(@as(u8, 16), rgbTo256(0, 0, 0));
    try std.testing.expectEqual(@as(u8, 231), rgbTo256(255, 255, 255));
}

test "rgbTo256 colors" {
    // Saturated primaries must land inside the 6x6x6 cube (16..231).
    const primaries = [_][3]u8{
        .{ 255, 0, 0 },
        .{ 0, 255, 0 },
        .{ 0, 0, 255 },
    };

    for (primaries) |rgb| {
        const index = rgbTo256(rgb[0], rgb[1], rgb[2]);
        try std.testing.expect(index >= 16 and index <= 231);
    }
}

test "rgbToBasic" {
    const Case = struct { rgb: [3]u8, bright: bool, expected: u8 };
    const cases = [_]Case{
        .{ .rgb = .{ 0, 0, 0 }, .bright = false, .expected = 0 }, // black
        .{ .rgb = .{ 255, 255, 255 }, .bright = false, .expected = 7 }, // white
        .{ .rgb = .{ 255, 0, 0 }, .bright = false, .expected = 1 }, // red
        .{ .rgb = .{ 255, 0, 0 }, .bright = true, .expected = 9 }, // bright red
    };

    for (cases) |case| {
        const actual = rgbToBasic(case.rgb[0], case.rgb[1], case.rgb[2], case.bright);
        try std.testing.expectEqual(case.expected, actual);
    }
}

test "detect returns valid capabilities" {
    // Smoke test: detection must complete and yield a representable level.
    const raw_level: u8 = @intFromEnum(detect().color_support);
    try std.testing.expect(raw_level <= 255);
}
|
||||
330
src/unicode.zig
Normal file
330
src/unicode.zig
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
//! Unicode width calculation for TUI rendering.
|
||||
//!
|
||||
//! This module provides functions to calculate the display width of Unicode
|
||||
//! characters and strings, essential for proper text alignment in terminal UIs.
|
||||
//!
|
||||
//! Most characters are single-width (1 cell), but:
|
||||
//! - CJK characters are double-width (2 cells)
|
||||
//! - Combining characters are zero-width (0 cells)
|
||||
//! - Control characters are zero-width (0 cells)
|
||||
//! - Some emojis are double-width (2 cells)
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```zig
|
||||
//! const unicode = @import("unicode.zig");
|
||||
//!
|
||||
//! // Single-width ASCII
|
||||
//! try testing.expectEqual(@as(usize, 5), unicode.stringWidth("Hello"));
|
||||
//!
|
||||
//! // Double-width CJK
|
||||
//! try testing.expectEqual(@as(usize, 4), unicode.stringWidth("日本"));
|
||||
//!
|
||||
//! // Mixed content
|
||||
//! try testing.expectEqual(@as(usize, 9), unicode.stringWidth("Hello日本"));
|
||||
//! ```
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
/// Returns the number of terminal cells a Unicode codepoint occupies.
///
/// - 0 for C0/C1 control characters, DEL, combining marks and other
///   zero-width codepoints
/// - 2 for wide characters (CJK, some emoji)
/// - 1 for everything else
///
/// The return type is signed, but this implementation never returns a
/// negative value.
pub fn charWidth(codepoint: u21) i8 {
    const is_c0_or_del = codepoint < 0x20 or codepoint == 0x7F;
    const is_c1 = codepoint >= 0x80 and codepoint < 0xA0;
    if (is_c0_or_del or is_c1) return 0;

    // Zero-width classes are checked before the wide ranges, so a codepoint
    // present in both is reported as zero-width.
    if (isCombining(codepoint) or isZeroWidth(codepoint)) return 0;

    return if (isWide(codepoint)) 2 else 1;
}
|
||||
|
||||
/// Same as `charWidth`, but returned as `usize` for use in width sums.
/// A negative width from `charWidth` is reported as 0.
pub fn charWidthUnsigned(codepoint: u21) usize {
    const width = charWidth(codepoint);
    if (width <= 0) return 0;
    return @intCast(width);
}
|
||||
|
||||
/// Returns the total display width, in terminal cells, of a UTF-8 string.
///
/// `str` is expected to be valid UTF-8: it is walked with
/// `std.unicode.Utf8Iterator`, and this function does no validation of its own.
pub fn stringWidth(str: []const u8) usize {
    var codepoints = std.unicode.Utf8Iterator{ .bytes = str, .i = 0 };
    var total: usize = 0;

    while (codepoints.nextCodepoint()) |codepoint| {
        total += charWidthUnsigned(codepoint);
    }

    return total;
}
|
||||
|
||||
/// Measures the longest prefix of a UTF-8 string that fits in `max_width` cells.
///
/// Returns the byte length of that prefix (`bytes`) and its display width
/// (`width`). Scanning stops at the first codepoint that would push the
/// width past `max_width`; zero-width codepoints before that point are
/// included in the prefix.
///
/// `str` is expected to be valid UTF-8: it is walked with
/// `std.unicode.Utf8Iterator`, and this function does no validation of its own.
pub fn stringWidthBounded(str: []const u8, max_width: usize) struct { bytes: usize, width: usize } {
    var codepoints = std.unicode.Utf8Iterator{ .bytes = str, .i = 0 };
    var used_width: usize = 0;
    var consumed_bytes: usize = 0;

    while (codepoints.nextCodepoint()) |codepoint| {
        const next_width = used_width + charWidthUnsigned(codepoint);
        if (next_width > max_width) break;
        used_width = next_width;
        // The iterator index now points just past the accepted codepoint.
        consumed_bytes = codepoints.i;
    }

    return .{ .bytes = consumed_bytes, .width = used_width };
}
|
||||
|
||||
/// Returns the longest prefix of `str` whose display width does not exceed
/// `max_width` columns. The result is a slice of `str`; nothing is copied.
pub fn truncateToWidth(str: []const u8, max_width: usize) []const u8 {
    return str[0..stringWidthBounded(str, max_width).bytes];
}
|
||||
|
||||
/// Produces a copy of `str` that is exactly `target_width` display columns wide.
///
/// A narrower string is right-padded with spaces. A wider string is
/// truncated at a codepoint boundary; when the cut falls in front of a wide
/// (2-cell) character, the column left over is filled with a space so the
/// result still has the requested width.
///
/// The result is always newly allocated. The caller owns the returned slice
/// and must free it with `allocator`. Returns an error if allocation fails.
pub fn padToWidth(allocator: std.mem.Allocator, str: []const u8, target_width: usize) ![]u8 {
    // Longest prefix that fits; this is the whole string when it is narrow enough.
    const fit = stringWidthBounded(str, target_width);
    // fit.width never exceeds target_width, so this cannot underflow.
    const padding = target_width - fit.width;

    const result = try allocator.alloc(u8, fit.bytes + padding);
    @memcpy(result[0..fit.bytes], str[0..fit.bytes]);
    @memset(result[fit.bytes..], ' ');
    return result;
}
|
||||
|
||||
/// Reports whether `cp` falls in one of the combining-mark or
/// variation-selector ranges listed below (all treated as zero-width).
fn isCombining(cp: u21) bool {
    return switch (cp) {
        // Combining Diacritical Marks
        0x0300...0x036F,
        // Combining Diacritical Marks Extended
        0x1AB0...0x1AFF,
        // Combining Diacritical Marks Supplement
        0x1DC0...0x1DFF,
        // Combining Diacritical Marks for Symbols
        0x20D0...0x20FF,
        // Combining Half Marks
        0xFE20...0xFE2F,
        // Thai combining marks
        0x0E31...0x0E3A,
        0x0E47...0x0E4E,
        // Hebrew combining marks
        0x0591...0x05BD,
        0x05BF,
        0x05C1...0x05C2,
        0x05C4...0x05C5,
        0x05C7,
        // Arabic combining marks
        0x0610...0x061A,
        0x064B...0x065F,
        0x0670,
        0x06D6...0x06DC,
        0x06DF...0x06E4,
        0x06E7...0x06E8,
        0x06EA...0x06ED,
        // Variation selectors
        0xFE00...0xFE0F,
        0xE0100...0xE01EF,
        => true,
        else => false,
    };
}
|
||||
|
||||
/// Reports whether `cp` is one of the non-combining codepoints this module
/// treats as occupying no cell.
fn isZeroWidth(cp: u21) bool {
    return switch (cp) {
        // Soft hyphen
        0x00AD,
        // Zero-width space, zero-width non-joiner, zero-width joiner
        0x200B...0x200D,
        // Word joiner (0x2060) and the following default ignorables
        0x2060...0x206F,
        // Zero-width no-break space (BOM when not at start)
        0xFEFF,
        // Hangul fillers
        0x115F,
        0x1160,
        0x3164,
        0xFFA0,
        => true,
        else => false,
    };
}
|
||||
|
||||
/// Reports whether `cp` lies in one of the ranges this module renders as
/// two cells wide: CJK blocks, Hangul syllables, fullwidth forms and a set
/// of emoji blocks. Emoji modifiers and ZWJ sequences are not handled here.
fn isWide(cp: u21) bool {
    return switch (cp) {
        // CJK Radicals Supplement
        0x2E80...0x2EFF,
        // Kangxi Radicals
        0x2F00...0x2FDF,
        // CJK Symbols and Punctuation
        0x3000...0x303F,
        // Hiragana
        0x3040...0x309F,
        // Katakana
        0x30A0...0x30FF,
        // Bopomofo
        0x3100...0x312F,
        // Hangul Compatibility Jamo
        0x3130...0x318F,
        // Kanbun
        0x3190...0x319F,
        // Bopomofo Extended
        0x31A0...0x31BF,
        // CJK Strokes
        0x31C0...0x31EF,
        // Katakana Phonetic Extensions
        0x31F0...0x31FF,
        // Enclosed CJK Letters and Months
        0x3200...0x32FF,
        // CJK Compatibility
        0x3300...0x33FF,
        // CJK Unified Ideographs Extension A
        0x3400...0x4DBF,
        // CJK Unified Ideographs
        0x4E00...0x9FFF,
        // Yi Syllables
        0xA000...0xA48F,
        // Yi Radicals
        0xA490...0xA4CF,
        // Hangul Syllables
        0xAC00...0xD7AF,
        // CJK Compatibility Ideographs
        0xF900...0xFAFF,
        // Halfwidth and Fullwidth Forms (fullwidth only)
        0xFF00...0xFF60,
        0xFFE0...0xFFE6,
        // CJK Unified Ideographs Extension B-F
        0x20000...0x2A6DF,
        0x2A700...0x2B73F,
        0x2B740...0x2B81F,
        0x2B820...0x2CEAF,
        0x2CEB0...0x2EBEF,
        0x30000...0x3134F,
        // Misc Symbols and Pictographs + Emoticons
        0x1F300...0x1F64F,
        // Transport and Map Symbols
        0x1F680...0x1F6FF,
        // Supplemental Symbols and Pictographs
        0x1F900...0x1F9FF,
        // Chess Symbols
        0x1FA00...0x1FA6F,
        // Symbols and Pictographs Extended-A
        0x1FA70...0x1FAFF,
        => true,
        else => false,
    };
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
test "ASCII characters are single-width" {
    for ([_]u21{ 'a', 'Z', '0', '!', ' ' }) |cp| {
        try std.testing.expectEqual(@as(i8, 1), charWidth(cp));
    }
}

test "Control characters are zero-width" {
    // NUL, LF, CR, ESC, DEL
    for ([_]u21{ 0x00, 0x0A, 0x0D, 0x1B, 0x7F }) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}

test "CJK characters are double-width" {
    // 中, 文, 日, 本
    for ([_]u21{ 0x4E2D, 0x6587, 0x65E5, 0x672C }) |cp| {
        try std.testing.expectEqual(@as(i8, 2), charWidth(cp));
    }
}

test "Hiragana/Katakana are double-width" {
    // あ, ア
    for ([_]u21{ 0x3042, 0x30A2 }) |cp| {
        try std.testing.expectEqual(@as(i8, 2), charWidth(cp));
    }
}

test "Combining characters are zero-width" {
    // combining acute, combining diaeresis
    for ([_]u21{ 0x0301, 0x0308 }) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}

test "Zero-width characters" {
    // ZWSP, ZWJ, BOM
    for ([_]u21{ 0x200B, 0x200D, 0xFEFF }) |cp| {
        try std.testing.expectEqual(@as(i8, 0), charWidth(cp));
    }
}

test "stringWidth for ASCII" {
    const Case = struct { text: []const u8, width: usize };
    const cases = [_]Case{
        .{ .text = "Hello", .width = 5 },
        .{ .text = "", .width = 0 },
        .{ .text = "a", .width = 1 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.width, stringWidth(case.text));
    }
}

test "stringWidth for CJK" {
    const Case = struct { text: []const u8, width: usize };
    const cases = [_]Case{
        .{ .text = "日本", .width = 4 },
        .{ .text = "中文字", .width = 6 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.width, stringWidth(case.text));
    }
}

test "stringWidth for mixed content" {
    const Case = struct { text: []const u8, width: usize };
    const cases = [_]Case{
        .{ .text = "Hello日本", .width = 9 },
        .{ .text = "a日b本c", .width = 7 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.width, stringWidth(case.text));
    }
}

test "truncateToWidth" {
    try std.testing.expectEqualStrings("Hello", truncateToWidth("Hello World", 5));
}

test "truncateToWidth with CJK" {
    // Each character is 2 columns wide, so only "日" fits into 3 columns;
    // it is encoded as 3 bytes of UTF-8.
    const prefix = truncateToWidth("日本語", 3);
    try std.testing.expectEqual(@as(usize, 3), prefix.len);
}

test "stringWidthBounded" {
    const measured = stringWidthBounded("Hello World", 5);
    try std.testing.expectEqual(@as(usize, 5), measured.bytes);
    try std.testing.expectEqual(@as(usize, 5), measured.width);
}
|
||||
Loading…
Reference in a new issue