From 0342d5c145f5f52864ece3a4a310775731258e3c Mon Sep 17 00:00:00 2001
From: "R.Eugenio"
Date: Fri, 2 Jan 2026 01:49:54 +0100
Subject: [PATCH] perf(render): Shadow Baking + Glyph Blitting optimizado (v0.27.0-v0.27.1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shadow Baking:
- ShadowCache prerenderiza sombras blur (key: w,h,blur,radius,spread)
- initWithCache() habilita cache, deinit() lo libera
- 4.2x más rápido en Debug, 2.5x en ReleaseSafe

Glyph Blitting:
- Early exit si glifo fuera de clip
- Pre-cálculo región visible
- Acceso directo fb.pixels[]
- Aritmética u32 (sin structs Color)
- Fast path alpha=255

Correcciones:
- Integer overflow: saturating arithmetic (+|, -|, *|)
- u16→u32 en blitShadowCache

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 CHANGELOG.md            |  72 ++++++++
 src/render/software.zig | 394 +++++++++++++++++++++++++++++++++++++++-
 src/render/ttf.zig      |  86 ++++++---
 3 files changed, 515 insertions(+), 37 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4da3b15..534ed32 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,6 +47,8 @@
 | 2025-12-30 | v0.25.0 | ⭐ IdleCompanion widget: mascota animada que aparece tras inactividad |
 | 2025-12-31 | v0.26.0 | ⭐ Z-Design V5 Pixel Perfect: títulos legibles (contrastTextColor), botones centrados (+2px TTF), semáforo reubicado (texto + cuadrado derecha) |
 | 2025-12-31 | v0.26.1 | Fix: drawBeveledRect bisel ahora +1px inset (no solapa borde exterior) |
+| 2026-01-02 | v0.27.0 | ⭐ **PERF: Shadow Baking** - Cache de sombras prerenderizadas en SoftwareRenderer |
+| 2026-01-02 | v0.27.1 | ⭐ **PERF: Glyph Blitting Optimizado** - drawGlyphBitmap con acceso directo a píxeles |
 
 ---
 
@@ -116,3 +118,73 @@ Sistema de mascota animada reutilizable para cualquier aplicación:
 ```
 
 → Archivo: `widgets/idle_companion.zig`
+
+### v0.27.0-v0.27.1 - Optimizaciones de Rendimiento (2026-01-02)
+
+Optimizaciones profundas del motor de renderizado software para alcanzar 60fps.
+
+#### Shadow Baking (v0.27.0)
+
+Sistema de cache de sombras prerenderizadas que elimina el recálculo costoso de sombras blur cada frame.
+
+**Problema original:**
+- Las sombras con blur dibujaban N capas de rectángulos redondeados por frame
+- Cada capa ejecutaba `fillRoundedRect` que hace `@sqrt()` por cada píxel
+- Una sombra blur=8 sobre 200x100px = 8 × 20,000 × sqrt = 160,000 operaciones sqrt/frame
+
+**Solución:**
+- Nuevo `ShadowCache` struct con HashMap para sombras prerenderizadas
+- Key: (w, h, blur, radius, spread) - forma de la sombra
+- Value: Buffer de alpha (u8[]) con la sombra ya calculada
+- Primer frame: renderiza sombra a buffer, cachea
+- Frames siguientes: blit rápido del buffer con color aplicado
+
+**Resultados medidos:**
+| Modo | Sin Cache | Con Cache | Mejora |
+|------|-----------|-----------|--------|
+| Debug | ~180ms | ~43ms | **4.2x** |
+| ReleaseSafe | ~11ms | ~4.4ms | **2.5x** |
+
+**API:**
+```zig
+// Antes (sin cache)
+var renderer = SoftwareRenderer.init(&fb);
+
+// Ahora (con cache - recomendado)
+var renderer = SoftwareRenderer.initWithCache(&fb, allocator);
+defer renderer.deinit();
+```
+
+**Archivos modificados:**
+- `src/render/software.zig`: +200 LOC (ShadowCache, ShadowCacheKey, ShadowCacheEntry, fillAlphaRect, fillAlphaRectAdditive, blitShadowCache)
+
+#### Glyph Blitting Optimizado (v0.27.1)
+
+Optimización de `drawGlyphBitmap` en TTF rendering.
+
+**Problema original:**
+- 6 checks de bounds por cada píxel del glifo
+- Llamadas a `getPixel()`/`setPixel()` (con más bounds checks)
+- Creación de struct `Color` por cada píxel semi-transparente
+- Sin early exit para glifos fuera del clip
+
+**Optimizaciones aplicadas:**
+1. **Early exit**: Si glifo entero está fuera de clip/framebuffer, return inmediato
+2. **Pre-cálculo de región visible**: Intersección glifo ∩ clip ∩ framebuffer calculada una vez
+3. **Acceso directo a `fb.pixels[]`**: Sin getPixel/setPixel
+4. 
**Aritmética u32**: Sin crear structs Color por cada píxel
+5. **Fast path opaco**: Si alpha=255, escritura directa sin blending
+
+**Archivos modificados:**
+- `src/render/ttf.zig`: drawGlyphBitmap() reescrito (~80 LOC)
+
+#### Correcciones de Estabilidad
+
+Durante la implementación se detectaron y corrigieron varios integer overflows:
+
+- **fillAlphaRect/fillAlphaRectAdditive**: Aritmética saturante (`+|`, `-|`, `*|`) y bounds checks
+- **blitShadowCache**: Color math con u32 en lugar de u16 para evitar overflow en multiplicación
+- **renderShadow**: Límite de tamaño máximo (2048x2048) para evitar allocaciones enormes
+
+→ Archivos: `src/render/software.zig`, `src/render/ttf.zig`
+→ Telemetría: Añadida en `zsimifactu/src/main.zig` para medir Exec time
diff --git a/src/render/software.zig b/src/render/software.zig
index 0fb59f2..03a1f77 100644
--- a/src/render/software.zig
+++ b/src/render/software.zig
@@ -23,6 +23,7 @@
 //! ```
 
 const std = @import("std");
+const Allocator = std.mem.Allocator;
 
 const Command = @import("../core/command.zig");
 const Style = @import("../core/style.zig");
@@ -35,6 +36,279 @@ const Color = Style.Color;
 const Rect = Layout.Rect;
 const DrawCommand = Command.DrawCommand;
 
+// =============================================================================
+// Shadow Cache - Pre-rendered shadows for instant blitting
+// =============================================================================
+
+/// Cache key: the shadow's shape only (position, offset and color are applied at blit time)
+const ShadowCacheKey = struct {
+    w: u32, // width of the casting rect, before blur/spread expansion
+    h: u32, // height of the casting rect, before blur/spread expansion
+    blur: u8, // blur size; 0 selects the hard-shadow path, otherwise the number of soft layers
+    radius: u8, // corner radius of the casting rect
+    spread: i8, // signed spread; only its magnitude is used when the shadow is rendered
+};
+
+/// Cached pre-rendered shadow (alpha channel only)
+const ShadowCacheEntry = struct {
+    /// Alpha values (0-255), row-major, `width * height` bytes
+    alpha: []u8,
+    /// Total buffer width: key.w plus `padding` on both sides
+    width: u32,
+    /// Total buffer height: key.h plus `padding` on both sides
+    height: u32,
+    /// Pixels the buffer extends beyond the original rect on each side (blur + |spread|)
+    padding: u32,
+};
+
+/// Bounded shadow cache (evicts an arbitrary entry when full; not true LRU)
+const ShadowCache = struct {
+    entries: std.AutoHashMap(ShadowCacheKey, ShadowCacheEntry),
+    allocator: Allocator,
+    /// Maximum cached shadows (one arbitrary entry is evicted when the limit is reached)
+    max_entries: usize = 64,
+
+    pub fn init(allocator: Allocator) ShadowCache {
+        return .{
+            .entries = std.AutoHashMap(ShadowCacheKey, ShadowCacheEntry).init(allocator),
+            .allocator = allocator,
+        };
+    }
+
+    pub fn deinit(self: *ShadowCache) void {
+        var it = self.entries.iterator();
+        while (it.next()) |entry| {
+            self.allocator.free(entry.value_ptr.alpha);
+        }
+        self.entries.deinit();
+    }
+
+    /// Return the cached shadow for `key`, rendering and caching it on a miss (null if it cannot be rendered or stored)
+    pub fn getOrCreate(self: *ShadowCache, key: ShadowCacheKey) ?ShadowCacheEntry {
+        // Check cache first
+        if (self.entries.get(key)) |entry| {
+            return entry;
+        }
+
+        // Evict if cache is full
+        if (self.entries.count() >= self.max_entries) {
+            // Simple eviction: remove first entry (not true LRU, but fast)
+            var it = self.entries.iterator();
+            if (it.next()) |first| {
+                self.allocator.free(first.value_ptr.alpha);
+                self.entries.removeByPtr(first.key_ptr);
+            }
+        }
+
+        // Render new shadow; if the insert fails, free its buffer so it is not leaked
+        const entry = self.renderShadow(key) orelse return null;
+        self.entries.put(key, entry) catch { self.allocator.free(entry.alpha); return null; };
+        return entry;
+    }
+
+    /// Render the shape for `key` into a newly allocated alpha buffer (null if empty, larger than 2048 px, or out of memory)
+    fn renderShadow(self: *ShadowCache, key: ShadowCacheKey) ?ShadowCacheEntry {
+        const blur: u32 = key.blur;
+        const spread_abs: u32 = @intCast(@abs(key.spread));
+        const padding = blur +| spread_abs; // Saturating add
+
+        const total_w = key.w +| (padding *| 2); // Saturating
+        const total_h = key.h +| (padding *| 2);
+
+        // Safety: limit buffer size to prevent huge allocations
+        if (total_w > 2048 or total_h > 2048) return null;
+        if (total_w == 0 or total_h == 0) return null;
+
+        const alpha = self.allocator.alloc(u8, total_w * total_h) catch return null;
+        @memset(alpha, 0);
+
+        if (blur == 0) {
+            // Hard shadow - solid rect
+            const base_x = padding -| spread_abs;
+            const base_y = padding -| spread_abs;
+            const base_w = key.w +| (spread_abs *| 2);
+            const base_h = key.h +| (spread_abs *| 2);
+
+            fillAlphaRect(alpha, total_w, base_x, base_y, @min(base_w, total_w), @min(base_h, total_h), key.radius, 255);
+        } else {
+            // Soft shadow - nested layers accumulate, so alpha falls off toward the outer edge
+            const layers: u32 = blur;
+
+            // Layers are added with saturating addition, so their order does not affect the result
+            var layer: u32 = layers;
+            while (layer > 0) {
+                layer -= 1;
+
+                const t = @as(f32, @floatFromInt(layer)) / @as(f32, @floatFromInt(layers));
+                const alpha_factor = (1.0 - t) * (1.0 - t) * 0.5;
+                const layer_alpha: u8 = @intFromFloat(255.0 * alpha_factor);
+                if (layer_alpha == 0) continue;
+
+                const expand = layers - layer;
+                // Use saturating subtraction to prevent underflow
+                const layer_x = (padding -| expand) -| spread_abs;
+                const layer_y = (padding -| expand) -| spread_abs;
+                const layer_w = key.w +| ((expand +| spread_abs) *| 2);
+                const layer_h = key.h +| ((expand +| spread_abs) *| 2);
+                const layer_radius: u8 = if (key.radius > 0)
+                    key.radius +| @as(u8, @intCast(@min(255 - key.radius, expand)))
+                else
+                    0;
+
+                fillAlphaRectAdditive(alpha, total_w, layer_x, layer_y, @min(layer_w, total_w), @min(layer_h, total_h), layer_radius, layer_alpha);
+            }
+
+            // Core shadow
+            const core_x = padding -| spread_abs;
+            const core_y = padding -| spread_abs;
+            const core_w = key.w +| (spread_abs *| 2);
+            const core_h = key.h +| (spread_abs *| 2);
+            fillAlphaRectAdditive(alpha, total_w, core_x, core_y, @min(core_w, total_w), @min(core_h, total_h), key.radius, 178);
+        }
+
+        return ShadowCacheEntry{
+            .alpha = alpha,
+            .width = total_w,
+            .height = total_h,
+            .padding = padding,
+        };
+    }
+};
+
+/// Overwrite a (rounded) rect in a row-major alpha buffer with `value`; out-of-range indices are skipped
+fn fillAlphaRect(alpha: []u8, stride: u32, x: u32, y: u32, w: u32, h: u32, radius: u8, value: u8) void {
+    if (w == 0 or h == 0) return;
+
+    if (radius == 0) {
+        // Fast path: no corners
+        var py: u32 = 0;
+        while (py < h) : (py += 1) {
+            const row_start = (y +| py) *| stride +| x;
+            if (row_start >= alpha.len) continue;
+            const end = @min(row_start + w, alpha.len);
+            if (end > row_start) {
+                @memset(alpha[row_start..end], value);
+            }
+        }
+    } else {
+        // Rounded corners - use distance check
+        const r: f32 = @floatFromInt(radius);
+        const r_sq = r * r;
+        const r_u32: u32 = radius;
+
+        var py: u32 = 0;
+        while (py < h) : (py += 1) {
+            var px: u32 = 0;
+            while (px < w) : (px += 1) {
+                // Safe index calculation with bounds check
+                const row = (y +| py) *| stride;
+                const idx = row +| x +| px;
+                if (idx >= alpha.len) continue;
+
+                // Check corners
+                var dist_sq: f32 = 0;
+                var in_corner = false;
+
+                // Top-left
+                if (px < r_u32 and py < r_u32) {
+                    const dx: f32 = r - @as(f32, @floatFromInt(px)) - 0.5;
+                    const dy: f32 = r - @as(f32, @floatFromInt(py)) - 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                }
+                // Top-right
+                else if (px >= w -| r_u32 and py < r_u32) {
+                    const dx: f32 = @as(f32, @floatFromInt(px)) - @as(f32, @floatFromInt(w -| r_u32)) + 0.5;
+                    const dy: f32 = r - @as(f32, @floatFromInt(py)) - 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                }
+                // Bottom-left
+                else if (px < r_u32 and py >= h -| r_u32) {
+                    const dx: f32 = r - @as(f32, @floatFromInt(px)) - 0.5;
+                    const dy: f32 = @as(f32, @floatFromInt(py)) - @as(f32, @floatFromInt(h -| r_u32)) + 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                }
+                // Bottom-right
+                else if (px >= w -| r_u32 and py >= h -| r_u32) {
+                    const dx: f32 = @as(f32, @floatFromInt(px)) - @as(f32, @floatFromInt(w -| r_u32)) + 0.5;
+                    const dy: f32 = @as(f32, @floatFromInt(py)) - @as(f32, @floatFromInt(h -| r_u32)) + 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                }
+
+                if (in_corner and dist_sq > r_sq) {
+                    continue; // Outside corner
+                }
+
+                alpha[idx] = value;
+            }
+        }
+    }
+}
+
+/// Add `value` (saturating at 255) to a (rounded) rect in a row-major alpha buffer; out-of-range indices are skipped
+fn fillAlphaRectAdditive(alpha: []u8, stride: u32, x: u32, y: u32, w: u32, h: u32, radius: u8, value: u8) void {
+    if (w == 0 or h == 0) return;
+
+    if (radius == 0) {
+        var py: u32 = 0;
+        while (py < h) : (py += 1) {
+            const row_start = (y +| py) *| stride +| x;
+            var px: u32 = 0;
+            while (px < w) : (px += 1) {
+                const idx = row_start +| px;
+                if (idx >= alpha.len) continue;
+                alpha[idx] = alpha[idx] +| value; // Saturating add
+            }
+        }
+    } else {
+        const r: f32 = @floatFromInt(radius);
+        const r_sq = r * r;
+        const r_u32: u32 = radius;
+
+        var py: u32 = 0;
+        while (py < h) : (py += 1) {
+            var px: u32 = 0;
+            while (px < w) : (px += 1) {
+                const row = (y +| py) *| stride;
+                const idx = row +| x +| px;
+                if (idx >= alpha.len) continue;
+
+                var dist_sq: f32 = 0;
+                var in_corner = false;
+
+                if (px < r_u32 and py < r_u32) {
+                    const dx: f32 = r - @as(f32, @floatFromInt(px)) - 0.5;
+                    const dy: f32 = r - @as(f32, @floatFromInt(py)) - 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                } else if (px >= w -| r_u32 and py < r_u32) {
+                    const dx: f32 = @as(f32, @floatFromInt(px)) - @as(f32, @floatFromInt(w -| r_u32)) + 0.5;
+                    const dy: f32 = r - @as(f32, @floatFromInt(py)) - 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                } else if (px < r_u32 and py >= h -| r_u32) {
+                    const dx: f32 = r - @as(f32, @floatFromInt(px)) - 0.5;
+                    const dy: f32 = @as(f32, @floatFromInt(py)) - @as(f32, @floatFromInt(h -| r_u32)) + 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                } else if (px >= w -| r_u32 and py >= h -| r_u32) {
+                    const dx: f32 = @as(f32, @floatFromInt(px)) - @as(f32, @floatFromInt(w -| r_u32)) + 0.5;
+                    const dy: f32 = @as(f32, @floatFromInt(py)) - @as(f32, @floatFromInt(h -| r_u32)) + 0.5;
+                    dist_sq = dx * dx + dy * dy;
+                    in_corner = true;
+                }
+
+                if (in_corner and dist_sq > r_sq) continue;
+
+                alpha[idx] = alpha[idx] +| value;
+            }
+        }
+    }
+}
+
 /// Software renderer state
 pub const SoftwareRenderer = struct {
     framebuffer: *Framebuffer,
@@ -46,9 +320,12 @@ pub const SoftwareRenderer = struct {
     clip_stack: [16]Rect,
     clip_depth: usize,
 
+    /// Shadow cache for instant shadow blitting (optional, requires 
allocator)
+    shadow_cache: ?ShadowCache = null,
+
     const Self = @This();
 
-    /// Initialize the renderer
+    /// Initialize the renderer without a shadow cache (every shadow is rendered directly on each draw)
     pub fn init(framebuffer: *Framebuffer) Self {
         return .{
             .framebuffer = framebuffer,
@@ -56,9 +333,30 @@ pub const SoftwareRenderer = struct {
             .ttf_font = null,
             .clip_stack = undefined,
             .clip_depth = 0,
+            .shadow_cache = null,
         };
     }
 
+    /// Initialize with the shadow cache enabled; call `deinit()` afterwards to free the cached buffers
+    pub fn initWithCache(framebuffer: *Framebuffer, allocator: Allocator) Self {
+        return .{
+            .framebuffer = framebuffer,
+            .default_font = null,
+            .ttf_font = null,
+            .clip_stack = undefined,
+            .clip_depth = 0,
+            .shadow_cache = ShadowCache.init(allocator),
+        };
+    }
+
+    /// Free the shadow cache if present and reset it to null (no-op for a renderer created with `init`)
+    pub fn deinit(self: *Self) void {
+        if (self.shadow_cache) |*cache| {
+            cache.deinit();
+            self.shadow_cache = null;
+        }
+    }
+
     /// Set the default bitmap font
     pub fn setDefaultFont(self: *Self, font: *Font) void {
         self.default_font = font;
@@ -305,8 +603,91 @@ pub const SoftwareRenderer = struct {
     }
 
     /// Draw a multi-layer shadow to simulate blur effect
-    /// Draws expanding layers with decreasing alpha, creating soft edges
+    /// Blits a cached pre-rendered shadow when a cache is configured, otherwise falls back to `drawShadowDirect`
     fn drawShadow(self: *Self, s: Command.ShadowCommand) void {
+        // Try to use cached shadow first (FAST PATH); the key holds shape only, not position or color
+        if (self.shadow_cache) |*cache| {
+            const key = ShadowCacheKey{
+                .w = s.w,
+                .h = s.h,
+                .blur = s.blur,
+                .radius = s.radius,
+                .spread = s.spread,
+            };
+
+            if (cache.getOrCreate(key)) |entry| {
+                // Blit cached shadow with color
+                self.blitShadowCache(s, entry);
+                return;
+            }
+        }
+
+        // Fallback (SLOW PATH): no cache configured, or getOrCreate returned null
+        self.drawShadowDirect(s);
+    }
+
+    /// Blit a cached alpha mask, tinted with `s.color`, onto the framebuffer inside the current clip rect
+    fn blitShadowCache(self: *Self, s: Command.ShadowCommand, entry: ShadowCacheEntry) void {
+        // Safe cast: padding is at most blur(255) + spread(128) = 383, fits in i32
+        const padding_i32: i32 = @intCast(@min(entry.padding, std.math.maxInt(i32)));
+        const dest_x = s.x + @as(i32, s.offset_x) - padding_i32;
+        const dest_y = s.y + @as(i32, s.offset_y) - padding_i32;
+
+        const clip = self.getClip();
+        const fb = self.framebuffer;
+
+        // Use u32 for all color math to prevent overflow
+        const color_r: u32 = s.color.r;
+        const color_g: u32 = s.color.g;
+        const color_b: u32 = s.color.b;
+        const color_a: u32 = s.color.a;
+
+        var py: u32 = 0;
+        while (py < entry.height) : (py += 1) {
+            const screen_y = dest_y + @as(i32, @intCast(py));
+            if (screen_y < clip.y or screen_y >= clip.y + @as(i32, @intCast(clip.h))) continue;
+            if (screen_y < 0 or screen_y >= @as(i32, @intCast(fb.height))) continue;
+
+            const src_row = py *| entry.width; // Saturating
+            const dst_row = @as(u32, @intCast(screen_y)) * fb.width;
+
+            var px: u32 = 0;
+            while (px < entry.width) : (px += 1) {
+                const src_idx = src_row +| px;
+                if (src_idx >= entry.alpha.len) continue;
+
+                const cached_alpha = entry.alpha[src_idx];
+                if (cached_alpha == 0) continue;
+
+                const screen_x = dest_x + @as(i32, @intCast(px));
+                if (screen_x < clip.x or screen_x >= clip.x + @as(i32, @intCast(clip.w))) continue;
+                if (screen_x < 0 or screen_x >= @as(i32, @intCast(fb.width))) continue;
+
+                const dst_idx = dst_row + @as(u32, @intCast(screen_x));
+                if (dst_idx >= fb.pixels.len) continue;
+
+                // Modulate alpha with shadow color's alpha (u32 math)
+                const final_alpha: u32 = (@as(u32, cached_alpha) * color_a) / 255;
+                if (final_alpha == 0) continue;
+
+                // Blend with the existing pixel (r in the low byte, then g, b); all u32 to prevent overflow
+                const existing = fb.pixels[dst_idx];
+                const bg_r: u32 = existing & 0xFF;
+                const bg_g: u32 = (existing >> 8) & 0xFF;
+                const bg_b: u32 = (existing >> 16) & 0xFF;
+
+                const inv_alpha: u32 = 255 -| final_alpha; // Saturating subtract
+                const out_r: u8 = @intCast((color_r * final_alpha + bg_r * inv_alpha) / 255);
+                const out_g: u8 = @intCast((color_g * final_alpha + bg_g * inv_alpha) / 255);
+                const out_b: u8 = @intCast((color_b * final_alpha + bg_b * inv_alpha) / 255);
+
+                fb.pixels[dst_idx] = @as(u32, out_r) | (@as(u32, out_g) << 8) | (@as(u32, out_b) << 16) | (0xFF << 24);
+            }
+        }
+    }
+
+    /// Render the shadow layer by layer without the cache (fallback path used by `drawShadow`)
+    fn drawShadowDirect(self: *Self, s: Command.ShadowCommand) void {
         if (s.blur == 0) {
             // Hard shadow - single solid rect
             const shadow_x = s.x + @as(i32, s.offset_x) - @as(i32, s.spread);
@@ -323,29 +704,24 @@ pub const SoftwareRenderer = struct {
         }
 
         // Soft shadow - draw multiple expanding layers with decreasing alpha
-        // Each layer is larger and more transparent, creating a blur effect
         const layers: u8 = s.blur;
         const base_alpha = s.color.a;
 
-        // Calculate base shadow position
         const base_x = s.x + @as(i32, s.offset_x) - @as(i32, s.spread);
         const base_y = s.y + @as(i32, s.offset_y) - @as(i32, s.spread);
         const base_w = s.w +| @as(u32, @intCast(@abs(s.spread) * 2));
         const base_h = s.h +| @as(u32, @intCast(@abs(s.spread) * 2));
 
-        // Draw from outermost (most transparent) to innermost (most opaque)
         var layer: u8 = layers;
         while (layer > 0) {
             layer -= 1;
 
-            // Calculate alpha for this layer (quadratic falloff for softer edges)
             const t = @as(f32, @floatFromInt(layer)) / @as(f32, @floatFromInt(layers));
-            const alpha_factor = (1.0 - t) * (1.0 - t); // Quadratic falloff
+            const alpha_factor = (1.0 - t) * (1.0 - t);
             const layer_alpha = @as(u8, @intFromFloat(@as(f32, @floatFromInt(base_alpha)) * alpha_factor * 0.5));
 
             if (layer_alpha == 0) continue;
 
-            // Expand layer outward
             const expand = @as(i32, @intCast(layers - layer));
             const layer_x = base_x - expand;
             const layer_y = base_y - expand;
@@ -362,7 +738,7 @@ pub const SoftwareRenderer = struct {
             }
         }
 
-        // Draw core shadow (innermost, full opacity relative to input)
+        // Core shadow (drawn at 0.7 of the input alpha)
         const core_alpha = @as(u8, @intFromFloat(@as(f32, @floatFromInt(base_alpha)) * 0.7));
         if (core_alpha > 0) {
             const core_color = Color.rgba(s.color.r, s.color.g, s.color.b, core_alpha);
diff --git 
a/src/render/ttf.zig b/src/render/ttf.zig
index 646a573..9a3ec58 100644
--- a/src/render/ttf.zig
+++ b/src/render/ttf.zig
@@ -310,6 +310,7 @@ pub const TtfFont = struct {
     }
 
     /// Draw a cached glyph bitmap with alpha blending
+    /// Only the glyph/clip/framebuffer intersection is visited; pixels are written straight into `fb.pixels`
     fn drawGlyphBitmap(
         self: Self,
         fb: *Framebuffer,
@@ -321,44 +322,73 @@ pub const TtfFont = struct {
     ) void {
         _ = self;
 
-        // Calculate position: bearing_y is distance from baseline to top of glyph
+        const width: u32 = glyph.metrics.width;
+        const height: u32 = glyph.metrics.height;
+        if (width == 0 or height == 0) return;
+
+        // Calculate glyph position (top-left corner, offset from the pen position by the glyph bearings)
         const glyph_x = x + glyph.metrics.bearing_x;
         const glyph_y = baseline_y - glyph.metrics.bearing_y;
 
-        const width = glyph.metrics.width;
-        const height = glyph.metrics.height;
+        // Early exit: entire glyph outside clip or framebuffer (right/bottom edges are exclusive)
+        const glyph_right = glyph_x + @as(i32, @intCast(width));
+        const glyph_bottom = glyph_y + @as(i32, @intCast(height));
+        const clip_right = clip.x + @as(i32, @intCast(clip.w));
+        const clip_bottom = clip.y + @as(i32, @intCast(clip.h));
 
-        if (width == 0 or height == 0) return;
+        if (glyph_right <= clip.x or glyph_x >= clip_right) return;
+        if (glyph_bottom <= clip.y or glyph_y >= clip_bottom) return;
+        if (glyph_right <= 0 or glyph_x >= @as(i32, @intCast(fb.width))) return;
+        if (glyph_bottom <= 0 or glyph_y >= @as(i32, @intCast(fb.height))) return;
 
-        // Draw each pixel with alpha blending
-        for (0..height) |py| {
-            for (0..width) |px| {
-                const alpha = glyph.bitmap[py * width + px];
+        // Calculate visible region (intersection of glyph, clip, and framebuffer) in screen coordinates
+        const vis_x0 = @max(0, @max(glyph_x, clip.x));
+        const vis_y0 = @max(0, @max(glyph_y, clip.y));
+        const vis_x1 = @min(@as(i32, @intCast(fb.width)), @min(glyph_right, clip_right));
+        const vis_y1 = @min(@as(i32, @intCast(fb.height)), @min(glyph_bottom, clip_bottom));
+
+        if (vis_x0 >= vis_x1 or vis_y0 >= vis_y1) return;
+
+        // Precompute color for blending (u32 to avoid per-pixel struct creation)
+        const color_r: u32 = color.r;
+        const color_g: u32 = color.g;
+        const color_b: u32 = color.b;
+        const color_packed = color.toABGR();
+
+        // Draw only visible region with direct pixel access (every index below lies inside the framebuffer)
+        var screen_y = vis_y0;
+        while (screen_y < vis_y1) : (screen_y += 1) {
+            const glyph_py: u32 = @intCast(screen_y - glyph_y);
+            const dst_row: u32 = @intCast(screen_y);
+            const dst_row_start = dst_row * fb.width;
+            const src_row_start = glyph_py * width;
+
+            var screen_x = vis_x0;
+            while (screen_x < vis_x1) : (screen_x += 1) {
+                const glyph_px: u32 = @intCast(screen_x - glyph_x);
+                const alpha = glyph.bitmap[src_row_start + glyph_px];
                 if (alpha == 0) continue;
 
-                const screen_x = glyph_x + @as(i32, @intCast(px));
-                const screen_y = glyph_y + @as(i32, @intCast(py));
+                const dst_idx = dst_row_start + @as(u32, @intCast(screen_x));
 
-                // Clip check
-                if (screen_x < clip.x or screen_x >= clip.x + @as(i32, @intCast(clip.w))) continue;
-                if (screen_y < clip.y or screen_y >= clip.y + @as(i32, @intCast(clip.h))) continue;
-                if (screen_x < 0 or screen_y < 0) continue;
-                if (screen_x >= @as(i32, @intCast(fb.width)) or screen_y >= @as(i32, @intCast(fb.height))) continue;
-
-                // Alpha blend
                 if (alpha == 255) {
-                    fb.setPixel(@intCast(screen_x), @intCast(screen_y), color);
+                    // Fully opaque bitmap alpha: store the pre-packed color, no blending
+                    fb.pixels[dst_idx] = color_packed;
                 } else {
-                    // Get background pixel and convert u32 to Color (ABGR format)
-                    const bg_u32 = fb.getPixel(@intCast(screen_x), @intCast(screen_y)) orelse 0;
-                    const bg = Color{
-                        .r = @truncate(bg_u32),
-                        .g = @truncate(bg_u32 >> 8),
-                        .b = @truncate(bg_u32 >> 16),
-                        .a = @truncate(bg_u32 >> 24),
-                    };
-                    const blended = blendColors(color, bg, alpha);
-                    fb.setPixel(@intCast(screen_x), @intCast(screen_y), blended);
+                    // Partial alpha: blend r/g/b with the existing pixel (r in the low byte); output alpha is forced to 0xFF
+                    const bg = fb.pixels[dst_idx];
+                    const bg_r: u32 = bg & 0xFF;
+                    const bg_g: u32 = (bg >> 8) & 0xFF;
+                    const bg_b: u32 = (bg >> 16) & 0xFF;
+
+                    const a: u32 = alpha;
+                    const inv_a: u32 = 255 - a;
+
+                    const out_r: u8 = @intCast((color_r * a + bg_r * inv_a) / 255);
+                    const out_g: u8 = @intCast((color_g * a + bg_g * inv_a) / 255);
+                    const out_b: u8 = @intCast((color_b * a + bg_b * inv_a) / 255);
+
+                    fb.pixels[dst_idx] = @as(u32, out_r) | (@as(u32, out_g) << 8) | (@as(u32, out_b) << 16) | (0xFF << 24);
                 }
             }
         }