perf(framebuffer): SIMD fillRect con @Vector(8, u32)

Optimización 2.3 del plan de rendimiento: - fillRect para alpha=255 usa vectores SIMD explícitos - Escribe 8 pixels (256 bits) por iteración en lugar de 1 - Remainder loop para los últimos 0-7 pixels - Mantiene el path de blending para alpha<255 Beneficio esperado: 5-10x más rápido para rectángulos grandes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 18:38:53 +01:00 · 2026-01-03 18:38:53 +01:00 · 1155e904cb
commit 1155e904cb
parent d92ce07bb3
1 changed files with 24 additions and 7 deletions
--- a/src/render/framebuffer.zig
+++ b/src/render/framebuffer.zig
@ -112,7 +112,7 @@ pub const Framebuffer = struct {
    }

    /// Draw a filled rectangle
-    /// Optimized with SIMD-friendly @memset for solid colors (alpha=255)
+    /// Optimized with explicit SIMD vectors for solid colors (alpha=255)
    pub fn fillRect(self: *Self, x: i32, y: i32, w: u32, h: u32, color: Color) void {
        @setRuntimeSafety(false); // Hot path: bounds already validated below
        const x_start = @max(0, x);
@ -123,16 +123,33 @@ pub const Framebuffer = struct {
        if (x_start >= x_end or y_start >= y_end) return;

        const c = color.toABGR();
-        const row_width = @as(u32, @intCast(x_end - x_start));
-        const ux_start = @as(u32, @intCast(x_start));
+        const row_width: usize = @intCast(x_end - x_start);
+        const ux_start: usize = @intCast(x_start);

-        // FAST PATH: Solid colors (alpha=255) use @memset which is SIMD-optimized
+        // FAST PATH: Solid colors (alpha=255) use explicit SIMD vectors
        if (color.a == 255) {
-            var py: u32 = @intCast(y_start);
-            const uy_end: u32 = @intCast(y_end);
+            var py: usize = @intCast(y_start);
+            const uy_end: usize = @intCast(y_end);
+
+            // SIMD: 8 pixels at a time (256 bits = 8 × 32-bit u32)
+            const vec_size = 8;
+            const color_vec: @Vector(vec_size, u32) = @splat(c);
+
            while (py < uy_end) : (py += 1) {
                const row_start = py * self.width + ux_start;
-                @memset(self.pixels[row_start..][0..row_width], c);
+                const row_slice = self.pixels[row_start..][0..row_width];
+
+                // Vectorized: write 8 pixels at a time
+                var px: usize = 0;
+                while (px + vec_size <= row_width) : (px += vec_size) {
+                    const ptr: *@Vector(vec_size, u32) = @ptrCast(@alignCast(row_slice[px..].ptr));
+                    ptr.* = color_vec;
+                }
+
+                // Remainder: write remaining pixels one by one
+                while (px < row_width) : (px += 1) {
+                    row_slice[px] = c;
+                }
            }
            return;
        }