From 1155e904cbb4c36771a37d25a79c530ebefca485 Mon Sep 17 00:00:00 2001
From: "R.Eugenio" <yo@reugenio.com>
Date: Sat, 3 Jan 2026 18:38:53 +0100
Subject: [PATCH] perf(framebuffer): SIMD fillRect con @Vector(8, u32)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Optimización 2.3 del plan de rendimiento:
- fillRect para alpha=255 usa vectores SIMD explícitos
- Escribe 8 píxeles (256 bits) por iteración con escrituras vectoriales explícitas, en lugar de delegar en @memset
- Bucle de resto escalar para los últimos 0-7 píxeles de cada fila
- Mantiene el path de blending para alpha<255

Beneficio esperado: 5-10x más rápido para rectángulos grandes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/render/framebuffer.zig | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/render/framebuffer.zig b/src/render/framebuffer.zig
index 8af7fe7..c0d0bc2 100644
--- a/src/render/framebuffer.zig
+++ b/src/render/framebuffer.zig
@@ -112,7 +112,7 @@ pub const Framebuffer = struct {
     }
 
     /// Draw a filled rectangle
-    /// Optimized with SIMD-friendly @memset for solid colors (alpha=255)
+    /// Optimized with explicit SIMD vectors for solid colors (alpha=255)
     pub fn fillRect(self: *Self, x: i32, y: i32, w: u32, h: u32, color: Color) void {
         @setRuntimeSafety(false); // Hot path: bounds already validated below
         const x_start = @max(0, x);
@@ -123,16 +123,33 @@ pub const Framebuffer = struct {
         if (x_start >= x_end or y_start >= y_end) return;
 
         const c = color.toABGR();
-        const row_width = @as(u32, @intCast(x_end - x_start));
-        const ux_start = @as(u32, @intCast(x_start));
+        const row_width: usize = @intCast(x_end - x_start);
+        const ux_start: usize = @intCast(x_start);
 
-        // FAST PATH: Solid colors (alpha=255) use @memset which is SIMD-optimized
+        // FAST PATH: Solid colors (alpha=255) use explicit SIMD vectors
         if (color.a == 255) {
-            var py: u32 = @intCast(y_start);
-            const uy_end: u32 = @intCast(y_end);
+            var py: usize = @intCast(y_start);
+            const uy_end: usize = @intCast(y_end);
+
+            // SIMD: 8 pixels at a time (256 bits = 8 × 32-bit u32)
+            const vec_size = 8;
+            const color_vec: @Vector(vec_size, u32) = @splat(c);
+
             while (py < uy_end) : (py += 1) {
                 const row_start = py * self.width + ux_start;
-                @memset(self.pixels[row_start..][0..row_width], c);
+                const row_slice = self.pixels[row_start..][0..row_width];
+
+                // Vectorized: write 8 pixels at a time
+                var px: usize = 0;
+                while (px + vec_size <= row_width) : (px += vec_size) {
+                    const ptr: *@Vector(vec_size, u32) = @ptrCast(@alignCast(row_slice[px..].ptr));
+                    ptr.* = color_vec;
+                }
+
+                // Remainder: write remaining pixels one by one
+                while (px < row_width) : (px += 1) {
+                    row_slice[px] = c;
+                }
             }
             return;
         }