perf(framebuffer): SIMD fillRect con @Vector(8, u32)
Optimización 2.3 del plan de rendimiento:

- fillRect para alpha=255 usa vectores SIMD explícitos
- Escribe 8 píxeles (256 bits) por iteración en lugar de 1
- Remainder loop para los últimos 0-7 píxeles
- Mantiene el path de blending para alpha<255

Beneficio esperado: 5-10x más rápido para rectángulos grandes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
d92ce07bb3
commit
1155e904cb
1 changed files with 24 additions and 7 deletions
|
|
@ -112,7 +112,7 @@ pub const Framebuffer = struct {
|
|||
}
|
||||
|
||||
/// Draw a filled rectangle
|
||||
/// Optimized with SIMD-friendly @memset for solid colors (alpha=255)
|
||||
/// Optimized with explicit SIMD vectors for solid colors (alpha=255)
|
||||
pub fn fillRect(self: *Self, x: i32, y: i32, w: u32, h: u32, color: Color) void {
|
||||
@setRuntimeSafety(false); // Hot path: bounds already validated below
|
||||
const x_start = @max(0, x);
|
||||
|
|
@ -123,16 +123,33 @@ pub const Framebuffer = struct {
|
|||
if (x_start >= x_end or y_start >= y_end) return;
|
||||
|
||||
const c = color.toABGR();
|
||||
const row_width = @as(u32, @intCast(x_end - x_start));
|
||||
const ux_start = @as(u32, @intCast(x_start));
|
||||
const row_width: usize = @intCast(x_end - x_start);
|
||||
const ux_start: usize = @intCast(x_start);
|
||||
|
||||
// FAST PATH: Solid colors (alpha=255) use @memset which is SIMD-optimized
|
||||
// FAST PATH: Solid colors (alpha=255) use explicit SIMD vectors
|
||||
if (color.a == 255) {
|
||||
var py: u32 = @intCast(y_start);
|
||||
const uy_end: u32 = @intCast(y_end);
|
||||
var py: usize = @intCast(y_start);
|
||||
const uy_end: usize = @intCast(y_end);
|
||||
|
||||
// SIMD: 8 pixels at a time (256 bits = 8 × 32-bit u32)
|
||||
const vec_size = 8;
|
||||
const color_vec: @Vector(vec_size, u32) = @splat(c);
|
||||
|
||||
while (py < uy_end) : (py += 1) {
|
||||
const row_start = py * self.width + ux_start;
|
||||
@memset(self.pixels[row_start..][0..row_width], c);
|
||||
const row_slice = self.pixels[row_start..][0..row_width];
|
||||
|
||||
// Vectorized: write 8 pixels at a time
|
||||
var px: usize = 0;
|
||||
while (px + vec_size <= row_width) : (px += vec_size) {
|
||||
const ptr: *@Vector(vec_size, u32) = @ptrCast(@alignCast(row_slice[px..].ptr));
|
||||
ptr.* = color_vec;
|
||||
}
|
||||
|
||||
// Remainder: write remaining pixels one by one
|
||||
while (px < row_width) : (px += 1) {
|
||||
row_slice[px] = c;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue