perf(font): Optimizar drawChar con fast path para caracteres visibles
- Early out para caracteres completamente fuera del clip - Fast path para caracteres 100% visibles (caso común) - Escritura directa al buffer de píxeles sin setPixel - Loop optimizado para fuentes de 8px de ancho - Unroll de los 8 bits del glyph byte Resultados: - Debug: 40ms → 32ms por frame - Release: 40ms → 1.5ms por frame (~26x más rápido) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
097f1474ca
commit
51d41bd01e
1 changed files with 63 additions and 1 deletions
|
|
@ -56,6 +56,7 @@ pub const Font = struct {
|
|||
}
|
||||
|
||||
/// Draw a single character
|
||||
/// Optimized for common cases with fast path for fully visible characters.
|
||||
pub fn drawChar(
|
||||
self: Self,
|
||||
fb: *Framebuffer,
|
||||
|
|
@ -70,6 +71,15 @@ pub const Font = struct {
|
|||
const idx = char - self.first_char;
|
||||
if (idx >= self.num_chars) return;
|
||||
|
||||
// Early out: character completely outside clip
|
||||
const char_right = x + @as(i32, self.char_width);
|
||||
const char_bottom = y + @as(i32, self.char_height);
|
||||
if (x >= clip.right() or char_right <= clip.left() or
|
||||
y >= clip.bottom() or char_bottom <= clip.top())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate glyph data offset
|
||||
const bytes_per_row = (self.char_width + 7) / 8;
|
||||
const bytes_per_char = @as(usize, bytes_per_row) * @as(usize, self.char_height);
|
||||
|
|
@ -78,7 +88,59 @@ pub const Font = struct {
|
|||
if (glyph_offset + bytes_per_char > self.glyphs.len) return;
|
||||
const glyph = self.glyphs[glyph_offset..][0..bytes_per_char];
|
||||
|
||||
// Draw the glyph
|
||||
// Pre-compute color value for direct pixel writes
|
||||
const color_value = color.toABGR();
|
||||
|
||||
// Check if character is fully visible (most common case)
|
||||
const fully_visible = x >= clip.left() and char_right <= clip.right() and
|
||||
y >= clip.top() and char_bottom <= clip.bottom();
|
||||
|
||||
if (fully_visible and x >= 0 and y >= 0) {
|
||||
// FAST PATH: No clipping needed, write directly to framebuffer
|
||||
const ux: u32 = @intCast(x);
|
||||
const uy: u32 = @intCast(y);
|
||||
|
||||
// Ensure we're within framebuffer bounds
|
||||
if (ux + self.char_width <= fb.width and uy + self.char_height <= fb.height) {
|
||||
const fb_width = fb.width;
|
||||
const pixels = fb.pixels;
|
||||
var row_start = uy * fb_width + ux;
|
||||
|
||||
// For 8-pixel wide fonts, bytes_per_row is always 1
|
||||
if (self.char_width == 8) {
|
||||
for (glyph[0..self.char_height]) |glyph_byte| {
|
||||
// Process 8 pixels from the glyph byte - unrolled
|
||||
if (glyph_byte & 0x80 != 0) pixels[row_start + 0] = color_value;
|
||||
if (glyph_byte & 0x40 != 0) pixels[row_start + 1] = color_value;
|
||||
if (glyph_byte & 0x20 != 0) pixels[row_start + 2] = color_value;
|
||||
if (glyph_byte & 0x10 != 0) pixels[row_start + 3] = color_value;
|
||||
if (glyph_byte & 0x08 != 0) pixels[row_start + 4] = color_value;
|
||||
if (glyph_byte & 0x04 != 0) pixels[row_start + 5] = color_value;
|
||||
if (glyph_byte & 0x02 != 0) pixels[row_start + 6] = color_value;
|
||||
if (glyph_byte & 0x01 != 0) pixels[row_start + 7] = color_value;
|
||||
row_start += fb_width;
|
||||
}
|
||||
} else {
|
||||
// Generic path for other font widths
|
||||
var py: u8 = 0;
|
||||
while (py < self.char_height) : (py += 1) {
|
||||
const glyph_byte = glyph[py * bytes_per_row];
|
||||
if (glyph_byte & 0x80 != 0) pixels[row_start + 0] = color_value;
|
||||
if (glyph_byte & 0x40 != 0) pixels[row_start + 1] = color_value;
|
||||
if (glyph_byte & 0x20 != 0) pixels[row_start + 2] = color_value;
|
||||
if (glyph_byte & 0x10 != 0) pixels[row_start + 3] = color_value;
|
||||
if (glyph_byte & 0x08 != 0) pixels[row_start + 4] = color_value;
|
||||
if (glyph_byte & 0x04 != 0) pixels[row_start + 5] = color_value;
|
||||
if (glyph_byte & 0x02 != 0) pixels[row_start + 6] = color_value;
|
||||
if (glyph_byte & 0x01 != 0) pixels[row_start + 7] = color_value;
|
||||
row_start += fb_width;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// SLOW PATH: Character partially clipped, use per-pixel checks
|
||||
var py: u8 = 0;
|
||||
while (py < self.char_height) : (py += 1) {
|
||||
const screen_y = y + @as(i32, py);
|
||||
|
|
|
|||
Loading…
Reference in a new issue