branchless sprite blit and zbuffer updates

This commit is contained in:
asrael 2025-12-03 17:42:42 -06:00
parent 349fa8bc81
commit a33d4c0068
No known key found for this signature in database
GPG key ID: 2786557804DFAE24
2 changed files with 137 additions and 41 deletions

View file

@ -9,10 +9,29 @@ void pxl8_blit_hicolor(u16* fb, u32 fb_width, const u16* sprite, u32 atlas_width
u16* dest_row = dest_base + row * fb_width;
const u16* src_row = src_base + row * atlas_width;
for (u32 col = 0; col < w; col++) {
if (src_row[col] != 0) {
dest_row[col] = src_row[col];
u32 col = 0;
u32 count2 = w / 2;
for (u32 i = 0; i < count2; i++) {
u32 pixels = ((const u32*)src_row)[i];
if (pixels == 0) {
col += 2;
continue;
}
u16 s0 = (u16)(pixels);
u16 s1 = (u16)(pixels >> 16);
u16 d0 = dest_row[col];
u16 d1 = dest_row[col + 1];
u16 m0 = (u16)(-(s0 != 0));
u16 m1 = (u16)(-(s1 != 0));
dest_row[col] = (s0 & m0) | (d0 & ~m0);
dest_row[col + 1] = (s1 & m1) | (d1 & ~m1);
col += 2;
}
if (w & 1) {
u16 s = src_row[col];
u16 d = dest_row[col];
u16 m = (u16)(-(s != 0));
dest_row[col] = (s & m) | (d & ~m);
}
}
}
@ -26,10 +45,37 @@ void pxl8_blit_indexed(u8* fb, u32 fb_width, const u8* sprite, u32 atlas_width,
u8* dest_row = dest_base + row * fb_width;
const u8* src_row = src_base + row * atlas_width;
for (u32 col = 0; col < w; col++) {
if (src_row[col] != 0) {
dest_row[col] = src_row[col];
}
u32 col = 0;
u32 count4 = w / 4;
for (u32 i = 0; i < count4; i++) {
u32 pixels = ((const u32*)src_row)[i];
if (pixels == 0) {
col += 4;
continue;
}
u8 s0 = (u8)(pixels);
u8 s1 = (u8)(pixels >> 8);
u8 s2 = (u8)(pixels >> 16);
u8 s3 = (u8)(pixels >> 24);
u8 d0 = dest_row[col];
u8 d1 = dest_row[col + 1];
u8 d2 = dest_row[col + 2];
u8 d3 = dest_row[col + 3];
u8 m0 = (u8)(-(s0 != 0));
u8 m1 = (u8)(-(s1 != 0));
u8 m2 = (u8)(-(s2 != 0));
u8 m3 = (u8)(-(s3 != 0));
dest_row[col] = (s0 & m0) | (d0 & ~m0);
dest_row[col + 1] = (s1 & m1) | (d1 & ~m1);
dest_row[col + 2] = (s2 & m2) | (d2 & ~m2);
dest_row[col + 3] = (s3 & m3) | (d3 & ~m3);
col += 4;
}
for (; col < w; col++) {
u8 s = src_row[col];
u8 d = dest_row[col];
u8 m = (u8)(-(s != 0));
dest_row[col] = (s & m) | (d & ~m);
}
}
}

View file

@ -382,8 +382,14 @@ void pxl8_clear(pxl8_gfx* gfx, u32 color) {
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
u16* fb16 = (u16*)gfx->framebuffer;
u16 color16 = pxl8_rgba32_to_rgb565(color);
for (i32 i = 0; i < size; i++) {
fb16[i] = color16;
u32 pattern = (u32)color16 | ((u32)color16 << 16);
u32* fb32 = (u32*)fb16;
i32 count2 = size / 2;
for (i32 i = 0; i < count2; i++) {
fb32[i] = pattern;
}
if (size & 1) {
fb16[size - 1] = color16;
}
} else {
memset(gfx->framebuffer, color & 0xFF, size);
@ -466,9 +472,30 @@ void pxl8_rect_fill(pxl8_gfx* gfx, i32 x, i32 y, i32 w, i32 h, u32 color) {
i32 x1 = (x + w > gfx->framebuffer_width) ? gfx->framebuffer_width : x + w;
i32 y1 = (y + h > gfx->framebuffer_height) ? gfx->framebuffer_height : y + h;
i32 rect_w = x1 - x0;
if (rect_w <= 0 || y1 <= y0) return;
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
u16* fb16 = (u16*)gfx->framebuffer;
u16 color16 = pxl8_rgba32_to_rgb565(color);
u32 pattern = (u32)color16 | ((u32)color16 << 16);
for (i32 py = y0; py < y1; py++) {
for (i32 px = x0; px < x1; px++) {
pxl8_pixel_unchecked(gfx, px, py, color);
u16* row = fb16 + py * gfx->framebuffer_width + x0;
i32 count2 = rect_w / 2;
u32* row32 = (u32*)row;
for (i32 i = 0; i < count2; i++) {
row32[i] = pattern;
}
if (rect_w & 1) {
row[rect_w - 1] = color16;
}
}
} else {
u8 color8 = color & 0xFF;
for (i32 py = y0; py < y1; py++) {
u8* row = gfx->framebuffer + py * gfx->framebuffer_width + x0;
memset(row, color8, rect_w);
}
}
}
@ -617,15 +644,15 @@ void pxl8_sprite(pxl8_gfx* gfx, u32 sprite_id, i32 x, i32 y, i32 w, i32 h) {
i32 dest_idx = (dest_y + py) * gfx->framebuffer_width + (dest_x + px);
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
u16 pixel = ((const u16*)atlas_pixels)[src_idx];
if (pixel != 0) {
((u16*)gfx->framebuffer)[dest_idx] = pixel;
}
u16 s = ((const u16*)atlas_pixels)[src_idx];
u16 d = ((u16*)gfx->framebuffer)[dest_idx];
u16 m = (u16)(-(s != 0));
((u16*)gfx->framebuffer)[dest_idx] = (s & m) | (d & ~m);
} else {
u8 pixel = atlas_pixels[src_idx];
if (pixel != 0) {
gfx->framebuffer[dest_idx] = pixel;
}
u8 s = atlas_pixels[src_idx];
u8 d = gfx->framebuffer[dest_idx];
u8 m = (u8)(-(s != 0));
gfx->framebuffer[dest_idx] = (s & m) | (d & ~m);
}
}
}
@ -802,9 +829,8 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
const f32 far_z = 1e30f;
f32* ptr = gfx->zbuffer;
for (i32 i = 0; i < count; i++) {
ptr[i] = far_z;
gfx->zbuffer[i] = far_z;
}
}
@ -1024,17 +1050,29 @@ static inline void pxl8_fill_scanline_textured(
i32 atlas_idx = (atlas_y_base + ty) * atlas_width + (atlas_x_base + tx);
if (is_hicolor) {
u16 color = ((const u16*)atlas_pixels)[atlas_idx];
if (color != 0) {
gfx->zbuffer[idx] = z0;
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs] = color;
}
u16 s = ((const u16*)atlas_pixels)[atlas_idx];
u16 d = ((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs];
u16 m = (u16)(-(s != 0));
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs] = (s & m) | (d & ~m);
f32 old_z = gfx->zbuffer[idx];
u32 zm = -(s != 0);
u32 z0_bits, old_z_bits;
memcpy(&z0_bits, &z0, sizeof(u32));
memcpy(&old_z_bits, &old_z, sizeof(u32));
u32 new_z_bits = (z0_bits & zm) | (old_z_bits & ~zm);
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
} else {
u8 color = atlas_pixels[atlas_idx];
if (color != 0) {
gfx->zbuffer[idx] = z0;
gfx->framebuffer[y * gfx->framebuffer_width + xs] = color;
}
u8 s = atlas_pixels[atlas_idx];
u8 d = gfx->framebuffer[y * gfx->framebuffer_width + xs];
u8 m = (u8)(-(s != 0));
gfx->framebuffer[y * gfx->framebuffer_width + xs] = (s & m) | (d & ~m);
f32 old_z = gfx->zbuffer[idx];
u32 zm = -(s != 0);
u32 z0_bits, old_z_bits;
memcpy(&z0_bits, &z0, sizeof(u32));
memcpy(&old_z_bits, &old_z, sizeof(u32));
u32 new_z_bits = (z0_bits & zm) | (old_z_bits & ~zm);
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
}
}
}
@ -1080,17 +1118,29 @@ static inline void pxl8_fill_scanline_textured(
i32 atlas_idx = (atlas_y_base + ty) * atlas_width + (atlas_x_base + tx);
if (is_hicolor) {
u16 color = ((const u16*)atlas_pixels)[atlas_idx];
if (color != 0) {
gfx->zbuffer[idx] = z;
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x] = color;
}
u16 s = ((const u16*)atlas_pixels)[atlas_idx];
u16 d = ((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x];
u16 m = (u16)(-(s != 0));
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x] = (s & m) | (d & ~m);
f32 old_z = gfx->zbuffer[idx];
u32 zm = -(s != 0);
u32 z_bits, old_z_bits;
memcpy(&z_bits, &z, sizeof(u32));
memcpy(&old_z_bits, &old_z, sizeof(u32));
u32 new_z_bits = (z_bits & zm) | (old_z_bits & ~zm);
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
} else {
u8 color = atlas_pixels[atlas_idx];
if (color != 0) {
gfx->zbuffer[idx] = z;
gfx->framebuffer[y * gfx->framebuffer_width + x] = color;
}
u8 s = atlas_pixels[atlas_idx];
u8 d = gfx->framebuffer[y * gfx->framebuffer_width + x];
u8 m = (u8)(-(s != 0));
gfx->framebuffer[y * gfx->framebuffer_width + x] = (s & m) | (d & ~m);
f32 old_z = gfx->zbuffer[idx];
u32 zm = -(s != 0);
u32 z_bits, old_z_bits;
memcpy(&z_bits, &z, sizeof(u32));
memcpy(&old_z_bits, &old_z, sizeof(u32));
u32 new_z_bits = (z_bits & zm) | (old_z_bits & ~zm);
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
}
}
}