branchless sprite blit and zbuffer updates
This commit is contained in:
parent
349fa8bc81
commit
a33d4c0068
2 changed files with 137 additions and 41 deletions
|
|
@ -9,10 +9,29 @@ void pxl8_blit_hicolor(u16* fb, u32 fb_width, const u16* sprite, u32 atlas_width
|
|||
u16* dest_row = dest_base + row * fb_width;
|
||||
const u16* src_row = src_base + row * atlas_width;
|
||||
|
||||
for (u32 col = 0; col < w; col++) {
|
||||
if (src_row[col] != 0) {
|
||||
dest_row[col] = src_row[col];
|
||||
u32 col = 0;
|
||||
u32 count2 = w / 2;
|
||||
for (u32 i = 0; i < count2; i++) {
|
||||
u32 pixels = ((const u32*)src_row)[i];
|
||||
if (pixels == 0) {
|
||||
col += 2;
|
||||
continue;
|
||||
}
|
||||
u16 s0 = (u16)(pixels);
|
||||
u16 s1 = (u16)(pixels >> 16);
|
||||
u16 d0 = dest_row[col];
|
||||
u16 d1 = dest_row[col + 1];
|
||||
u16 m0 = (u16)(-(s0 != 0));
|
||||
u16 m1 = (u16)(-(s1 != 0));
|
||||
dest_row[col] = (s0 & m0) | (d0 & ~m0);
|
||||
dest_row[col + 1] = (s1 & m1) | (d1 & ~m1);
|
||||
col += 2;
|
||||
}
|
||||
if (w & 1) {
|
||||
u16 s = src_row[col];
|
||||
u16 d = dest_row[col];
|
||||
u16 m = (u16)(-(s != 0));
|
||||
dest_row[col] = (s & m) | (d & ~m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -26,10 +45,37 @@ void pxl8_blit_indexed(u8* fb, u32 fb_width, const u8* sprite, u32 atlas_width,
|
|||
u8* dest_row = dest_base + row * fb_width;
|
||||
const u8* src_row = src_base + row * atlas_width;
|
||||
|
||||
for (u32 col = 0; col < w; col++) {
|
||||
if (src_row[col] != 0) {
|
||||
dest_row[col] = src_row[col];
|
||||
u32 col = 0;
|
||||
u32 count4 = w / 4;
|
||||
for (u32 i = 0; i < count4; i++) {
|
||||
u32 pixels = ((const u32*)src_row)[i];
|
||||
if (pixels == 0) {
|
||||
col += 4;
|
||||
continue;
|
||||
}
|
||||
u8 s0 = (u8)(pixels);
|
||||
u8 s1 = (u8)(pixels >> 8);
|
||||
u8 s2 = (u8)(pixels >> 16);
|
||||
u8 s3 = (u8)(pixels >> 24);
|
||||
u8 d0 = dest_row[col];
|
||||
u8 d1 = dest_row[col + 1];
|
||||
u8 d2 = dest_row[col + 2];
|
||||
u8 d3 = dest_row[col + 3];
|
||||
u8 m0 = (u8)(-(s0 != 0));
|
||||
u8 m1 = (u8)(-(s1 != 0));
|
||||
u8 m2 = (u8)(-(s2 != 0));
|
||||
u8 m3 = (u8)(-(s3 != 0));
|
||||
dest_row[col] = (s0 & m0) | (d0 & ~m0);
|
||||
dest_row[col + 1] = (s1 & m1) | (d1 & ~m1);
|
||||
dest_row[col + 2] = (s2 & m2) | (d2 & ~m2);
|
||||
dest_row[col + 3] = (s3 & m3) | (d3 & ~m3);
|
||||
col += 4;
|
||||
}
|
||||
for (; col < w; col++) {
|
||||
u8 s = src_row[col];
|
||||
u8 d = dest_row[col];
|
||||
u8 m = (u8)(-(s != 0));
|
||||
dest_row[col] = (s & m) | (d & ~m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
120
src/pxl8_gfx.c
|
|
@ -382,8 +382,14 @@ void pxl8_clear(pxl8_gfx* gfx, u32 color) {
|
|||
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
|
||||
u16* fb16 = (u16*)gfx->framebuffer;
|
||||
u16 color16 = pxl8_rgba32_to_rgb565(color);
|
||||
for (i32 i = 0; i < size; i++) {
|
||||
fb16[i] = color16;
|
||||
u32 pattern = (u32)color16 | ((u32)color16 << 16);
|
||||
u32* fb32 = (u32*)fb16;
|
||||
i32 count2 = size / 2;
|
||||
for (i32 i = 0; i < count2; i++) {
|
||||
fb32[i] = pattern;
|
||||
}
|
||||
if (size & 1) {
|
||||
fb16[size - 1] = color16;
|
||||
}
|
||||
} else {
|
||||
memset(gfx->framebuffer, color & 0xFF, size);
|
||||
|
|
@ -466,9 +472,30 @@ void pxl8_rect_fill(pxl8_gfx* gfx, i32 x, i32 y, i32 w, i32 h, u32 color) {
|
|||
i32 x1 = (x + w > gfx->framebuffer_width) ? gfx->framebuffer_width : x + w;
|
||||
i32 y1 = (y + h > gfx->framebuffer_height) ? gfx->framebuffer_height : y + h;
|
||||
|
||||
for (i32 py = y0; py < y1; py++) {
|
||||
for (i32 px = x0; px < x1; px++) {
|
||||
pxl8_pixel_unchecked(gfx, px, py, color);
|
||||
i32 rect_w = x1 - x0;
|
||||
if (rect_w <= 0 || y1 <= y0) return;
|
||||
|
||||
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
|
||||
u16* fb16 = (u16*)gfx->framebuffer;
|
||||
u16 color16 = pxl8_rgba32_to_rgb565(color);
|
||||
u32 pattern = (u32)color16 | ((u32)color16 << 16);
|
||||
|
||||
for (i32 py = y0; py < y1; py++) {
|
||||
u16* row = fb16 + py * gfx->framebuffer_width + x0;
|
||||
i32 count2 = rect_w / 2;
|
||||
u32* row32 = (u32*)row;
|
||||
for (i32 i = 0; i < count2; i++) {
|
||||
row32[i] = pattern;
|
||||
}
|
||||
if (rect_w & 1) {
|
||||
row[rect_w - 1] = color16;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
u8 color8 = color & 0xFF;
|
||||
for (i32 py = y0; py < y1; py++) {
|
||||
u8* row = gfx->framebuffer + py * gfx->framebuffer_width + x0;
|
||||
memset(row, color8, rect_w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -617,15 +644,15 @@ void pxl8_sprite(pxl8_gfx* gfx, u32 sprite_id, i32 x, i32 y, i32 w, i32 h) {
|
|||
i32 dest_idx = (dest_y + py) * gfx->framebuffer_width + (dest_x + px);
|
||||
|
||||
if (gfx->pixel_mode == PXL8_PIXEL_HICOLOR) {
|
||||
u16 pixel = ((const u16*)atlas_pixels)[src_idx];
|
||||
if (pixel != 0) {
|
||||
((u16*)gfx->framebuffer)[dest_idx] = pixel;
|
||||
}
|
||||
u16 s = ((const u16*)atlas_pixels)[src_idx];
|
||||
u16 d = ((u16*)gfx->framebuffer)[dest_idx];
|
||||
u16 m = (u16)(-(s != 0));
|
||||
((u16*)gfx->framebuffer)[dest_idx] = (s & m) | (d & ~m);
|
||||
} else {
|
||||
u8 pixel = atlas_pixels[src_idx];
|
||||
if (pixel != 0) {
|
||||
gfx->framebuffer[dest_idx] = pixel;
|
||||
}
|
||||
u8 s = atlas_pixels[src_idx];
|
||||
u8 d = gfx->framebuffer[dest_idx];
|
||||
u8 m = (u8)(-(s != 0));
|
||||
gfx->framebuffer[dest_idx] = (s & m) | (d & ~m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -802,9 +829,8 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
|
|||
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
|
||||
const f32 far_z = 1e30f;
|
||||
|
||||
f32* ptr = gfx->zbuffer;
|
||||
for (i32 i = 0; i < count; i++) {
|
||||
ptr[i] = far_z;
|
||||
gfx->zbuffer[i] = far_z;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1024,17 +1050,29 @@ static inline void pxl8_fill_scanline_textured(
|
|||
i32 atlas_idx = (atlas_y_base + ty) * atlas_width + (atlas_x_base + tx);
|
||||
|
||||
if (is_hicolor) {
|
||||
u16 color = ((const u16*)atlas_pixels)[atlas_idx];
|
||||
if (color != 0) {
|
||||
gfx->zbuffer[idx] = z0;
|
||||
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs] = color;
|
||||
}
|
||||
u16 s = ((const u16*)atlas_pixels)[atlas_idx];
|
||||
u16 d = ((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs];
|
||||
u16 m = (u16)(-(s != 0));
|
||||
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + xs] = (s & m) | (d & ~m);
|
||||
f32 old_z = gfx->zbuffer[idx];
|
||||
u32 zm = -(s != 0);
|
||||
u32 z0_bits, old_z_bits;
|
||||
memcpy(&z0_bits, &z0, sizeof(u32));
|
||||
memcpy(&old_z_bits, &old_z, sizeof(u32));
|
||||
u32 new_z_bits = (z0_bits & zm) | (old_z_bits & ~zm);
|
||||
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
|
||||
} else {
|
||||
u8 color = atlas_pixels[atlas_idx];
|
||||
if (color != 0) {
|
||||
gfx->zbuffer[idx] = z0;
|
||||
gfx->framebuffer[y * gfx->framebuffer_width + xs] = color;
|
||||
}
|
||||
u8 s = atlas_pixels[atlas_idx];
|
||||
u8 d = gfx->framebuffer[y * gfx->framebuffer_width + xs];
|
||||
u8 m = (u8)(-(s != 0));
|
||||
gfx->framebuffer[y * gfx->framebuffer_width + xs] = (s & m) | (d & ~m);
|
||||
f32 old_z = gfx->zbuffer[idx];
|
||||
u32 zm = -(s != 0);
|
||||
u32 z0_bits, old_z_bits;
|
||||
memcpy(&z0_bits, &z0, sizeof(u32));
|
||||
memcpy(&old_z_bits, &old_z, sizeof(u32));
|
||||
u32 new_z_bits = (z0_bits & zm) | (old_z_bits & ~zm);
|
||||
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1080,17 +1118,29 @@ static inline void pxl8_fill_scanline_textured(
|
|||
i32 atlas_idx = (atlas_y_base + ty) * atlas_width + (atlas_x_base + tx);
|
||||
|
||||
if (is_hicolor) {
|
||||
u16 color = ((const u16*)atlas_pixels)[atlas_idx];
|
||||
if (color != 0) {
|
||||
gfx->zbuffer[idx] = z;
|
||||
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x] = color;
|
||||
}
|
||||
u16 s = ((const u16*)atlas_pixels)[atlas_idx];
|
||||
u16 d = ((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x];
|
||||
u16 m = (u16)(-(s != 0));
|
||||
((u16*)gfx->framebuffer)[y * gfx->framebuffer_width + x] = (s & m) | (d & ~m);
|
||||
f32 old_z = gfx->zbuffer[idx];
|
||||
u32 zm = -(s != 0);
|
||||
u32 z_bits, old_z_bits;
|
||||
memcpy(&z_bits, &z, sizeof(u32));
|
||||
memcpy(&old_z_bits, &old_z, sizeof(u32));
|
||||
u32 new_z_bits = (z_bits & zm) | (old_z_bits & ~zm);
|
||||
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
|
||||
} else {
|
||||
u8 color = atlas_pixels[atlas_idx];
|
||||
if (color != 0) {
|
||||
gfx->zbuffer[idx] = z;
|
||||
gfx->framebuffer[y * gfx->framebuffer_width + x] = color;
|
||||
}
|
||||
u8 s = atlas_pixels[atlas_idx];
|
||||
u8 d = gfx->framebuffer[y * gfx->framebuffer_width + x];
|
||||
u8 m = (u8)(-(s != 0));
|
||||
gfx->framebuffer[y * gfx->framebuffer_width + x] = (s & m) | (d & ~m);
|
||||
f32 old_z = gfx->zbuffer[idx];
|
||||
u32 zm = -(s != 0);
|
||||
u32 z_bits, old_z_bits;
|
||||
memcpy(&z_bits, &z, sizeof(u32));
|
||||
memcpy(&old_z_bits, &old_z, sizeof(u32));
|
||||
u32 new_z_bits = (z_bits & zm) | (old_z_bits & ~zm);
|
||||
memcpy(&gfx->zbuffer[idx], &new_z_bits, sizeof(f32));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue