2025-08-13 15:04:49 -05:00
|
|
|
#include "pxl8_blit.h"
|
|
|
|
|
#include "pxl8_simd.h"
|
|
|
|
|
|
2025-09-28 13:10:29 -05:00
|
|
|
/**
 * Blit a w x h rectangle of 32-bit pixels from `sprite` into `fb`, skipping
 * source pixels that are keyed out as transparent.
 *
 * The scalar tail treats a pixel as transparent when its top byte
 * (0xFF000000) is zero; the vector path applies the same test using
 * pxl8_simd_alpha_mask_u32() (presumably the per-lane equivalent of
 * 0xFF000000 -- confirm in pxl8_simd.h).
 *
 * No clipping or NULL checking is performed here: the caller must ensure the
 * destination rectangle lies inside the framebuffer and the source rectangle
 * lies inside the atlas.
 *
 * @param fb           Destination framebuffer (written).
 * @param fb_width     Destination row stride, in pixels.
 * @param sprite       First source pixel of the sprite (read only).
 * @param atlas_width  Source row stride, in pixels.
 * @param x            Destination column of the rectangle's left edge.
 * @param y            Destination row of the rectangle's top edge.
 * @param w            Rectangle width, in pixels.
 * @param h            Rectangle height, in pixels.
 */
void pxl8_blit_simd_hicolor(u32* fb, u32 fb_width, const u32* sprite, u32 atlas_width,
                            i32 x, i32 y, u32 w, u32 h) {
    // Compute the destination offset in wide signed arithmetic. Written as
    // `y * fb_width`, the product is evaluated as unsigned 32-bit, so a
    // negative y wraps to a huge positive element offset on 64-bit targets
    // instead of stepping backwards from `fb`.
    u32* dest_base = fb + ((long long)y * (long long)fb_width + (long long)x);
    const u32* src_base = sprite;

    // Loop-invariant vectors: build them once rather than per vector step.
    pxl8_simd_vec alpha_mask = pxl8_simd_alpha_mask_u32();
    pxl8_simd_vec zero = pxl8_simd_zero_u8();

    for (u32 row = 0; row < h; row++) {
        u32* dest_row = dest_base + row * fb_width;
        const u32* src_row = src_base + row * atlas_width;

        u32 col = 0;

        // Vector body: handle PXL8_SIMD_WIDTH_U32 pixels per iteration.
        for (; col + PXL8_SIMD_WIDTH_U32 <= w; col += PXL8_SIMD_WIDTH_U32) {
            pxl8_simd_vec src_vec = pxl8_simd_load_u32(src_row + col);
            pxl8_simd_vec dest_vec = pxl8_simd_load_u32(dest_row + col);

            // Lanes whose alpha bits are all zero are flagged in `mask`.
            pxl8_simd_vec has_alpha = pxl8_simd_and(src_vec, alpha_mask);
            pxl8_simd_vec mask = pxl8_simd_cmpeq_u32(has_alpha, zero);

            // Assumes blendv yields dest_vec where `mask` is set and src_vec
            // elsewhere, matching the scalar tail below -- confirm in
            // pxl8_simd.h.
            pxl8_simd_vec result = pxl8_simd_blendv_u32(src_vec, dest_vec, mask);
            pxl8_simd_store_u32(dest_row + col, result);
        }

        // Scalar tail: remaining pixels that do not fill a whole vector.
        for (; col < w; col++) {
            if (src_row[col] & 0xFF000000) {
                dest_row[col] = src_row[col];
            }
        }
    }
}
|
|
|
|
|
|
2025-09-28 13:10:29 -05:00
|
|
|
/**
 * Blit a w x h rectangle of 8-bit pixels from `sprite` into `fb`, skipping
 * source pixels whose value is 0 (treated as transparent).
 *
 * No clipping or NULL checking is performed here: the caller must ensure the
 * destination rectangle lies inside the framebuffer and the source rectangle
 * lies inside the atlas.
 *
 * @param fb           Destination framebuffer (written).
 * @param fb_width     Destination row stride, in pixels.
 * @param sprite       First source pixel of the sprite (read only).
 * @param atlas_width  Source row stride, in pixels.
 * @param x            Destination column of the rectangle's left edge.
 * @param y            Destination row of the rectangle's top edge.
 * @param w            Rectangle width, in pixels.
 * @param h            Rectangle height, in pixels.
 */
void pxl8_blit_simd_indexed(u8* fb, u32 fb_width, const u8* sprite, u32 atlas_width,
                            i32 x, i32 y, u32 w, u32 h) {
    // Compute the destination offset in wide signed arithmetic. Written as
    // `y * fb_width`, the product is evaluated as unsigned 32-bit, so a
    // negative y wraps to a huge positive element offset on 64-bit targets
    // instead of stepping backwards from `fb`.
    u8* dest_base = fb + ((long long)y * (long long)fb_width + (long long)x);
    const u8* src_base = sprite;

    // Loop-invariant vector: build it once rather than per vector step.
    pxl8_simd_vec zero = pxl8_simd_zero_u8();

    for (u32 row = 0; row < h; row++) {
        u8* dest_row = dest_base + row * fb_width;
        const u8* src_row = src_base + row * atlas_width;

        u32 col = 0;

        // Vector body: handle PXL8_SIMD_WIDTH_U8 pixels per iteration.
        for (; col + PXL8_SIMD_WIDTH_U8 <= w; col += PXL8_SIMD_WIDTH_U8) {
            pxl8_simd_vec src_vec = pxl8_simd_load_u8(src_row + col);
            pxl8_simd_vec dest_vec = pxl8_simd_load_u8(dest_row + col);

            // Lanes holding value 0 are flagged in `mask`.
            pxl8_simd_vec mask = pxl8_simd_cmpeq_u8(src_vec, zero);

            // Assumes blendv yields dest_vec where `mask` is set and src_vec
            // elsewhere, matching the scalar tail below -- confirm in
            // pxl8_simd.h.
            pxl8_simd_vec result = pxl8_simd_blendv_u8(src_vec, dest_vec, mask);
            pxl8_simd_store_u8(dest_row + col, result);
        }

        // Scalar tail: remaining pixels that do not fill a whole vector.
        for (; col < w; col++) {
            if (src_row[col] != 0) {
                dest_row[col] = src_row[col];
            }
        }
    }
}
|