add colored lighting back in via the colormap

This commit is contained in:
asrael 2026-02-05 03:27:35 -06:00
parent 01e6059dd1
commit a29a6018b8
16 changed files with 149 additions and 466 deletions

View file

@ -12,16 +12,6 @@
#include <stdlib.h>
#include <string.h>
// Statistics instrumentation helpers, selected at compile time by
// PXL8_GFX_ENABLE_STATS. When enabled they update fields of a stats struct;
// when disabled they expand to statements/values that do no bookkeeping.
#if PXL8_GFX_ENABLE_STATS
// Adds `val` to member `field` of the struct pointed to by `stats`.
#define STATS_INC(stats, field, val) do { (stats)->field += (val); } while (0)
// Yields the current value of pxl8_get_ticks_ns(), used as a timing start mark.
#define STATS_START() pxl8_get_ticks_ns()
// Adds the ticks elapsed since `start` (pxl8_get_ticks_ns() - start) to `field`.
#define STATS_ADD(stats, field, start) do { (stats)->field += pxl8_get_ticks_ns() - (start); } while (0)
#else
// Disabled variants: `field` and `val` are dropped entirely; `stats` and
// `start` are only cast to void (presumably to silence unused warnings).
#define STATS_INC(stats, field, val) do { (void)(stats); } while (0)
// With stats disabled the start mark is the constant 0.
#define STATS_START() 0
#define STATS_ADD(stats, field, start) do { (void)(stats); (void)(start); } while (0)
#endif
typedef struct {
pxl8_vec4 clip_pos;
pxl8_vec3 world_pos;
@ -300,8 +290,7 @@ static void rasterize_triangle(
pxl8_shader_fn shader,
const pxl8_gfx_pipeline_desc* pipeline,
const pxl8_shader_bindings* bindings,
const pxl8_shader_uniforms* uniforms,
pxl8_gfx_stats* stats
const pxl8_shader_uniforms* uniforms
) {
const i32 SUBDIV = 16;
@ -490,8 +479,6 @@ static void rasterize_triangle(
pxl8_i32_simd zbuf = pxl8_i32_simd_set4((i32)zrow[px], (i32)zrow[px+1], (i32)zrow[px+2], (i32)zrow[px+3]);
i32 mask = pxl8_i32_simd_movemask(pxl8_i32_simd_cmpgt(zbuf, z16_4));
STATS_INC(stats, depth_tests, 4);
if (mask) {
pxl8_shader_ctx frag_ctx = {
.color_count = 4,
@ -517,20 +504,16 @@ static void rasterize_triangle(
u8 colors[4];
shader(&frag_ctx, bindings, uniforms, colors);
STATS_INC(stats, shader_calls, 1);
i32 z16_arr[4];
pxl8_i32_simd_store(z16_arr, z16_4);
for (i32 i = 0; i < 4; i++) {
if (!(mask & (0x8 << (i * 4)))) continue;
STATS_INC(stats, depth_passes, 1);
u8 color = colors[i];
if (!(alpha_test && color <= alpha_ref) && color != 0) {
prow[px + i] = color;
if (depth_write) zrow[px + i] = (u16)z16_arr[i];
STATS_INC(stats, pixels_written, 1);
}
}
}
@ -540,16 +523,12 @@ static void rasterize_triangle(
z4 = pxl8_f32_simd_add(z4, dz4_simd);
}
}
#endif
for (; px <= span_end; px++) {
f32 depth_norm = pxl8_clamp((z_a + 1.0f) * 0.5f, 0.0f, 1.0f);
u16 z16 = (u16)(depth_norm * 65535.0f);
STATS_INC(stats, depth_tests, 1);
bool depth_pass = !depth_test || depth_test_pass(depth_compare, z16, zrow[px]);
if (depth_pass) {
STATS_INC(stats, depth_passes, 1);
pxl8_shader_ctx frag_ctx = {
.color_count = 1,
.x = pxl8_i32_simd_set(px),
@ -564,7 +543,6 @@ static void rasterize_triangle(
u8 color;
shader(&frag_ctx, bindings, uniforms, &color);
STATS_INC(stats, shader_calls, 1);
if (!(alpha_test && color <= alpha_ref)) {
if (color != 0) {
@ -577,7 +555,6 @@ static void rasterize_triangle(
if (depth_write) {
zrow[px] = z16;
}
STATS_INC(stats, pixels_written, 1);
}
}
}
@ -591,6 +568,53 @@ static void rasterize_triangle(
wy_a += dwy;
wz_a += dwz;
}
#else
for (; px <= span_end; px++) {
f32 depth_norm = pxl8_clamp((z_a + 1.0f) * 0.5f, 0.0f, 1.0f);
u16 z16 = (u16)(depth_norm * 65535.0f);
bool depth_pass = !depth_test || depth_test_pass(depth_compare, z16, zrow[px]);
if (depth_pass) {
pxl8_shader_ctx frag_ctx = {
.color_count = 1,
.x = px,
.y = y,
.v_uv = { u_a, v_a },
.v_world = { wx_a, wy_a, wz_a },
.v_normal = setup->normal,
.v_light = l_a / 255.0f,
.v_color = c_a,
.v_depth = z_a,
};
u8 color;
shader(&frag_ctx, bindings, uniforms, &color);
if (!(alpha_test && color <= alpha_ref)) {
if (color != 0) {
u8 out_color = color;
if (blend_enabled) {
out_color = blend_indexed(pipeline, color, prow[px], palette, colormap);
}
prow[px] = out_color;
if (depth_write) {
zrow[px] = z16;
}
}
}
}
u_a += du;
v_a += dv;
l_a += dl;
c_a += dc;
z_a += dz;
wx_a += dwx;
wy_a += dwy;
wz_a += dwz;
}
#endif
wr += dwr * (f32)span_len;
uw += duw * (f32)span_len;
@ -618,8 +642,7 @@ static void draw_line_clipped(
i32 clip_min_x,
i32 clip_min_y,
i32 clip_max_x,
i32 clip_max_y,
pxl8_gfx_stats* stats
i32 clip_max_y
) {
i32 dx = abs(x1 - x0);
i32 dy = -abs(y1 - y0);
@ -631,7 +654,6 @@ static void draw_line_clipped(
if (x0 >= clip_min_x && x0 <= clip_max_x && y0 >= clip_min_y && y0 <= clip_max_y) {
if (x0 >= 0 && y0 >= 0 && x0 < (i32)fb_w && y0 < (i32)fb_h) {
fb[y0 * (i32)fb_w + x0] = color;
STATS_INC(stats, pixels_written, 1);
}
}
if (x0 == x1 && y0 == y1) break;
@ -718,7 +740,6 @@ struct pxl8_renderer {
u32 scissor_w, scissor_h;
pxl8_shader_fn shader;
pxl8_gfx_stats stats;
};
struct pxl8_gfx_cmdbuf {
@ -735,7 +756,6 @@ pxl8_renderer* pxl8_renderer_create(u32 width, u32 height) {
r->viewport_h = height;
r->scissor_w = width;
r->scissor_h = height;
pxl8_renderer_reset_stats(r);
return r;
}
@ -762,20 +782,10 @@ void pxl8_renderer_set_shader(pxl8_renderer* r, pxl8_shader_fn fn) {
if (r) r->shader = fn;
}
// Clears every byte of the renderer's `stats` member to zero.
// A NULL renderer is accepted and ignored.
void pxl8_renderer_reset_stats(pxl8_renderer* r) {
    if (r) {
        memset(&r->stats, 0, sizeof r->stats);
    }
}
// Returns a read-only pointer to the renderer's `stats` member,
// or NULL when `r` itself is NULL.
const pxl8_gfx_stats* pxl8_renderer_get_stats(const pxl8_renderer* r) {
    if (!r) {
        return NULL;
    }
    return &r->stats;
}
// Computes the storage size in bytes of a `w` x `h` texture of format `fmt`:
// 1 byte per texel for INDEXED8, 2 for DEPTH16, 4 for LIGHT_ACCUM.
// Any other format value yields 0.
static u32 texture_byte_size(pxl8_gfx_texture_format fmt, u32 w, u32 h) {
    u32 bytes_per_texel = 0;

    switch (fmt) {
    case PXL8_GFX_FORMAT_INDEXED8:
        bytes_per_texel = 1;
        break;
    case PXL8_GFX_FORMAT_DEPTH16:
        bytes_per_texel = 2;
        break;
    case PXL8_GFX_FORMAT_LIGHT_ACCUM:
        bytes_per_texel = 4;
        break;
    }

    // Unrecognised formats leave the multiplier at 0, so the result is 0.
    return w * h * bytes_per_texel;
}
@ -1085,9 +1095,6 @@ static void execute_draw(
if (!VALID_PASS(r, r->current_pass)) return;
if (!VALID_PIPELINE(r, r->current_pipeline)) return;
u64 exec_start = STATS_START();
STATS_INC(&r->stats, draw_calls, 1);
buffer_slot* vb = &r->buffers[SLOT_INDEX(cmd->vertex_buffer.id)];
buffer_slot* ib = use_indices ? &r->buffers[SLOT_INDEX(cmd->index_buffer.id)] : NULL;
pass_slot* pass = &r->passes[SLOT_INDEX(r->current_pass.id)];
@ -1095,12 +1102,10 @@ static void execute_draw(
if (!VALID_TEX(r, pass->desc.color.texture)) {
pxl8_error("draw: invalid color texture");
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
return;
}
if (!VALID_TEX(r, pass->desc.depth.texture)) {
pxl8_error("draw: invalid depth texture");
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
return;
}
@ -1112,10 +1117,7 @@ static void execute_draw(
u32 fb_w = color_tex->width;
u32 fb_h = color_tex->height;
if (r->viewport_w == 0 || r->viewport_h == 0) {
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
return;
}
if (r->viewport_w == 0 || r->viewport_h == 0) return;
i32 vp_x = r->viewport_x;
i32 vp_y = r->viewport_y;
@ -1143,10 +1145,7 @@ static void execute_draw(
if (clip_min_y < 0) clip_min_y = 0;
if (clip_max_x >= (i32)fb_w) clip_max_x = (i32)fb_w - 1;
if (clip_max_y >= (i32)fb_h) clip_max_y = (i32)fb_h - 1;
if (clip_min_x > clip_max_x || clip_min_y > clip_max_y) {
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
return;
}
if (clip_min_x > clip_max_x || clip_min_y > clip_max_y) return;
const pxl8_vertex* vertices = vb->data;
const u16* indices = use_indices ? ib->data : NULL;
@ -1156,10 +1155,7 @@ static void execute_draw(
f32 near = 0.1f;
pxl8_shader_fn shader = pip->desc.shader;
if (!shader) {
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
return;
}
if (!shader) return;
pxl8_shader_bindings shader_bindings = {0};
pxl8_shader_uniforms shader_uniforms = r->current_draw_params.shader;
@ -1190,8 +1186,8 @@ static void execute_draw(
bool is_wireframe = pip->desc.rasterizer.fill == PXL8_GFX_FILL_WIREFRAME;
for (u32 i = cmd->first_index; i < cmd->first_index + cmd->index_count; i += 3) {
STATS_INC(&r->stats, triangles, 1);
u16 i0, i1, i2;
if (use_indices) {
if (i + 2 >= ib->size / sizeof(u16)) break;
i0 = indices[i] + cmd->base_vertex;
@ -1262,7 +1258,6 @@ static void execute_draw(
i32 clipped_count = clip_triangle_near(&rv0, &rv1, &rv2, near, clipped);
for (i32 t = 0; t < clipped_count; t += 3) {
STATS_INC(&r->stats, clipped_triangles, 1);
if (is_wireframe) {
f32 hw = (f32)vp_w * 0.5f;
f32 hh = (f32)vp_h * 0.5f;
@ -1286,11 +1281,11 @@ static void execute_draw(
u8 wire_color = v0->color ? v0->color : 15;
draw_line_clipped(fb, fb_w, fb_h, sx0, sy0, sx1, sy1, wire_color,
clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats);
clip_min_x, clip_min_y, clip_max_x, clip_max_y);
draw_line_clipped(fb, fb_w, fb_h, sx1, sy1, sx2, sy2, wire_color,
clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats);
clip_min_x, clip_min_y, clip_max_x, clip_max_y);
draw_line_clipped(fb, fb_w, fb_h, sx2, sy2, sx0, sy0, wire_color,
clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats);
clip_min_x, clip_min_y, clip_max_x, clip_max_y);
} else {
tri_setup setup;
if (!setup_tri(&setup, &clipped[t], &clipped[t+1], &clipped[t+2],
@ -1300,19 +1295,14 @@ static void execute_draw(
continue;
}
u64 raster_start = STATS_START();
rasterize_triangle(&setup, fb, zb, fb_w, shader, &pip->desc,
&shader_bindings, &shader_uniforms, &r->stats);
STATS_ADD(&r->stats, raster_ns, raster_start);
&shader_bindings, &shader_uniforms);
}
}
}
STATS_ADD(&r->stats, execute_draw_ns, exec_start);
}
void pxl8_gfx_submit(pxl8_renderer* r, pxl8_gfx_cmdbuf* cb) {
u64 submit_start = STATS_START();
for (u32 i = 0; i < cb->count; i++) {
pxl8_gfx_cmd* cmd = &cb->commands[i];
switch (cmd->type) {
@ -1322,13 +1312,8 @@ void pxl8_gfx_submit(pxl8_renderer* r, pxl8_gfx_cmdbuf* cb) {
pass_slot* p = &r->passes[SLOT_INDEX(cmd->begin_pass.pass.id)];
if (p->desc.color.load == PXL8_GFX_LOAD_CLEAR) {
pxl8_clear(r, p->desc.color.texture, p->desc.color.clear_value);
}
if (p->desc.depth.load == PXL8_GFX_LOAD_CLEAR) {
pxl8_clear_depth(r, p->desc.depth.texture);
}
if (p->desc.light_accum.load == PXL8_GFX_LOAD_CLEAR) {
pxl8_clear_light(r, p->desc.light_accum.texture);
}
}
break;
case PXL8_GFX_CMD_END_PASS:
@ -1368,7 +1353,6 @@ void pxl8_gfx_submit(pxl8_renderer* r, pxl8_gfx_cmdbuf* cb) {
r->buffers[i].append_pos = 0;
}
}
STATS_ADD(&r->stats, submit_ns, submit_start);
}
void pxl8_clear(pxl8_renderer* r, pxl8_gfx_texture target, u8 color) {
@ -1387,14 +1371,6 @@ void pxl8_clear_depth(pxl8_renderer* r, pxl8_gfx_texture target) {
}
}
// Zero-fills the data of a light-accumulation texture.
// Does nothing when `target` fails the VALID_TEX check or when the texture's
// format is not PXL8_GFX_FORMAT_LIGHT_ACCUM. The fill spans
// width * height * 4 bytes.
void pxl8_clear_light(pxl8_renderer* r, pxl8_gfx_texture target) {
    if (!VALID_TEX(r, target)) return;

    texture_slot* slot = &r->textures[SLOT_INDEX(target.id)];
    if (slot->format != PXL8_GFX_FORMAT_LIGHT_ACCUM) return;

    memset(slot->data, 0, slot->width * slot->height * 4);
}
void pxl8_draw_pixel(pxl8_renderer* r, pxl8_gfx_texture target, i32 x, i32 y, u8 color) {
if (!VALID_TEX(r, target)) return;
texture_slot* s = &r->textures[SLOT_INDEX(target.id)];
@ -1516,8 +1492,7 @@ void pxl8_draw_circle_fill(pxl8_renderer* r, pxl8_gfx_texture target, i32 cx, i3
}
}
void pxl8_resolve_to_rgba(pxl8_renderer* r, pxl8_gfx_texture color, pxl8_gfx_texture light_accum,
const u32* palette, u32* output) {
void pxl8_resolve_to_rgba(pxl8_renderer* r, pxl8_gfx_texture color, const u32* palette, u32* output) {
if (!VALID_TEX(r, color)) return;
texture_slot* cs = &r->textures[SLOT_INDEX(color.id)];
@ -1525,12 +1500,10 @@ void pxl8_resolve_to_rgba(pxl8_renderer* r, pxl8_gfx_texture color, pxl8_gfx_tex
u32 w = cs->width;
u32 h = cs->height;
u32 total = w * h;
(void)light_accum;
u32 i = 0;
#if defined(PXL8_SIMD_SSE) || defined(PXL8_SIMD_NEON)
pxl8_i32_simd alpha_mask = pxl8_i32_simd_set((i32)0xFF000000);
u32 i = 0;
for (; i + 4 <= total; i += 4) {
pxl8_i32_simd base = pxl8_i32_simd_set4(
(i32)palette[fb[i + 0]], (i32)palette[fb[i + 1]],
@ -1539,12 +1512,10 @@ void pxl8_resolve_to_rgba(pxl8_renderer* r, pxl8_gfx_texture color, pxl8_gfx_tex
base = pxl8_i32_simd_or(base, alpha_mask);
pxl8_i32_simd_store((i32*)&output[i], base);
}
#endif
for (; i < total; i++) {
output[i] = palette[fb[i]] | 0xFF000000;
}
#else
for (u32 i = 0; i < total; i++) {
output[i] = palette[fb[i]] | 0xFF000000;
}
#endif
}