#include "pxl8_render.h"

#include "pxl8_atlas.h"
#include "pxl8_colormap.h"
#include "pxl8_dither.h"
#include "pxl8_hal.h"
#include "pxl8_log.h"
#include "pxl8_mem.h"
#include "pxl8_mesh.h"
#include "pxl8_shader.h"

// NOTE(review): the three standard header names were missing from the text
// under review (bare `#include` directives). They are restored here from the
// library calls this file makes: floorf/ceilf, abs, memcpy/memset. Confirm
// against version control.
#include <math.h>
#include <stdlib.h>
#include <string.h>

// Statistics helpers. They compile to no-ops (while still "using" their
// arguments) when PXL8_GFX_ENABLE_STATS is off.
#if PXL8_GFX_ENABLE_STATS
#define STATS_INC(stats, field, val) do { (stats)->field += (val); } while (0)
#define STATS_START() pxl8_get_ticks_ns()
#define STATS_ADD(stats, field, start) do { (stats)->field += pxl8_get_ticks_ns() - (start); } while (0)
#else
#define STATS_INC(stats, field, val) do { (void)(stats); } while (0)
#define STATS_START() 0
#define STATS_ADD(stats, field, start) do { (void)(stats); (void)(start); } while (0)
#endif

// One vertex as seen by the rasterizer: clip-space position plus the
// attributes that get interpolated across the triangle.
typedef struct {
    pxl8_vec4 clip_pos;
    pxl8_vec3 world_pos;
    pxl8_vec3 normal;
    f32 u, v;
    u8 color;
    u8 light;
} raster_vertex;

// Per-triangle state produced by setup_tri() and consumed by
// rasterize_triangle(). Components .x/.y/.z of the *_w vectors hold the value
// for the first/second/third vertex after sorting by screen y.
typedef struct {
    pxl8_vec3 p0, p1, p2;                    // screen-space x, y and z/w per vertex
    pxl8_vec3 w_recip;                       // 1/w per vertex
    pxl8_vec3 u_w, v_w;                      // u/w and v/w per vertex
    pxl8_vec3 l_w;                           // light/w per vertex
    pxl8_vec3 c_w;                           // color/w per vertex
    pxl8_vec3 world0_w, world1_w, world2_w;  // world position / w per vertex
    pxl8_vec3 normal;
    i32 y_start, y_end;                      // inclusive scanline range
    f32 inv_total;                           // 1 / (p2.y - p0.y)
    u32 target_width, target_height;
    i32 clip_min_x, clip_min_y;
    i32 clip_max_x, clip_max_y;
} tri_setup;

// Evaluates the depth comparison `src func dst`. Values outside the
// enumerated cases pass.
static inline bool depth_test_pass(pxl8_gfx_compare_func func, u16 src, u16 dst) {
    switch (func) {
        case PXL8_GFX_COMPARE_NEVER:    return false;
        case PXL8_GFX_COMPARE_LESS:     return src < dst;
        case PXL8_GFX_COMPARE_EQUAL:    return src == dst;
        case PXL8_GFX_COMPARE_LEQUAL:   return src <= dst;
        case PXL8_GFX_COMPARE_GREATER:  return src > dst;
        case PXL8_GFX_COMPARE_NOTEQUAL: return src != dst;
        case PXL8_GFX_COMPARE_GEQUAL:   return src >= dst;
        case PXL8_GFX_COMPARE_ALWAYS:   return true;
    }
    return true;
}

// Maps a blend factor enum to its scalar weight given source and destination
// alpha. Values outside the enumerated cases yield 1.0.
static inline f32 blend_factor_value(pxl8_gfx_blend_factor factor, f32 src_a, f32 dst_a) {
    switch (factor) {
        case PXL8_GFX_BLEND_ZERO:          return 0.0f;
        case PXL8_GFX_BLEND_ONE:           return 1.0f;
        case PXL8_GFX_BLEND_SRC_ALPHA:     return src_a;
        case PXL8_GFX_BLEND_INV_SRC_ALPHA: return 1.0f - src_a;
        case PXL8_GFX_BLEND_DST_ALPHA:     return dst_a;
        case PXL8_GFX_BLEND_INV_DST_ALPHA: return 1.0f - dst_a;
    }
    return 1.0f;
}

// Returns the palette index in 1..255 whose RGB (low three bytes of the
// entry: r, g, b from least significant) has the smallest squared distance to
// (r, g, b). Index 0 is never returned for a non-NULL palette; a NULL palette
// yields 0.
static u8 palette_find_closest(const u32* palette, u8 r, u8 g, u8 b) {
    if (!palette) return 0;

    u8 best_idx = 1;
    u32 best_dist = 0xFFFFFFFF;
    for (u32 i = 1; i < 256; i++) {
        u8 pr = palette[i] & 0xFF;
        u8 pg = (palette[i] >> 8) & 0xFF;
        u8 pb = (palette[i] >> 16) & 0xFF;
        i32 dr = (i32)r - (i32)pr;
        i32 dg = (i32)g - (i32)pg;
        i32 db = (i32)b - (i32)pb;
        u32 dist = (u32)(dr * dr + dg * dg + db * db);
        if (dist < best_dist) {
            best_dist = dist;
            best_idx = (u8)i;
            if (dist == 0) break;  // exact match, nothing can beat it
        }
    }
    return best_idx;
}

// Blends two palette indices according to pipeline->blend and returns the
// resulting palette index.
//
// - Blending disabled or no pipeline: returns src.
// - src == 0: returns dst (index 0 is treated as fully transparent).
// - No palette: returns src, since RGB values cannot be looked up.
// Otherwise the RGB of both indices is weighted by the blend factors, clamped
// to 0..255, and mapped back to the nearest palette entry.
// `colormap` is accepted for interface compatibility and is not used.
static u8 blend_indexed(
    const pxl8_gfx_pipeline_desc* pipeline,
    u8 src,
    u8 dst,
    const u32* palette,
    const u8* colormap
) {
    (void)colormap;
    if (!pipeline || !pipeline->blend.enabled) return src;
    if (src == 0) return dst;
    if (!palette) return src;

    // src is non-zero past the guard above, so its alpha is always opaque.
    const f32 src_a = 1.0f;
    f32 dst_a = dst == 0 ? 0.0f : 1.0f;

    f32 sf = blend_factor_value(pipeline->blend.src, src_a, dst_a);
    f32 df = blend_factor_value(pipeline->blend.dst, src_a, dst_a);

    // Trivial factor pairs need no palette search.
    if (sf == 1.0f && df == 0.0f) return src;
    if (sf == 0.0f && df == 1.0f) return dst;

    u8 sr = palette[src] & 0xFF;
    u8 sg = (palette[src] >> 8) & 0xFF;
    u8 sb = (palette[src] >> 16) & 0xFF;
    u8 dr = palette[dst] & 0xFF;
    u8 dg = (palette[dst] >> 8) & 0xFF;
    u8 db = (palette[dst] >> 16) & 0xFF;

    i32 out_r = (i32)(sr * sf + dr * df);
    i32 out_g = (i32)(sg * sf + dg * df);
    i32 out_b = (i32)(sb * sf + db * df);

    if (out_r < 0) out_r = 0;
    if (out_g < 0) out_g = 0;
    if (out_b < 0) out_b = 0;
    if (out_r > 255) out_r = 255;
    if (out_g > 255) out_g = 255;
    if (out_b > 255) out_b = 255;

    return palette_find_closest(palette, (u8)out_r, (u8)out_g, (u8)out_b);
}

// Component-wise linear interpolation between two 4-vectors.
static inline pxl8_vec4 vec4_lerp(pxl8_vec4 a, pxl8_vec4 b, f32 t) {
    return (pxl8_vec4){
        a.x + (b.x - a.x) * t,
        a.y + (b.y - a.y) * t,
        a.z + (b.z - a.z) * t,
        a.w + (b.w - a.w) * t
    };
}

static raster_vertex lerp_raster_vertex(const raster_vertex* a, const raster_vertex* b, f32 t) {
    return (raster_vertex){
        .clip_pos = vec4_lerp(a->clip_pos, b->clip_pos, t),
        .world_pos = 
pxl8_vec3_lerp(a->world_pos, b->world_pos, t),
        .normal = pxl8_vec3_lerp(a->normal, b->normal, t),
        .u = pxl8_lerp(a->u, b->u, t),
        .v = pxl8_lerp(a->v, b->v, t),
        // u8 fields are interpolated in float and truncated back to u8.
        .color = (u8)(a->color + (b->color - a->color) * t),
        .light = (u8)(a->light + (b->light - a->light) * t),
    };
}

// Clips a triangle against the plane clip_pos.w == near.
//
// A vertex counts as inside when its clip-space w is >= near. Writes the
// surviving geometry to `out` and returns the number of vertices written:
// 0 (all outside), 3 (all inside, or one inside -> one smaller triangle), or
// 6 (two inside -> the resulting quad split into two triangles).
static i32 clip_triangle_near(
    const raster_vertex* v0,
    const raster_vertex* v1,
    const raster_vertex* v2,
    f32 near,
    raster_vertex out[6]
) {
    bool in0 = v0->clip_pos.w >= near;
    bool in1 = v1->clip_pos.w >= near;
    bool in2 = v2->clip_pos.w >= near;
    i32 count = in0 + in1 + in2;

    if (count == 0) return 0;

    if (count == 3) {
        out[0] = *v0;
        out[1] = *v1;
        out[2] = *v2;
        return 3;
    }

    if (count == 1) {
        // Rotate so `inside` comes first while keeping the v0->v1->v2 cyclic order.
        const raster_vertex *inside, *out_a, *out_b;
        if (in0) { inside = v0; out_a = v1; out_b = v2; }
        else if (in1) { inside = v1; out_a = v2; out_b = v0; }
        else { inside = v2; out_a = v0; out_b = v1; }

        // Parameter along outside->inside at which w reaches `near`.
        f32 t_a = (near - out_a->clip_pos.w) / (inside->clip_pos.w - out_a->clip_pos.w);
        f32 t_b = (near - out_b->clip_pos.w) / (inside->clip_pos.w - out_b->clip_pos.w);

        out[0] = *inside;
        out[1] = lerp_raster_vertex(out_a, inside, t_a);
        out[2] = lerp_raster_vertex(out_b, inside, t_b);
        return 3;
    }

    // count == 2: one vertex outside; the clipped shape is a quad.
    const raster_vertex *outside, *in_a, *in_b;
    if (!in0) { outside = v0; in_a = v1; in_b = v2; }
    else if (!in1) { outside = v1; in_a = v2; in_b = v0; }
    else { outside = v2; in_a = v0; in_b = v1; }

    f32 t_a = (near - outside->clip_pos.w) / (in_a->clip_pos.w - outside->clip_pos.w);
    f32 t_b = (near - outside->clip_pos.w) / (in_b->clip_pos.w - outside->clip_pos.w);

    raster_vertex new_a = lerp_raster_vertex(outside, in_a, t_a);
    raster_vertex new_b = lerp_raster_vertex(outside, in_b, t_b);

    // Quad (in_a, in_b, new_b, new_a) emitted as two triangles.
    out[0] = *in_a;
    out[1] = *in_b;
    out[2] = new_b;
    out[3] = *in_a;
    out[4] = new_b;
    out[5] = new_a;
    return 6;
}

// Prepares a triangle for scanline rasterization.
//
// Projects the three clip-space vertices into the viewport, applies face
// culling (skipped when `double_sided`), sorts the vertices by ascending
// screen y, and fills `setup` with per-vertex attributes divided by w.
// Returns false when the triangle is rejected: zero-sized viewport, culled,
// or less than one pixel tall. The caller is expected to have clipped against
// the near plane already; every clip_pos.w is used as a divisor here.
static bool setup_tri(
    tri_setup* setup,
    const raster_vertex* vo0,
    const raster_vertex* vo1,
    const raster_vertex* vo2,
    i32 viewport_x,
    i32 viewport_y,
    u32 viewport_w,
    u32 viewport_h,
    i32 clip_min_x,
    i32 clip_min_y,
    i32 clip_max_x,
    i32 clip_max_y,
    pxl8_gfx_cull_mode cull,
    bool double_sided
) {
    if (viewport_w == 0 || viewport_h == 0) return false;

    f32 hw = (f32)viewport_w * 0.5f;
    f32 hh = (f32)viewport_h * 0.5f;

    // Perspective divide plus viewport transform; y is flipped so +y in clip
    // space moves towards smaller screen rows.
    setup->p0.x = (f32)viewport_x + hw + vo0->clip_pos.x / vo0->clip_pos.w * hw;
    setup->p0.y = (f32)viewport_y + hh - vo0->clip_pos.y / vo0->clip_pos.w * hh;
    setup->p0.z = vo0->clip_pos.z / vo0->clip_pos.w;

    setup->p1.x = (f32)viewport_x + hw + vo1->clip_pos.x / vo1->clip_pos.w * hw;
    setup->p1.y = (f32)viewport_y + hh - vo1->clip_pos.y / vo1->clip_pos.w * hh;
    setup->p1.z = vo1->clip_pos.z / vo1->clip_pos.w;

    setup->p2.x = (f32)viewport_x + hw + vo2->clip_pos.x / vo2->clip_pos.w * hw;
    setup->p2.y = (f32)viewport_y + hh - vo2->clip_pos.y / vo2->clip_pos.w * hh;
    setup->p2.z = vo2->clip_pos.z / vo2->clip_pos.w;

    // Sign of the screen-space 2D cross product gives the winding.
    f32 cross = (setup->p1.x - setup->p0.x) * (setup->p2.y - setup->p0.y) -
                (setup->p1.y - setup->p0.y) * (setup->p2.x - setup->p0.x);

    if (!double_sided) {
        if (cull == PXL8_GFX_CULL_BACK && cross >= 0.0f) return false;
        if (cull == PXL8_GFX_CULL_FRONT && cross <= 0.0f) return false;
    }

    // Three-element sort by y; `sorted` tracks which input vertex ended up
    // in which position so attributes follow their positions.
    const raster_vertex* sorted[3] = {vo0, vo1, vo2};

    if (setup->p0.y > setup->p1.y) {
        pxl8_vec3 t = setup->p0; setup->p0 = setup->p1; setup->p1 = t;
        const raster_vertex* tv = sorted[0]; sorted[0] = sorted[1]; sorted[1] = tv;
    }
    if (setup->p0.y > setup->p2.y) {
        pxl8_vec3 t = setup->p0; setup->p0 = setup->p2; setup->p2 = t;
        const raster_vertex* tv = sorted[0]; sorted[0] = sorted[2]; sorted[2] = tv;
    }
    if (setup->p1.y > setup->p2.y) {
        pxl8_vec3 t = setup->p1; setup->p1 = setup->p2; setup->p2 = t;
        const raster_vertex* tv = sorted[1]; sorted[1] = sorted[2]; sorted[2] = tv;
    }

    // Triangles shorter than one pixel are dropped entirely.
    f32 total_height = setup->p2.y - setup->p0.y;
    if (total_height < 1.0f) return false;

    i32 y0_int = (i32)floorf(setup->p0.y);
    i32 y2_int = (i32)ceilf(setup->p2.y) - 1;
    setup->y_start = y0_int < clip_min_y ? clip_min_y : y0_int;
    setup->y_end = y2_int > clip_max_y ? clip_max_y : y2_int;

    // Attributes are stored pre-divided by w so they can be interpolated
    // linearly in screen space and recovered by dividing by interpolated 1/w.
    setup->w_recip.x = 1.0f / sorted[0]->clip_pos.w;
    setup->w_recip.y = 1.0f / sorted[1]->clip_pos.w;
    setup->w_recip.z = 1.0f / sorted[2]->clip_pos.w;

    setup->u_w.x = sorted[0]->u * setup->w_recip.x;
    setup->v_w.x = sorted[0]->v * setup->w_recip.x;
    setup->u_w.y = sorted[1]->u * setup->w_recip.y;
    setup->v_w.y = sorted[1]->v * setup->w_recip.y;
    setup->u_w.z = sorted[2]->u * setup->w_recip.z;
    setup->v_w.z = sorted[2]->v * setup->w_recip.z;

    setup->l_w.x = sorted[0]->light * setup->w_recip.x;
    setup->l_w.y = sorted[1]->light * setup->w_recip.y;
    setup->l_w.z = sorted[2]->light * setup->w_recip.z;

    setup->c_w.x = (f32)sorted[0]->color * setup->w_recip.x;
    setup->c_w.y = (f32)sorted[1]->color * setup->w_recip.y;
    setup->c_w.z = (f32)sorted[2]->color * setup->w_recip.z;

    setup->world0_w = pxl8_vec3_scale(sorted[0]->world_pos, setup->w_recip.x);
    setup->world1_w = pxl8_vec3_scale(sorted[1]->world_pos, setup->w_recip.y);
    setup->world2_w = pxl8_vec3_scale(sorted[2]->world_pos, setup->w_recip.z);

    // Only the first sorted vertex's normal is kept; it is used for the whole triangle.
    setup->normal = sorted[0]->normal;

    setup->inv_total = 1.0f / total_height;
    setup->target_width = viewport_w;
    setup->target_height = viewport_h;
    setup->clip_min_x = clip_min_x;
    setup->clip_min_y = clip_min_y;
    setup->clip_max_x = clip_max_x;
    setup->clip_max_y = clip_max_y;

    return true;
}

// Scanline-rasterizes one prepared triangle into an 8-bit indexed color
// buffer `fb` and a 16-bit depth buffer `zb`, both addressed with a row
// stride of `fb_width` elements.
//
// For every scanline the long edge (p0 -> p2, prefix a_) and the active short
// edge (p0 -> p1 or p1 -> p2, prefix b_) are evaluated, the span between them
// is clipped to setup->clip_min_x..clip_max_x, and the span is walked in
// chunks of SUBDIV pixels. Perspective division happens only at chunk ends;
// attributes are interpolated linearly inside a chunk. `shader` produces the
// palette index for each fragment; index 0 is never written. `pipeline` and
// `bindings` may be NULL, in which case depth test/write, alpha test and
// blending are off and palette/colormap are NULL.
static void rasterize_triangle(
    const tri_setup* setup,
    u8* fb,
    u16* zb,
    u32 fb_width,
    pxl8_shader_fn shader,
    const pxl8_gfx_pipeline_desc* pipeline,
    const pxl8_shader_bindings* bindings,
    const pxl8_shader_uniforms* uniforms,
    pxl8_gfx_stats* stats
) {
    const i32 SUBDIV = 16;  // pixels per perspective-correct chunk

    if (setup->y_start > setup->y_end) return;

    bool depth_test = pipeline && pipeline->depth.test;
    bool depth_write = pipeline && pipeline->depth.write;
    pxl8_gfx_compare_func depth_compare = pipeline ? pipeline->depth.compare : PXL8_GFX_COMPARE_ALWAYS;
    bool alpha_test = pipeline && pipeline->blend.alpha_test;
    u8 alpha_ref = pipeline ? pipeline->blend.alpha_ref : 0;
    bool blend_enabled = pipeline && pipeline->blend.enabled;
    const u32* palette = bindings ? bindings->palette : NULL;
    const u8* colormap = bindings ? bindings->colormap : NULL;

    for (i32 y = setup->y_start; y <= setup->y_end; y++) {
        f32 yf = (f32)y + 0.5f;  // sample at the pixel-row centre

        // Long edge p0 -> p2.
        f32 alpha = (yf - setup->p0.y) * setup->inv_total;
        f32 ax = setup->p0.x + (setup->p2.x - setup->p0.x) * alpha;
        f32 az = setup->p0.z + (setup->p2.z - setup->p0.z) * alpha;
        f32 a_wr = setup->w_recip.x + (setup->w_recip.z - setup->w_recip.x) * alpha;
        f32 a_uw = setup->u_w.x + (setup->u_w.z - setup->u_w.x) * alpha;
        f32 a_vw = setup->v_w.x + (setup->v_w.z - setup->v_w.x) * alpha;
        f32 a_lw = setup->l_w.x + (setup->l_w.z - setup->l_w.x) * alpha;
        f32 a_cw = setup->c_w.x + (setup->c_w.z - setup->c_w.x) * alpha;
        f32 a_wxw = setup->world0_w.x + (setup->world2_w.x - setup->world0_w.x) * alpha;
        f32 a_wyw = setup->world0_w.y + (setup->world2_w.y - setup->world0_w.y) * alpha;
        f32 a_wzw = setup->world0_w.z + (setup->world2_w.z - setup->world0_w.z) * alpha;

        f32 bx, bz, b_wr, b_uw, b_vw, b_lw, b_cw, b_wxw, b_wyw, b_wzw;

        // Pick the short edge: p1 -> p2 below p1 (or when the top edge is
        // flat), otherwise p0 -> p1.
        bool second_half = yf > setup->p1.y || setup->p1.y == setup->p0.y;
        f32 segment_height = second_half ? (setup->p2.y - setup->p1.y) : (setup->p1.y - setup->p0.y);
        if (segment_height < 0.001f) segment_height = 0.001f;  // avoid dividing by ~0

        f32 beta = (yf - (second_half ? setup->p1.y : setup->p0.y)) / segment_height;
        if (beta < 0.0f) beta = 0.0f;
        if (beta > 1.0f) beta = 1.0f;

        if (second_half) {
            bx = setup->p1.x + (setup->p2.x - setup->p1.x) * beta;
            bz = setup->p1.z + (setup->p2.z - setup->p1.z) * beta;
            b_wr = setup->w_recip.y + (setup->w_recip.z - setup->w_recip.y) * beta;
            b_uw = setup->u_w.y + (setup->u_w.z - setup->u_w.y) * beta;
            b_vw = setup->v_w.y + (setup->v_w.z - setup->v_w.y) * beta;
            b_lw = setup->l_w.y + (setup->l_w.z - setup->l_w.y) * beta;
            b_cw = setup->c_w.y + (setup->c_w.z - setup->c_w.y) * beta;
            b_wxw = setup->world1_w.x + (setup->world2_w.x - setup->world1_w.x) * beta;
            b_wyw = setup->world1_w.y + (setup->world2_w.y - setup->world1_w.y) * beta;
            b_wzw = setup->world1_w.z + (setup->world2_w.z - setup->world1_w.z) * beta;
        } else {
            bx = setup->p0.x + (setup->p1.x - setup->p0.x) * beta;
            bz = setup->p0.z + (setup->p1.z - setup->p0.z) * beta;
            b_wr = setup->w_recip.x + (setup->w_recip.y - setup->w_recip.x) * beta;
            b_uw = setup->u_w.x + (setup->u_w.y - setup->u_w.x) * beta;
            b_vw = setup->v_w.x + (setup->v_w.y - setup->v_w.x) * beta;
            b_lw = setup->l_w.x + (setup->l_w.y - setup->l_w.x) * beta;
            b_cw = setup->c_w.x + (setup->c_w.y - setup->c_w.x) * beta;
            b_wxw = setup->world0_w.x + (setup->world1_w.x - setup->world0_w.x) * beta;
            b_wyw = setup->world0_w.y + (setup->world1_w.y - setup->world0_w.y) * beta;
            b_wzw = setup->world0_w.z + (setup->world1_w.z - setup->world0_w.z) * beta;
        }

        f32 x_start_fp, x_end_fp, z_start, z_end;
        f32 wr_start, wr_end, uw_start, uw_end, vw_start, vw_end, lw_start, lw_end, cw_start, cw_end;
        f32 wxw_start, wxw_end, wyw_start, wyw_end, wzw_start, wzw_end;

        // Order the two edge samples left to right.
        if (ax <= bx) {
            x_start_fp = ax; x_end_fp = bx;
            z_start = az; z_end = bz;
            wr_start = a_wr; wr_end = b_wr;
            uw_start = a_uw; uw_end = b_uw;
            vw_start = a_vw; vw_end = b_vw;
            lw_start = a_lw; lw_end = b_lw;
            cw_start = a_cw; cw_end = b_cw;
            wxw_start = a_wxw; wxw_end = b_wxw;
            wyw_start = a_wyw; wyw_end = b_wyw;
            wzw_start = a_wzw; wzw_end = b_wzw;
        } else {
            x_start_fp = bx; x_end_fp = ax;
            z_start = bz; z_end = az;
            wr_start = b_wr; wr_end = a_wr;
            uw_start = b_uw; uw_end = a_uw;
            vw_start = b_vw; vw_end = a_vw;
            lw_start = b_lw; lw_end = a_lw;
            cw_start = b_cw; cw_end = a_cw;
            wxw_start = b_wxw; wxw_end = a_wxw;
            wyw_start = b_wyw; wyw_end = a_wyw;
            wzw_start = b_wzw; wzw_end = a_wzw;
        }

        i32 x_start = (i32)floorf(x_start_fp);
        i32 x_end = (i32)ceilf(x_end_fp) - 1;
        if (x_start < setup->clip_min_x) x_start = setup->clip_min_x;
        if (x_end > setup->clip_max_x) x_end = setup->clip_max_x;
        if (x_start > x_end) continue;

        // Per-pixel gradients of the w-divided attributes; spans narrower
        // than one pixel are treated as one pixel wide.
        f32 span_width = x_end_fp - x_start_fp;
        if (span_width < 1.0f) span_width = 1.0f;
        f32 inv_width = 1.0f / span_width;

        f32 dz = (z_end - z_start) * inv_width;
        f32 dwr = (wr_end - wr_start) * inv_width;
        f32 duw = (uw_end - uw_start) * inv_width;
        f32 dvw = (vw_end - vw_start) * inv_width;
        f32 dlw = (lw_end - lw_start) * inv_width;
        f32 dcw = (cw_end - cw_start) * inv_width;
        f32 dwxw = (wxw_end - wxw_start) * inv_width;
        f32 dwyw = (wyw_end - wyw_start) * inv_width;
        f32 dwzw = (wzw_end - wzw_start) * inv_width;

        // Advance from the exact edge position to the centre of the first
        // covered (possibly clipped) pixel.
        f32 skip = (f32)x_start + 0.5f - x_start_fp;
        f32 z = z_start + dz * skip;
        f32 wr = wr_start + dwr * skip;
        f32 uw = uw_start + duw * skip;
        f32 vw = vw_start + dvw * skip;
        f32 lw = lw_start + dlw * skip;
        f32 cw = cw_start + dcw * skip;
        f32 wxw = wxw_start + dwxw * skip;
        f32 wyw = wyw_start + dwyw * skip;
        f32 wzw = wzw_start + dwzw * skip;

        u32 row_start = (u32)y * fb_width;
        u8* prow = fb + row_start;
        u16* zrow = zb + row_start;

        i32 x = x_start;
        while (x <= x_end) {
            i32 span_end = x + SUBDIV - 1;
            if (span_end > x_end) span_end = x_end;
            i32 span_len = span_end - x + 1;

            // Perspective-correct values at the chunk start and at
            // `span_len` pixels past it.
            f32 pw_start = 1.0f / wr;
            f32 pw_end = 1.0f / (wr + dwr * (f32)span_len);

            f32 u_start = uw * pw_start;
            f32 v_start = vw * pw_start;
            f32 u_end = (uw + duw * (f32)span_len) * pw_end;
            f32 v_end = (vw + dvw * (f32)span_len) * pw_end;

            f32 l_start_fp = pxl8_clamp(lw * pw_start, 0.0f, 255.0f);
            f32 l_end_fp = pxl8_clamp((lw + dlw * (f32)span_len) * pw_end, 0.0f, 255.0f);

            f32 c_start_fp = pxl8_clamp(cw * pw_start, 0.0f, 255.0f);
            f32 c_end_fp = pxl8_clamp((cw + dcw * (f32)span_len) * pw_end, 0.0f, 255.0f);

            f32 wx_start = wxw * pw_start;
            f32 wy_start = wyw * pw_start;
            f32 wz_start = wzw * pw_start;
            f32 wx_end = (wxw + dwxw * (f32)span_len) * pw_end;
            f32 wy_end = (wyw + dwyw * (f32)span_len) * pw_end;
            f32 wz_end = (wzw + dwzw * (f32)span_len) * pw_end;

            // NOTE(review): the *_end values above are sampled span_len
            // pixels past the chunk start, but the step below divides by
            // (span_len - 1). The last pixel of a chunk therefore receives
            // the same value the next chunk starts with. Confirm whether
            // 1 / span_len was intended.
            f32 inv_span = span_len > 1 ? 1.0f / (f32)(span_len - 1) : 0.0f;
            f32 du = (u_end - u_start) * inv_span;
            f32 dv = (v_end - v_start) * inv_span;
            f32 dl = (l_end_fp - l_start_fp) * inv_span;
            f32 dc = (c_end_fp - c_start_fp) * inv_span;
            f32 dwx = (wx_end - wx_start) * inv_span;
            f32 dwy = (wy_end - wy_start) * inv_span;
            f32 dwz = (wz_end - wz_start) * inv_span;

            f32 u_a = u_start;
            f32 v_a = v_start;
            f32 l_a = l_start_fp;
            f32 c_a = c_start_fp;
            f32 z_a = z;
            f32 wx_a = wx_start;
            f32 wy_a = wy_start;
            f32 wz_a = wz_start;

            i32 px = x;

#if defined(PXL8_SIMD_SSE) || defined(PXL8_SIMD_NEON)
            // Four-pixels-at-a-time path, taken only for the
            // "depth test LESS, no blending" configuration. Any pixels left
            // over in the chunk fall through to the scalar loop below.
            if (depth_test && depth_compare == PXL8_GFX_COMPARE_LESS && !blend_enabled) {
                pxl8_f32_simd dz4_simd = pxl8_f32_simd_set(dz * 4.0f);
                pxl8_f32_simd half = pxl8_f32_simd_set(0.5f);
                pxl8_f32_simd one = pxl8_f32_simd_set(1.0f);
                pxl8_f32_simd zero = pxl8_f32_simd_zero();
                pxl8_f32_simd scale65535 = pxl8_f32_simd_set(65535.0f);

                pxl8_f32_simd z4 = pxl8_f32_simd_set4(z_a, z_a + dz, z_a + dz * 2.0f, z_a + dz * 3.0f);
                pxl8_f32_simd offsets = pxl8_f32_simd_set4(0.0f, 1.0f, 2.0f, 3.0f);

                f32 du4 = du * 4.0f, dv4 = dv * 4.0f, dl4 = dl * 4.0f, dc4 = dc * 4.0f;
                f32 dz4 = dz * 4.0f, dwx4 = dwx * 4.0f, dwy4 = dwy * 4.0f, dwz4 = dwz * 4.0f;

                for (; px + 3 <= span_end; px += 4) {
                    // Same depth mapping as the scalar path: [-1, 1] -> [0, 65535].
                    pxl8_f32_simd depth_norm = pxl8_f32_simd_clamp(pxl8_f32_simd_mul(pxl8_f32_simd_add(z4, one), half), zero, one);
                    pxl8_i32_simd z16_4 = pxl8_f32_simd_to_i32(pxl8_f32_simd_mul(depth_norm, scale65535));

                    pxl8_i32_simd zbuf = pxl8_i32_simd_set4((i32)zrow[px], (i32)zrow[px+1], (i32)zrow[px+2], (i32)zrow[px+3]);
                    // stored depth > new depth, i.e. the LESS comparison
                    i32 mask = pxl8_i32_simd_movemask(pxl8_i32_simd_cmpgt(zbuf, z16_4));

                    STATS_INC(stats, depth_tests, 4);

                    // The shader runs for all four lanes if at least one passed.
                    if (mask) {
                        pxl8_shader_ctx frag_ctx = {
                            .color_count = 4,
                            .x = pxl8_i32_simd_set4(px, px + 1, px + 2, px + 3),
                            .y = pxl8_i32_simd_set(y),
                            .v_uv = {
                                pxl8_f32_simd_add(pxl8_f32_simd_set(u_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(du), offsets)),
                                pxl8_f32_simd_add(pxl8_f32_simd_set(v_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dv), offsets))
                            },
                            .v_world = {
                                pxl8_f32_simd_add(pxl8_f32_simd_set(wx_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dwx), offsets)),
                                pxl8_f32_simd_add(pxl8_f32_simd_set(wy_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dwy), offsets)),
                                pxl8_f32_simd_add(pxl8_f32_simd_set(wz_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dwz), offsets))
                            },
                            .v_normal = pxl8_vec3_simd_set(setup->normal),
                            .v_light = pxl8_f32_simd_mul(
                                pxl8_f32_simd_add(pxl8_f32_simd_set(l_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dl), offsets)),
                                pxl8_f32_simd_set(1.0f / 255.0f)
                            ),
                            .v_color = pxl8_f32_simd_add(pxl8_f32_simd_set(c_a), pxl8_f32_simd_mul(pxl8_f32_simd_set(dc), offsets)),
                            .v_depth = z4,
                        };

                        u8 colors[4];
                        shader(&frag_ctx, bindings, uniforms, colors);
                        STATS_INC(stats, shader_calls, 1);

                        i32 z16_arr[4];
                        pxl8_i32_simd_store(z16_arr, z16_4);

                        for (i32 i = 0; i < 4; i++) {
                            // NOTE(review): tests bit 4*i+3, which presumes
                            // the movemask has one bit per byte so a lane's
                            // top bit lands there. Confirm against
                            // pxl8_i32_simd_movemask.
                            if (!(mask & (0x8 << (i * 4)))) continue;

                            STATS_INC(stats, depth_passes, 1);
                            u8 color = colors[i];
                            if (!(alpha_test && color <= alpha_ref) && color != 0) {
                                prow[px + i] = color;
                                if (depth_write) zrow[px + i] = (u16)z16_arr[i];
                                STATS_INC(stats, pixels_written, 1);
                            }
                        }
                    }

                    u_a += du4;
                    v_a += dv4;
                    l_a += dl4;
                    c_a += dc4;
                    z_a += dz4;
                    wx_a += dwx4;
                    wy_a += dwy4;
                    wz_a += dwz4;
                    z4 = pxl8_f32_simd_add(z4, dz4_simd);
                }
            }
#endif

            // Scalar path: one fragment per iteration.
            for (; px <= span_end; px++) {
                // Map depth from [-1, 1] to a 16-bit value.
                f32 depth_norm = pxl8_clamp((z_a + 1.0f) * 0.5f, 0.0f, 1.0f);
                u16 z16 = (u16)(depth_norm * 65535.0f);

                STATS_INC(stats, depth_tests, 1);
                bool depth_pass = !depth_test || depth_test_pass(depth_compare, z16, zrow[px]);
                if (depth_pass) {
                    STATS_INC(stats, depth_passes, 1);
                    pxl8_shader_ctx frag_ctx = {
                        .color_count = 1,
                        .x = pxl8_i32_simd_set(px),
                        .y = pxl8_i32_simd_set(y),
                        .v_uv = { pxl8_f32_simd_set(u_a), pxl8_f32_simd_set(v_a) },
                        .v_world = { pxl8_f32_simd_set(wx_a), pxl8_f32_simd_set(wy_a), pxl8_f32_simd_set(wz_a) },
                        .v_normal = pxl8_vec3_simd_set(setup->normal),
                        .v_light = pxl8_f32_simd_set(l_a / 255.0f),
                        .v_color = pxl8_f32_simd_set(c_a),
                        .v_depth = pxl8_f32_simd_set(z_a),
                    };

                    u8 color;
                    shader(&frag_ctx, bindings, uniforms, &color);
                    STATS_INC(stats, shader_calls, 1);

                    // Fragments at or below alpha_ref (when alpha testing)
                    // and fragments with index 0 are discarded.
                    if (!(alpha_test && color <= alpha_ref)) {
                        if (color != 0) {
                            u8 out_color = color;
                            if (blend_enabled) {
                                out_color = blend_indexed(pipeline, color, prow[px], palette, colormap);
                            }
                            prow[px] = out_color;
                            if (depth_write) {
                                zrow[px] = z16;
                            }
                            STATS_INC(stats, pixels_written, 1);
                        }
                    }
                }

                u_a += du;
                v_a += dv;
                l_a += dl;
                c_a += dc;
                z_a += dz;
                wx_a += dwx;
                wy_a += dwy;
                wz_a += dwz;
            }

            // Step the w-divided accumulators to the start of the next chunk.
            wr += dwr * (f32)span_len;
            uw += duw * (f32)span_len;
            vw += dvw * (f32)span_len;
            lw += dlw * (f32)span_len;
            cw += dcw * (f32)span_len;
            z += dz * (f32)span_len;
            wxw += dwxw * (f32)span_len;
            wyw += dwyw * (f32)span_len;
            wzw += dwzw * (f32)span_len;
            x = span_end + 1;
        }
    }
}

// Draws a one-pixel-wide line from (x0, y0) to (x1, y1), both inclusive, in
// palette index `color`, using integer error-term stepping. A pixel is
// written only if it lies inside both the clip rectangle (inclusive bounds)
// and the fb_w x fb_h framebuffer. The line is walked in full even when it is
// partly or wholly outside.
static void draw_line_clipped(
    u8* fb,
    u32 fb_w,
    u32 fb_h,
    i32 x0,
    i32 y0,
    i32 x1,
    i32 y1,
    u8 color,
    i32 clip_min_x,
    i32 clip_min_y,
    i32 clip_max_x,
    i32 clip_max_y,
    pxl8_gfx_stats* stats
) {
    i32 dx = abs(x1 - x0);
    i32 dy = -abs(y1 - y0);
    i32 sx = x0 < x1 ? 1 : -1;
    i32 sy = y0 < y1 ? 1 : -1;
    i32 err = dx + dy;

    while (true) {
        if (x0 >= clip_min_x && x0 <= clip_max_x && y0 >= clip_min_y && y0 <= clip_max_y) {
            if (x0 >= 0 && y0 >= 0 && x0 < (i32)fb_w && y0 < (i32)fb_h) {
                fb[y0 * (i32)fb_w + x0] = color;
                STATS_INC(stats, pixels_written, 1);
            }
        }

        if (x0 == x1 && y0 == y1) break;

        i32 e2 = 2 * err;
        if (e2 >= dy) { err += dy; x0 += sx; }
        if (e2 <= dx) { err += dx; y0 += sy; }
    }
}

// Resource handles pack a slot index in the low 16 bits and a generation
// counter in the high 16 bits.
#define SLOT_INDEX(id) ((id) & 0xFFFF)
#define SLOT_GEN(id) ((id) >> 16)
#define MAKE_ID(index, gen) (((u32)(gen) << 16) | (u32)(index))
#define NEXT_GEN(gen) ((u16)((gen) == 0xFFFF ? 
1 : (gen) + 1)) typedef struct { pxl8_gfx_bindings_desc desc; u16 generation; bool active; } bindings_slot; typedef struct { void* data; u32 size; u32 append_pos; pxl8_gfx_buffer_type type; pxl8_gfx_usage usage; u16 generation; bool active; } buffer_slot; typedef struct { pxl8_gfx_pass_desc desc; u16 generation; bool active; } pass_slot; #define PXL8_PIPELINE_CACHE_SIZE 64 typedef struct { u32 hash; u32 slot_idx; u32 last_used_frame; bool valid; } pipeline_cache_entry; typedef struct { pxl8_gfx_pipeline_desc desc; u16 generation; bool active; bool cached; } pipeline_slot; typedef struct { void* data; u32 width; u32 height; pxl8_gfx_texture_format format; pxl8_gfx_usage usage; u16 generation; bool active; } texture_slot; struct pxl8_renderer { u32 width; u32 height; texture_slot textures[PXL8_GFX_MAX_TEXTURES]; buffer_slot buffers[PXL8_GFX_MAX_BUFFERS]; pipeline_slot pipelines[PXL8_GFX_MAX_PIPELINES]; bindings_slot bindings[PXL8_GFX_MAX_BINDINGS]; pass_slot passes[PXL8_GFX_MAX_PASSES]; pipeline_cache_entry pipeline_cache[PXL8_PIPELINE_CACHE_SIZE]; u32 frame_counter; pxl8_gfx_pass current_pass; pxl8_gfx_pipeline current_pipeline; pxl8_gfx_bindings current_bindings; pxl8_gfx_cmd_draw_params current_draw_params; i32 viewport_x, viewport_y; u32 viewport_w, viewport_h; i32 scissor_x, scissor_y; u32 scissor_w, scissor_h; pxl8_shader_fn shader; pxl8_gfx_stats stats; }; struct pxl8_gfx_cmdbuf { pxl8_gfx_cmd* commands; u32 capacity; u32 count; }; pxl8_renderer* pxl8_renderer_create(u32 width, u32 height) { pxl8_renderer* r = pxl8_calloc(1, sizeof(pxl8_renderer)); r->width = width; r->height = height; r->viewport_w = width; r->viewport_h = height; r->scissor_w = width; r->scissor_h = height; pxl8_renderer_reset_stats(r); return r; } void pxl8_renderer_destroy(pxl8_renderer* r) { if (!r) return; for (u32 i = 0; i < PXL8_GFX_MAX_TEXTURES; i++) { if (r->textures[i].data) pxl8_free(r->textures[i].data); } for (u32 i = 0; i < PXL8_GFX_MAX_BUFFERS; i++) { if (r->buffers[i].data) 
pxl8_free(r->buffers[i].data); } pxl8_free(r); } u32 pxl8_renderer_get_width(const pxl8_renderer* r) { return r ? r->width : 0; } u32 pxl8_renderer_get_height(const pxl8_renderer* r) { return r ? r->height : 0; } void pxl8_renderer_set_shader(pxl8_renderer* r, pxl8_shader_fn fn) { if (r) r->shader = fn; } void pxl8_renderer_reset_stats(pxl8_renderer* r) { if (!r) return; memset(&r->stats, 0, sizeof(r->stats)); } const pxl8_gfx_stats* pxl8_renderer_get_stats(const pxl8_renderer* r) { return r ? &r->stats : NULL; } static u32 texture_byte_size(pxl8_gfx_texture_format fmt, u32 w, u32 h) { switch (fmt) { case PXL8_GFX_FORMAT_INDEXED8: return w * h; case PXL8_GFX_FORMAT_DEPTH16: return w * h * 2; case PXL8_GFX_FORMAT_LIGHT_ACCUM: return w * h * 4; } return 0; } pxl8_gfx_texture pxl8_create_texture(pxl8_renderer* r, const pxl8_gfx_texture_desc* desc) { for (u32 i = 0; i < PXL8_GFX_MAX_TEXTURES; i++) { if (!r->textures[i].active) { texture_slot* s = &r->textures[i]; u32 size = texture_byte_size(desc->format, desc->width, desc->height); s->data = pxl8_malloc(size); if (desc->data.ptr && desc->data.size >= size) { memcpy(s->data, desc->data.ptr, size); } else { memset(s->data, 0, size); } s->width = desc->width; s->height = desc->height; s->format = desc->format; s->usage = desc->usage; s->generation = NEXT_GEN(s->generation); s->active = true; return (pxl8_gfx_texture){ MAKE_ID(i, s->generation) }; } } pxl8_error("Out of texture slots"); return (pxl8_gfx_texture){ PXL8_GFX_INVALID_ID }; } pxl8_gfx_buffer pxl8_create_buffer(pxl8_renderer* r, const pxl8_gfx_buffer_desc* desc) { for (u32 i = 0; i < PXL8_GFX_MAX_BUFFERS; i++) { if (!r->buffers[i].active) { buffer_slot* s = &r->buffers[i]; u32 capacity = desc->capacity > 0 ? 
desc->capacity : desc->data.size; s->data = pxl8_malloc(capacity); if (desc->data.ptr && desc->data.size > 0) { memcpy(s->data, desc->data.ptr, desc->data.size); if (capacity > desc->data.size) { memset((u8*)s->data + desc->data.size, 0, capacity - desc->data.size); } } else { memset(s->data, 0, capacity); } s->size = capacity; s->append_pos = 0; s->type = desc->type; s->usage = desc->usage; s->generation = NEXT_GEN(s->generation); s->active = true; return (pxl8_gfx_buffer){ MAKE_ID(i, s->generation) }; } } pxl8_error("Out of buffer slots"); return (pxl8_gfx_buffer){ PXL8_GFX_INVALID_ID }; } pxl8_gfx_pipeline pxl8_create_pipeline(pxl8_renderer* r, const pxl8_gfx_pipeline_desc* desc) { for (u32 i = 0; i < PXL8_GFX_MAX_PIPELINES; i++) { if (!r->pipelines[i].active) { pipeline_slot* s = &r->pipelines[i]; s->desc = *desc; s->generation = NEXT_GEN(s->generation); s->active = true; return (pxl8_gfx_pipeline){ MAKE_ID(i, s->generation) }; } } pxl8_error("Out of pipeline slots"); return (pxl8_gfx_pipeline){ PXL8_GFX_INVALID_ID }; } pxl8_gfx_bindings pxl8_create_bindings(pxl8_renderer* r, const pxl8_gfx_bindings_desc* desc) { for (u32 i = 0; i < PXL8_GFX_MAX_BINDINGS; i++) { if (!r->bindings[i].active) { bindings_slot* s = &r->bindings[i]; s->desc = *desc; s->generation = NEXT_GEN(s->generation); s->active = true; return (pxl8_gfx_bindings){ MAKE_ID(i, s->generation) }; } } pxl8_error("Out of bindings slots"); return (pxl8_gfx_bindings){ PXL8_GFX_INVALID_ID }; } pxl8_gfx_pass pxl8_create_pass(pxl8_renderer* r, const pxl8_gfx_pass_desc* desc) { for (u32 i = 0; i < PXL8_GFX_MAX_PASSES; i++) { if (!r->passes[i].active) { pass_slot* s = &r->passes[i]; s->desc = *desc; s->generation = NEXT_GEN(s->generation); s->active = true; return (pxl8_gfx_pass){ MAKE_ID(i, s->generation) }; } } pxl8_error("Out of pass slots"); return (pxl8_gfx_pass){ PXL8_GFX_INVALID_ID }; } #define VALID_TEX(r, h) (SLOT_INDEX((h).id) < PXL8_GFX_MAX_TEXTURES && \ r->textures[SLOT_INDEX((h).id)].active && \ 
r->textures[SLOT_INDEX((h).id)].generation == SLOT_GEN((h).id)) #define VALID_BUF(r, h) (SLOT_INDEX((h).id) < PXL8_GFX_MAX_BUFFERS && \ r->buffers[SLOT_INDEX((h).id)].active && \ r->buffers[SLOT_INDEX((h).id)].generation == SLOT_GEN((h).id)) #define VALID_PASS(r, h) (SLOT_INDEX((h).id) < PXL8_GFX_MAX_PASSES && \ r->passes[SLOT_INDEX((h).id)].active && \ r->passes[SLOT_INDEX((h).id)].generation == SLOT_GEN((h).id)) #define VALID_PIPELINE(r, h) (SLOT_INDEX((h).id) < PXL8_GFX_MAX_PIPELINES && \ r->pipelines[SLOT_INDEX((h).id)].active && \ r->pipelines[SLOT_INDEX((h).id)].generation == SLOT_GEN((h).id)) #define VALID_BINDINGS(r, h) (SLOT_INDEX((h).id) < PXL8_GFX_MAX_BINDINGS && \ r->bindings[SLOT_INDEX((h).id)].active && \ r->bindings[SLOT_INDEX((h).id)].generation == SLOT_GEN((h).id)) void pxl8_destroy_texture(pxl8_renderer* r, pxl8_gfx_texture tex) { if (!VALID_TEX(r, tex)) return; texture_slot* s = &r->textures[SLOT_INDEX(tex.id)]; pxl8_free(s->data); s->data = NULL; s->active = false; } void pxl8_destroy_buffer(pxl8_renderer* r, pxl8_gfx_buffer buf) { if (!VALID_BUF(r, buf)) return; buffer_slot* s = &r->buffers[SLOT_INDEX(buf.id)]; pxl8_free(s->data); s->data = NULL; s->active = false; } void pxl8_destroy_pipeline(pxl8_renderer* r, pxl8_gfx_pipeline pip) { u32 idx = SLOT_INDEX(pip.id); if (idx < PXL8_GFX_MAX_PIPELINES && r->pipelines[idx].generation == SLOT_GEN(pip.id)) { r->pipelines[idx].active = false; } } void pxl8_destroy_bindings(pxl8_renderer* r, pxl8_gfx_bindings bnd) { u32 idx = SLOT_INDEX(bnd.id); if (idx < PXL8_GFX_MAX_BINDINGS && r->bindings[idx].generation == SLOT_GEN(bnd.id)) { r->bindings[idx].active = false; } } void pxl8_destroy_pass(pxl8_renderer* r, pxl8_gfx_pass pass) { u32 idx = SLOT_INDEX(pass.id); if (idx < PXL8_GFX_MAX_PASSES && r->passes[idx].generation == SLOT_GEN(pass.id)) { r->passes[idx].active = false; } } void pxl8_update_buffer(pxl8_renderer* r, pxl8_gfx_buffer buf, const pxl8_gfx_range* data) { if (!VALID_BUF(r, buf)) return; 
buffer_slot* s = &r->buffers[SLOT_INDEX(buf.id)]; u32 copy_size = data->size < s->size ? data->size : s->size; memcpy(s->data, data->ptr, copy_size); } i32 pxl8_append_buffer(pxl8_renderer* r, pxl8_gfx_buffer buf, const pxl8_gfx_range* data) { if (!VALID_BUF(r, buf)) return -1; buffer_slot* s = &r->buffers[SLOT_INDEX(buf.id)]; if (s->append_pos + data->size > s->size) return -1; i32 offset = (i32)s->append_pos; memcpy((u8*)s->data + s->append_pos, data->ptr, data->size); s->append_pos += data->size; return offset; } void pxl8_update_texture(pxl8_renderer* r, pxl8_gfx_texture tex, const pxl8_gfx_range* data, u32 x, u32 y, u32 w, u32 h) { if (!VALID_TEX(r, tex)) return; texture_slot* s = &r->textures[SLOT_INDEX(tex.id)]; u32 bpp = (s->format == PXL8_GFX_FORMAT_INDEXED8) ? 1 : (s->format == PXL8_GFX_FORMAT_DEPTH16) ? 2 : 4; const u8* src = data->ptr; u8* dst = (u8*)s->data + (y * s->width + x) * bpp; for (u32 row = 0; row < h; row++) { memcpy(dst, src, w * bpp); src += w * bpp; dst += s->width * bpp; } } void* pxl8_buffer_ptr(pxl8_renderer* r, pxl8_gfx_buffer buf) { if (!VALID_BUF(r, buf)) return NULL; return r->buffers[SLOT_INDEX(buf.id)].data; } u32 pxl8_buffer_size(pxl8_renderer* r, pxl8_gfx_buffer buf) { if (!VALID_BUF(r, buf)) return 0; return r->buffers[SLOT_INDEX(buf.id)].size; } void* pxl8_texture_get_data(pxl8_renderer* r, pxl8_gfx_texture tex) { if (!VALID_TEX(r, tex)) return NULL; return r->textures[SLOT_INDEX(tex.id)].data; } u32 pxl8_texture_get_width(pxl8_renderer* r, pxl8_gfx_texture tex) { if (!VALID_TEX(r, tex)) return 0; return r->textures[SLOT_INDEX(tex.id)].width; } u32 pxl8_texture_get_height(pxl8_renderer* r, pxl8_gfx_texture tex) { if (!VALID_TEX(r, tex)) return 0; return r->textures[SLOT_INDEX(tex.id)].height; } pxl8_gfx_texture_format pxl8_texture_get_format(pxl8_renderer* r, pxl8_gfx_texture tex) { if (!VALID_TEX(r, tex)) return PXL8_GFX_FORMAT_INDEXED8; return r->textures[SLOT_INDEX(tex.id)].format; } pxl8_gfx_cmdbuf* pxl8_cmdbuf_create(u32 
capacity) { pxl8_gfx_cmdbuf* cb = pxl8_malloc(sizeof(pxl8_gfx_cmdbuf)); cb->commands = pxl8_malloc(capacity * sizeof(pxl8_gfx_cmd)); cb->capacity = capacity; cb->count = 0; return cb; } void pxl8_cmdbuf_destroy(pxl8_gfx_cmdbuf* cb) { if (!cb) return; pxl8_free(cb->commands); pxl8_free(cb); } void pxl8_cmdbuf_reset(pxl8_gfx_cmdbuf* cb) { if (cb) { cb->count = 0; } } static pxl8_gfx_cmd* cmd_alloc(pxl8_gfx_cmdbuf* cb) { if (cb->count >= cb->capacity) { cb->capacity *= 2; cb->commands = pxl8_realloc(cb->commands, cb->capacity * sizeof(pxl8_gfx_cmd)); } return &cb->commands[cb->count++]; } void pxl8_begin_pass(pxl8_gfx_cmdbuf* cb, pxl8_gfx_pass pass) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_BEGIN_PASS; cmd->begin_pass.pass = pass; } void pxl8_end_pass(pxl8_gfx_cmdbuf* cb) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_END_PASS; } void pxl8_set_pipeline(pxl8_gfx_cmdbuf* cb, pxl8_gfx_pipeline pipeline) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_SET_PIPELINE; cmd->set_pipeline.pipeline = pipeline; } void pxl8_set_bindings(pxl8_gfx_cmdbuf* cb, pxl8_gfx_bindings bindings) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_SET_BINDINGS; cmd->set_bindings.bindings = bindings; } void pxl8_set_viewport(pxl8_gfx_cmdbuf* cb, i32 x, i32 y, u32 w, u32 h) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_SET_VIEWPORT; cmd->set_viewport.x = x; cmd->set_viewport.y = y; cmd->set_viewport.w = w; cmd->set_viewport.h = h; } void pxl8_set_scissor(pxl8_gfx_cmdbuf* cb, i32 x, i32 y, u32 w, u32 h) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_SET_SCISSOR; cmd->set_scissor.x = x; cmd->set_scissor.y = y; cmd->set_scissor.w = w; cmd->set_scissor.h = h; } void pxl8_set_draw_params(pxl8_gfx_cmdbuf* cb, const pxl8_gfx_cmd_draw_params* p) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_SET_DRAW_PARAMS; cmd->draw_params = *p; } void pxl8_draw(pxl8_gfx_cmdbuf* cb, pxl8_gfx_buffer vb, 
pxl8_gfx_buffer ib, u32 first, u32 count, u32 base_vertex) { pxl8_gfx_cmd* cmd = cmd_alloc(cb); cmd->type = PXL8_GFX_CMD_DRAW; cmd->draw.vertex_buffer = vb; cmd->draw.index_buffer = ib; cmd->draw.first_index = first; cmd->draw.index_count = count; cmd->draw.base_vertex = base_vertex; } static void execute_draw( pxl8_renderer* r, const pxl8_gfx_cmd_draw* cmd ) { if (!VALID_BUF(r, cmd->vertex_buffer)) return; bool use_indices = pxl8_gfx_handle_valid(cmd->index_buffer) && VALID_BUF(r, cmd->index_buffer); if (!VALID_PASS(r, r->current_pass)) return; if (!VALID_PIPELINE(r, r->current_pipeline)) return; u64 exec_start = STATS_START(); STATS_INC(&r->stats, draw_calls, 1); buffer_slot* vb = &r->buffers[SLOT_INDEX(cmd->vertex_buffer.id)]; buffer_slot* ib = use_indices ? &r->buffers[SLOT_INDEX(cmd->index_buffer.id)] : NULL; pass_slot* pass = &r->passes[SLOT_INDEX(r->current_pass.id)]; pipeline_slot* pip = &r->pipelines[SLOT_INDEX(r->current_pipeline.id)]; if (!VALID_TEX(r, pass->desc.color.texture)) { pxl8_error("draw: invalid color texture"); STATS_ADD(&r->stats, execute_draw_ns, exec_start); return; } if (!VALID_TEX(r, pass->desc.depth.texture)) { pxl8_error("draw: invalid depth texture"); STATS_ADD(&r->stats, execute_draw_ns, exec_start); return; } texture_slot* color_tex = &r->textures[SLOT_INDEX(pass->desc.color.texture.id)]; texture_slot* depth_tex = &r->textures[SLOT_INDEX(pass->desc.depth.texture.id)]; u8* fb = color_tex->data; u16* zb = depth_tex->data; u32 fb_w = color_tex->width; u32 fb_h = color_tex->height; if (r->viewport_w == 0 || r->viewport_h == 0) { STATS_ADD(&r->stats, execute_draw_ns, exec_start); return; } i32 vp_x = r->viewport_x; i32 vp_y = r->viewport_y; u32 vp_w = r->viewport_w; u32 vp_h = r->viewport_h; i32 clip_min_x = vp_x; i32 clip_min_y = vp_y; i32 clip_max_x = vp_x + (i32)vp_w - 1; i32 clip_max_y = vp_y + (i32)vp_h - 1; if (r->scissor_w > 0 && r->scissor_h > 0) { i32 sc_min_x = r->scissor_x; i32 sc_min_y = r->scissor_y; i32 sc_max_x = 
r->scissor_x + (i32)r->scissor_w - 1; i32 sc_max_y = r->scissor_y + (i32)r->scissor_h - 1; if (sc_min_x > clip_min_x) clip_min_x = sc_min_x; if (sc_min_y > clip_min_y) clip_min_y = sc_min_y; if (sc_max_x < clip_max_x) clip_max_x = sc_max_x; if (sc_max_y < clip_max_y) clip_max_y = sc_max_y; } if (clip_min_x < 0) clip_min_x = 0; if (clip_min_y < 0) clip_min_y = 0; if (clip_max_x >= (i32)fb_w) clip_max_x = (i32)fb_w - 1; if (clip_max_y >= (i32)fb_h) clip_max_y = (i32)fb_h - 1; if (clip_min_x > clip_max_x || clip_min_y > clip_max_y) { STATS_ADD(&r->stats, execute_draw_ns, exec_start); return; } const pxl8_vertex* vertices = vb->data; const u16* indices = use_indices ? ib->data : NULL; pxl8_mat4 mv = pxl8_mat4_multiply(r->current_draw_params.view, r->current_draw_params.model); pxl8_mat4 mvp = pxl8_mat4_multiply(r->current_draw_params.projection, mv); f32 near = 0.1f; pxl8_shader_fn shader = pip->desc.shader; if (!shader) { STATS_ADD(&r->stats, execute_draw_ns, exec_start); return; } pxl8_shader_bindings shader_bindings = {0}; pxl8_shader_uniforms shader_uniforms = r->current_draw_params.shader; shader_uniforms.dither = pip->desc.dither; shader_uniforms.emissive = pip->desc.emissive; if (VALID_BINDINGS(r, r->current_bindings)) { bindings_slot* bnd = &r->bindings[SLOT_INDEX(r->current_bindings.id)]; shader_bindings.colormap = (const u8*)bnd->desc.colormap; shader_bindings.palette = bnd->desc.palette; const pxl8_atlas* atlas = bnd->desc.atlas; if (atlas && bnd->desc.texture_id != UINT32_MAX) { const pxl8_atlas_entry* entry = pxl8_atlas_get_entry(atlas, bnd->desc.texture_id); if (entry && entry->active) { shader_bindings.atlas = pxl8_atlas_get_pixels_tiled(atlas); shader_bindings.width = (u32)entry->w; shader_bindings.height = (u32)entry->h; shader_bindings.use_tiled = true; shader_bindings.tiled.base = entry->tiled_base; shader_bindings.tiled.log2_w = entry->log2_w; } } } bool double_sided = pip->desc.double_sided; pxl8_gfx_cull_mode cull_mode = pip->desc.rasterizer.cull; 
bool is_wireframe = pip->desc.rasterizer.fill == PXL8_GFX_FILL_WIREFRAME; for (u32 i = cmd->first_index; i < cmd->first_index + cmd->index_count; i += 3) { STATS_INC(&r->stats, triangles, 1); u16 i0, i1, i2; if (use_indices) { if (i + 2 >= ib->size / sizeof(u16)) break; i0 = indices[i] + cmd->base_vertex; i1 = indices[i + 1] + cmd->base_vertex; i2 = indices[i + 2] + cmd->base_vertex; } else { i0 = (u16)(i + cmd->base_vertex); i1 = (u16)(i + 1 + cmd->base_vertex); i2 = (u16)(i + 2 + cmd->base_vertex); } if (i0 >= vb->size / sizeof(pxl8_vertex)) continue; if (i1 >= vb->size / sizeof(pxl8_vertex)) continue; if (i2 >= vb->size / sizeof(pxl8_vertex)) continue; const pxl8_vertex* v0 = &vertices[i0]; const pxl8_vertex* v1 = &vertices[i1]; const pxl8_vertex* v2 = &vertices[i2]; raster_vertex rv0, rv1, rv2; pxl8_vec4 p0 = {v0->position.x, v0->position.y, v0->position.z, 1.0f}; pxl8_vec4 p1 = {v1->position.x, v1->position.y, v1->position.z, 1.0f}; pxl8_vec4 p2 = {v2->position.x, v2->position.y, v2->position.z, 1.0f}; rv0.clip_pos = pxl8_mat4_multiply_vec4(mvp, p0); rv1.clip_pos = pxl8_mat4_multiply_vec4(mvp, p1); rv2.clip_pos = pxl8_mat4_multiply_vec4(mvp, p2); if (!is_wireframe) { pxl8_vec4 w0 = pxl8_mat4_multiply_vec4(r->current_draw_params.model, p0); pxl8_vec4 w1 = pxl8_mat4_multiply_vec4(r->current_draw_params.model, p1); pxl8_vec4 w2 = pxl8_mat4_multiply_vec4(r->current_draw_params.model, p2); rv0.world_pos = (pxl8_vec3){w0.x, w0.y, w0.z}; rv1.world_pos = (pxl8_vec3){w1.x, w1.y, w1.z}; rv2.world_pos = (pxl8_vec3){w2.x, w2.y, w2.z}; rv0.normal = pxl8_vec3_normalize(pxl8_mat4_multiply_vec3(r->current_draw_params.model, v0->normal)); rv1.normal = pxl8_vec3_normalize(pxl8_mat4_multiply_vec3(r->current_draw_params.model, v1->normal)); rv2.normal = pxl8_vec3_normalize(pxl8_mat4_multiply_vec3(r->current_draw_params.model, v2->normal)); rv0.u = v0->u; rv0.v = v0->v; rv1.u = v1->u; rv1.v = v1->v; rv2.u = v2->u; rv2.v = v2->v; rv0.color = v0->color; rv1.color = v1->color; 
rv2.color = v2->color; rv0.light = v0->light; rv1.light = v1->light; rv2.light = v2->light; } else { rv0.world_pos = (pxl8_vec3){0, 0, 0}; rv1.world_pos = (pxl8_vec3){0, 0, 0}; rv2.world_pos = (pxl8_vec3){0, 0, 0}; rv0.normal = (pxl8_vec3){0, 0, 0}; rv1.normal = (pxl8_vec3){0, 0, 0}; rv2.normal = (pxl8_vec3){0, 0, 0}; rv0.u = 0.0f; rv0.v = 0.0f; rv1.u = 0.0f; rv1.v = 0.0f; rv2.u = 0.0f; rv2.v = 0.0f; rv0.color = 0; rv1.color = 0; rv2.color = 0; rv0.light = 0; rv1.light = 0; rv2.light = 0; } raster_vertex clipped[6]; i32 clipped_count = clip_triangle_near(&rv0, &rv1, &rv2, near, clipped); for (i32 t = 0; t < clipped_count; t += 3) { STATS_INC(&r->stats, clipped_triangles, 1); if (is_wireframe) { f32 hw = (f32)vp_w * 0.5f; f32 hh = (f32)vp_h * 0.5f; raster_vertex* wv0 = &clipped[t]; raster_vertex* wv1 = &clipped[t+1]; raster_vertex* wv2 = &clipped[t+2]; i32 sx0 = (i32)((f32)vp_x + hw + wv0->clip_pos.x / wv0->clip_pos.w * hw); i32 sy0 = (i32)((f32)vp_y + hh - wv0->clip_pos.y / wv0->clip_pos.w * hh); i32 sx1 = (i32)((f32)vp_x + hw + wv1->clip_pos.x / wv1->clip_pos.w * hw); i32 sy1 = (i32)((f32)vp_y + hh - wv1->clip_pos.y / wv1->clip_pos.w * hh); i32 sx2 = (i32)((f32)vp_x + hw + wv2->clip_pos.x / wv2->clip_pos.w * hw); i32 sy2 = (i32)((f32)vp_y + hh - wv2->clip_pos.y / wv2->clip_pos.w * hh); f32 cross = (f32)(sx1 - sx0) * (f32)(sy2 - sy0) - (f32)(sy1 - sy0) * (f32)(sx2 - sx0); if (!double_sided) { if (cull_mode == PXL8_GFX_CULL_BACK && cross >= 0.0f) continue; if (cull_mode == PXL8_GFX_CULL_FRONT && cross <= 0.0f) continue; } u8 wire_color = v0->color ? 
v0->color : 15; draw_line_clipped(fb, fb_w, fb_h, sx0, sy0, sx1, sy1, wire_color, clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats); draw_line_clipped(fb, fb_w, fb_h, sx1, sy1, sx2, sy2, wire_color, clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats); draw_line_clipped(fb, fb_w, fb_h, sx2, sy2, sx0, sy0, wire_color, clip_min_x, clip_min_y, clip_max_x, clip_max_y, &r->stats); } else { tri_setup setup; if (!setup_tri(&setup, &clipped[t], &clipped[t+1], &clipped[t+2], vp_x, vp_y, vp_w, vp_h, clip_min_x, clip_min_y, clip_max_x, clip_max_y, cull_mode, double_sided)) { continue; } u64 raster_start = STATS_START(); rasterize_triangle(&setup, fb, zb, fb_w, shader, &pip->desc, &shader_bindings, &shader_uniforms, &r->stats); STATS_ADD(&r->stats, raster_ns, raster_start); } } } STATS_ADD(&r->stats, execute_draw_ns, exec_start); } void pxl8_gfx_submit(pxl8_renderer* r, pxl8_gfx_cmdbuf* cb) { u64 submit_start = STATS_START(); for (u32 i = 0; i < cb->count; i++) { pxl8_gfx_cmd* cmd = &cb->commands[i]; switch (cmd->type) { case PXL8_GFX_CMD_BEGIN_PASS: r->current_pass = cmd->begin_pass.pass; if (VALID_PASS(r, cmd->begin_pass.pass)) { pass_slot* p = &r->passes[SLOT_INDEX(cmd->begin_pass.pass.id)]; if (p->desc.color.load == PXL8_GFX_LOAD_CLEAR) { pxl8_clear(r, p->desc.color.texture, p->desc.color.clear_value); } if (p->desc.depth.load == PXL8_GFX_LOAD_CLEAR) { pxl8_clear_depth(r, p->desc.depth.texture); } if (p->desc.light_accum.load == PXL8_GFX_LOAD_CLEAR) { pxl8_clear_light(r, p->desc.light_accum.texture); } } break; case PXL8_GFX_CMD_END_PASS: r->current_pass = (pxl8_gfx_pass){ PXL8_GFX_INVALID_ID }; break; case PXL8_GFX_CMD_SET_PIPELINE: r->current_pipeline = cmd->set_pipeline.pipeline; break; case PXL8_GFX_CMD_SET_BINDINGS: r->current_bindings = cmd->set_bindings.bindings; break; case PXL8_GFX_CMD_SET_VIEWPORT: r->viewport_x = cmd->set_viewport.x; r->viewport_y = cmd->set_viewport.y; r->viewport_w = cmd->set_viewport.w; r->viewport_h = cmd->set_viewport.h; 
break; case PXL8_GFX_CMD_SET_SCISSOR: r->scissor_x = cmd->set_scissor.x; r->scissor_y = cmd->set_scissor.y; r->scissor_w = cmd->set_scissor.w; r->scissor_h = cmd->set_scissor.h; break; case PXL8_GFX_CMD_SET_DRAW_PARAMS: r->current_draw_params = cmd->draw_params; break; case PXL8_GFX_CMD_DRAW: execute_draw(r, &cmd->draw); break; case PXL8_GFX_CMD_RESOLVE: break; } } for (u32 i = 0; i < PXL8_GFX_MAX_BUFFERS; i++) { if (r->buffers[i].active && r->buffers[i].usage == PXL8_GFX_USAGE_STREAM) { r->buffers[i].append_pos = 0; } } STATS_ADD(&r->stats, submit_ns, submit_start); } void pxl8_clear(pxl8_renderer* r, pxl8_gfx_texture target, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format == PXL8_GFX_FORMAT_INDEXED8) { memset(s->data, color, s->width * s->height); } } void pxl8_clear_depth(pxl8_renderer* r, pxl8_gfx_texture target) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format == PXL8_GFX_FORMAT_DEPTH16) { memset(s->data, 0xFF, s->width * s->height * 2); } } void pxl8_clear_light(pxl8_renderer* r, pxl8_gfx_texture target) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format == PXL8_GFX_FORMAT_LIGHT_ACCUM) { memset(s->data, 0, s->width * s->height * 4); } } void pxl8_draw_pixel(pxl8_renderer* r, pxl8_gfx_texture target, i32 x, i32 y, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (x < 0 || y < 0 || (u32)x >= s->width || (u32)y >= s->height) return; if (s->format == PXL8_GFX_FORMAT_INDEXED8) { ((u8*)s->data)[y * s->width + x] = color; } } u8 pxl8_get_pixel(pxl8_renderer* r, pxl8_gfx_texture target, i32 x, i32 y) { if (!VALID_TEX(r, target)) return 0; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (x < 0 || y < 0 || (u32)x >= s->width || (u32)y >= s->height) return 0; if (s->format == PXL8_GFX_FORMAT_INDEXED8) { return ((u8*)s->data)[y * 
s->width + x]; } return 0; } void pxl8_draw_line(pxl8_renderer* r, pxl8_gfx_texture target, i32 x0, i32 y0, i32 x1, i32 y1, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format != PXL8_GFX_FORMAT_INDEXED8) return; u8* fb = s->data; i32 w = (i32)s->width; i32 h = (i32)s->height; i32 dx = abs(x1 - x0); i32 dy = -abs(y1 - y0); i32 sx = x0 < x1 ? 1 : -1; i32 sy = y0 < y1 ? 1 : -1; i32 err = dx + dy; while (true) { if (x0 >= 0 && x0 < w && y0 >= 0 && y0 < h) { fb[y0 * w + x0] = color; } if (x0 == x1 && y0 == y1) break; i32 e2 = 2 * err; if (e2 >= dy) { err += dy; x0 += sx; } if (e2 <= dx) { err += dx; y0 += sy; } } } void pxl8_draw_rect(pxl8_renderer* r, pxl8_gfx_texture target, i32 x, i32 y, i32 w, i32 h, u8 color) { pxl8_draw_line(r, target, x, y, x + w - 1, y, color); pxl8_draw_line(r, target, x + w - 1, y, x + w - 1, y + h - 1, color); pxl8_draw_line(r, target, x + w - 1, y + h - 1, x, y + h - 1, color); pxl8_draw_line(r, target, x, y + h - 1, x, y, color); } void pxl8_draw_rect_fill(pxl8_renderer* r, pxl8_gfx_texture target, i32 x, i32 y, i32 w, i32 h, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format != PXL8_GFX_FORMAT_INDEXED8) return; u8* fb = s->data; i32 tw = (i32)s->width; i32 th = (i32)s->height; i32 x0 = x < 0 ? 0 : x; i32 y0 = y < 0 ? 0 : y; i32 x1 = x + w > tw ? tw : x + w; i32 y1 = y + h > th ? 
th : y + h; for (i32 py = y0; py < y1; py++) { memset(&fb[py * tw + x0], color, (size_t)(x1 - x0)); } } void pxl8_draw_circle(pxl8_renderer* r, pxl8_gfx_texture target, i32 cx, i32 cy, i32 radius, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format != PXL8_GFX_FORMAT_INDEXED8) return; u8* fb = s->data; i32 w = (i32)s->width; i32 h = (i32)s->height; i32 px = 0, py = radius; i32 d = 3 - 2 * radius; #define PLOT(xx, yy) if ((xx) >= 0 && (xx) < w && (yy) >= 0 && (yy) < h) fb[(yy) * w + (xx)] = color while (py >= px) { PLOT(cx + px, cy + py); PLOT(cx - px, cy + py); PLOT(cx + px, cy - py); PLOT(cx - px, cy - py); PLOT(cx + py, cy + px); PLOT(cx - py, cy + px); PLOT(cx + py, cy - px); PLOT(cx - py, cy - px); px++; if (d > 0) { py--; d += 4 * (px - py) + 10; } else { d += 4 * px + 6; } } #undef PLOT } void pxl8_draw_circle_fill(pxl8_renderer* r, pxl8_gfx_texture target, i32 cx, i32 cy, i32 radius, u8 color) { if (!VALID_TEX(r, target)) return; texture_slot* s = &r->textures[SLOT_INDEX(target.id)]; if (s->format != PXL8_GFX_FORMAT_INDEXED8) return; u8* fb = s->data; i32 w = (i32)s->width; i32 h = (i32)s->height; i32 r2 = radius * radius; for (i32 y = -radius; y <= radius; y++) { i32 py = cy + y; if (py < 0 || py >= h) continue; i32 hspan = (i32)sqrtf((f32)(r2 - y * y)); i32 x0 = cx - hspan; i32 x1 = cx + hspan; if (x0 < 0) x0 = 0; if (x1 >= w) x1 = w - 1; if (x0 <= x1) { memset(&fb[py * w + x0], color, (size_t)(x1 - x0 + 1)); } } } void pxl8_resolve_to_rgba(pxl8_renderer* r, pxl8_gfx_texture color, pxl8_gfx_texture light_accum, const u32* palette, u32* output) { if (!VALID_TEX(r, color)) return; texture_slot* cs = &r->textures[SLOT_INDEX(color.id)]; u8* fb = cs->data; u32 w = cs->width; u32 h = cs->height; u32 total = w * h; (void)light_accum; #if defined(PXL8_SIMD_SSE) || defined(PXL8_SIMD_NEON) pxl8_i32_simd alpha_mask = pxl8_i32_simd_set((i32)0xFF000000); u32 i = 0; for (; i + 4 <= total; i += 4) { 
pxl8_i32_simd base = pxl8_i32_simd_set4( (i32)palette[fb[i + 0]], (i32)palette[fb[i + 1]], (i32)palette[fb[i + 2]], (i32)palette[fb[i + 3]] ); base = pxl8_i32_simd_or(base, alpha_mask); pxl8_i32_simd_store((i32*)&output[i], base); } for (; i < total; i++) { output[i] = palette[fb[i]] | 0xFF000000; } #else for (u32 i = 0; i < total; i++) { output[i] = palette[fb[i]] | 0xFF000000; } #endif }