use tiled atlas texture sampling, increase shader speed using inv sqrt

This commit is contained in:
asrael 2026-02-02 17:48:25 -06:00
parent 0c0aa792c1
commit 1f717b7c61
57 changed files with 3681 additions and 2982 deletions

106
src/gfx/shaders/cpu/lit.c Normal file
View file

@@ -0,0 +1,106 @@
#include "pxl8_macros.h"
#include "pxl8_shader.h"
#include "pxl8_shader_builtins.h"
/*
 * CPU "lit" shader: chooses a palette index for the current fragment and
 * returns pxl8_colormap_lookup() of that index at the fragment's light level.
 *
 * Base index:
 *   - with an atlas bound, the index is sampled at ctx->v_uv; a sampled index
 *     of 0 makes the shader return 0 immediately (presumably the transparent
 *     index -- confirm against the rasterizer);
 *   - otherwise the index comes from ctx->v_color, either dithered or clamped
 *     to [0, 255] and truncated.
 *
 * Light level (only ctx->v_light is used when uniforms is NULL):
 *   - raised to at least uniforms->ambient / 255;
 *   - plus the celestial term, max(0, -dot(normal, celestial_dir)) scaled by
 *     celestial_intensity, when that intensity is positive;
 *   - plus the summed strength of every dynamic light whose radius contains
 *     the fragment and which faces the surface.
 *   The sum is clamped to [0, 1], scaled to 0..255 and optionally dithered.
 *
 * Side effects on ctx:
 *   - when at least one dynamic light contributes, ctx->out_light_color is
 *     set to the strength-weighted average light colour packed as
 *     r | g << 8 | b << 16 | a << 24, where a is the summed strength * 255;
 *   - when uniforms->emissive is set, pxl8_set_light_tint() is called with the
 *     low 24 bits of the palette entry for the base index (0x00FFFFFF if no
 *     palette is bound) and a factor of 1.0.
 *
 * bindings and uniforms may each be NULL in this function; bindings is still
 * forwarded to pxl8_colormap_lookup() as-is. ctx must not be NULL.
 */
u8 pxl8_shader_lit(
    pxl8_shader_ctx* ctx,
    const pxl8_shader_bindings* bindings,
    const pxl8_shader_uniforms* uniforms
) {
    u8 tex_idx = 0;
    if (bindings && bindings->atlas) {
        tex_idx = pxl8_sample_indexed(bindings, ctx->v_uv);
        if (pxl8_unlikely(tex_idx == 0)) return 0;
    } else {
        if (uniforms && uniforms->dither) {
            tex_idx = pxl8_gfx_dither(ctx->v_color, (u32)ctx->x, (u32)ctx->y);
        } else {
            f32 clamped = pxl8_clamp(ctx->v_color, 0.0f, 255.0f);
            tex_idx = (u8)(clamped);
        }
    }

    f32 light = ctx->v_light;
    if (uniforms) {
        // Ambient acts as a floor on the interpolated vertex light.
        f32 ambient = (f32)uniforms->ambient / 255.0f;
        if (ambient > light) light = ambient;

        if (uniforms->celestial_intensity > 0.0f) {
            // Negated dot product: only surfaces facing against
            // celestial_dir receive this term.
            f32 ndotl = -(ctx->v_normal.x * uniforms->celestial_dir.x +
                          ctx->v_normal.y * uniforms->celestial_dir.y +
                          ctx->v_normal.z * uniforms->celestial_dir.z);
            if (ndotl > 0.0f) {
                light += ndotl * uniforms->celestial_intensity;
            }
        }

        // Running totals over all contributing dynamic lights; the colour
        // sums are weighted by each light's strength.
        f32 dyn_strength = 0.0f;
        f32 dyn_r = 0.0f;
        f32 dyn_g = 0.0f;
        f32 dyn_b = 0.0f;
        for (u32 i = 0; i < uniforms->lights_count; i++) {
            const pxl8_light* l = &uniforms->lights[i];
            f32 lx = l->position.x - ctx->v_world.x;
            f32 ly = l->position.y - ctx->v_world.y;
            f32 lz = l->position.z - ctx->v_world.z;
            f32 dist_sq = lx * lx + ly * ly + lz * lz;
            if (dist_sq >= l->radius_sq) continue;

            // Normalise the fragment-to-light vector without a sqrt/divide.
            f32 inv_dist = pxl8_fast_inv_sqrt(dist_sq);
            f32 nx = lx * inv_dist;
            f32 ny = ly * inv_dist;
            f32 nz = lz * inv_dist;
            f32 ndotl = ctx->v_normal.x * nx + ctx->v_normal.y * ny + ctx->v_normal.z * nz;

            // The rejection tests below are written as !(x > 0) instead of
            // x <= 0 so that NaN is rejected as well. When the fragment sits
            // exactly on the light (dist_sq == 0) the direction is
            // 0 * inv_sqrt(0); if pxl8_fast_inv_sqrt yields infinity there
            // (hardware rsqrt does -- TODO confirm for this helper) that is
            // NaN, and letting it into the accumulators would discard every
            // dynamic light for this fragment.
            if (!(ndotl > 0.0f)) continue;

            // 1 at the light's position, falling to 0 at its radius.
            f32 falloff = 1.0f - dist_sq * l->inv_radius_sq;
            if (!(falloff > 0.0f)) continue;

            if (uniforms->dither && falloff < 0.33f) {
                // In the dim outer band, drop the light per pixel using the
                // 4x4 Bayer pattern instead of letting it fade smoothly.
                f32 threshold = (PXL8_BAYER_4X4[((u32)ctx->y & 3) * 4 + ((u32)ctx->x & 3)] + 0.5f) * (1.0f / 16.0f);
                if (falloff < threshold * 0.33f) continue;
            }

            f32 strength = ((f32)l->intensity / 255.0f) * falloff * ndotl;
            if (!(strength > 0.0f)) continue;

            dyn_strength += strength;
            dyn_r += strength * (f32)l->r;
            dyn_g += strength * (f32)l->g;
            dyn_b += strength * (f32)l->b;
        }

        if (dyn_strength > 0.0f) {
            // Dividing the weighted sums by the total weight gives the
            // average light colour; the clamps absorb any error from the
            // approximate reciprocal.
            f32 inv = pxl8_fast_rcp(dyn_strength);
            u8 r = (u8)pxl8_clamp(dyn_r * inv, 0.0f, 255.0f);
            u8 g = (u8)pxl8_clamp(dyn_g * inv, 0.0f, 255.0f);
            u8 b = (u8)pxl8_clamp(dyn_b * inv, 0.0f, 255.0f);
            u8 a = (u8)pxl8_clamp(dyn_strength * 255.0f, 0.0f, 255.0f);
            ctx->out_light_color = (u32)r | ((u32)g << 8) | ((u32)b << 16) | ((u32)a << 24);
            light += dyn_strength;
        }
    }

    // Clamp to [0, 1]. The negated >= also maps NaN to 0, so the float-to-u8
    // conversion below always receives an in-range value (converting NaN or
    // an out-of-range float to an integer type is undefined behaviour).
    if (!(light >= 0.0f)) light = 0.0f;
    if (light > 1.0f) light = 1.0f;

    f32 light_f = light * 255.0f;
    u8 light_u8 = (u8)light_f;
    if (uniforms && uniforms->dither) {
        light_u8 = pxl8_gfx_dither(light_f, (u32)ctx->x, (u32)ctx->y);
    }

    u8 shaded = pxl8_colormap_lookup(bindings, tex_idx, light_u8);

    if (uniforms && uniforms->emissive) {
        // Tint with the texel's own palette colour, or white when no palette
        // is bound.
        u32 rgb = 0x00FFFFFF;
        if (bindings && bindings->palette) {
            rgb = bindings->palette[tex_idx] & 0x00FFFFFF;
        }
        pxl8_set_light_tint(ctx, rgb, 1.0f);
    }
    return shaded;
}