/* pxl8/src/gfx/shaders/cpu/lit.c */
#include "pxl8_macros.h"
#include "pxl8_shader.h"
#include "pxl8_shader_builtins.h"
void pxl8_shader_lit(
const pxl8_shader_ctx* ctx,
2026-02-02 17:48:25 -06:00
const pxl8_shader_bindings* bindings,
const pxl8_shader_uniforms* uniforms,
u8* colors_out
2026-02-02 17:48:25 -06:00
) {
#if defined(PXL8_SIMD_SSE) || defined(PXL8_SIMD_NEON)
f32 uv_x[4], uv_y[4], color_f[4];
i32 px[4], py[4];
pxl8_f32_simd_store(uv_x, ctx->v_uv.x);
pxl8_f32_simd_store(uv_y, ctx->v_uv.y);
pxl8_f32_simd_store(color_f, ctx->v_color);
pxl8_i32_simd_store(px, ctx->x);
pxl8_i32_simd_store(py, ctx->y);
u8 tex_idx[4];
for (u32 i = 0; i < ctx->color_count; i++) {
if (bindings && bindings->atlas) {
tex_idx[i] = pxl8_sample_indexed(bindings, (pxl8_vec2){{ uv_x[i], uv_y[i] }});
} else {
if (uniforms && uniforms->dither) {
tex_idx[i] = pxl8_gfx_dither(color_f[i], (u32)px[i], (u32)py[i]);
} else {
f32 clamped = pxl8_clamp(color_f[i], 0.0f, 255.0f);
tex_idx[i] = (u8)(clamped);
}
}
}
pxl8_f32_simd light = ctx->v_light;
if (uniforms) {
pxl8_f32_simd ambient = pxl8_f32_simd_set((f32)uniforms->ambient / 255.0f);
light = pxl8_f32_simd_max(light, ambient);
if (uniforms->celestial_intensity > 0.0f) {
pxl8_vec3_simd cel_dir = pxl8_vec3_simd_set(uniforms->celestial_dir);
pxl8_f32_simd ndotl = pxl8_f32_simd_sub(
pxl8_f32_simd_zero(),
pxl8_vec3_simd_dot(ctx->v_normal, cel_dir)
);
pxl8_f32_simd cel_contrib = pxl8_f32_simd_mul(
pxl8_f32_simd_max(ndotl, pxl8_f32_simd_zero()),
pxl8_f32_simd_set(uniforms->celestial_intensity)
);
light = pxl8_f32_simd_add(light, cel_contrib);
}
for (u32 i = 0; i < uniforms->lights_count; i++) {
const pxl8_light* l = &uniforms->lights[i];
pxl8_vec3_simd light_pos = pxl8_vec3_simd_set(l->position);
pxl8_vec3_simd to_light = pxl8_vec3_simd_sub(light_pos, ctx->v_world);
pxl8_f32_simd dist_sq = pxl8_vec3_simd_dot(to_light, to_light);
pxl8_f32_simd in_range = pxl8_f32_simd_cmpgt(
pxl8_f32_simd_set(l->radius_sq), dist_sq
);
if (!pxl8_f32_simd_movemask(in_range)) continue;
pxl8_f32_simd inv_dist = pxl8_f32_simd_rsqrt(dist_sq);
pxl8_vec3_simd light_dir = pxl8_vec3_simd_scale(to_light, inv_dist);
pxl8_f32_simd ndotl = pxl8_vec3_simd_dot(ctx->v_normal, light_dir);
ndotl = pxl8_f32_simd_max(ndotl, pxl8_f32_simd_zero());
pxl8_f32_simd falloff = pxl8_f32_simd_sub(
pxl8_f32_simd_set(1.0f),
pxl8_f32_simd_mul(dist_sq, pxl8_f32_simd_set(l->inv_radius_sq))
);
falloff = pxl8_f32_simd_max(falloff, pxl8_f32_simd_zero());
pxl8_f32_simd strength = pxl8_f32_simd_mul(
pxl8_f32_simd_mul(pxl8_f32_simd_set(l->intensity), falloff),
ndotl
);
light = pxl8_f32_simd_add(light, strength);
}
}
light = pxl8_f32_simd_clamp(light, pxl8_f32_simd_zero(), pxl8_f32_simd_set(1.0f));
pxl8_f32_simd light_f = pxl8_f32_simd_mul(light, pxl8_f32_simd_set(255.0f));
f32 light_arr[4];
pxl8_f32_simd_store(light_arr, light_f);
for (u32 i = 0; i < ctx->color_count; i++) {
u8 light_u8;
if (uniforms && uniforms->dither) {
light_u8 = pxl8_gfx_dither(light_arr[i], (u32)px[i], (u32)py[i]);
} else {
light_u8 = (u8)light_arr[i];
}
colors_out[i] = pxl8_colormap_lookup(bindings, tex_idx[i], light_u8);
}
#else
2026-02-02 17:48:25 -06:00
u8 tex_idx = 0;
if (bindings && bindings->atlas) {
tex_idx = pxl8_sample_indexed(bindings, ctx->v_uv);
} else {
if (uniforms && uniforms->dither) {
tex_idx = pxl8_gfx_dither(ctx->v_color, (u32)ctx->x, (u32)ctx->y);
} else {
f32 clamped = pxl8_clamp(ctx->v_color, 0.0f, 255.0f);
tex_idx = (u8)(clamped);
}
}
f32 light = ctx->v_light;
if (uniforms) {
f32 ambient = (f32)uniforms->ambient / 255.0f;
if (ambient > light) light = ambient;
if (uniforms->celestial_intensity > 0.0f) {
f32 ndotl = -(ctx->v_normal.x * uniforms->celestial_dir.x +
ctx->v_normal.y * uniforms->celestial_dir.y +
ctx->v_normal.z * uniforms->celestial_dir.z);
if (ndotl > 0.0f) {
light += ndotl * uniforms->celestial_intensity;
}
}
for (u32 i = 0; i < uniforms->lights_count; i++) {
const pxl8_light* l = &uniforms->lights[i];
f32 lx = l->position.x - ctx->v_world.x;
f32 ly = l->position.y - ctx->v_world.y;
f32 lz = l->position.z - ctx->v_world.z;
f32 dist_sq = lx * lx + ly * ly + lz * lz;
if (dist_sq >= l->radius_sq) continue;
f32 inv_dist = pxl8_fast_inv_sqrt(dist_sq);
f32 nx = lx * inv_dist;
f32 ny = ly * inv_dist;
f32 nz = lz * inv_dist;
f32 ndotl = ctx->v_normal.x * nx + ctx->v_normal.y * ny + ctx->v_normal.z * nz;
if (ndotl <= 0.0f) continue;
f32 falloff = 1.0f - dist_sq * l->inv_radius_sq;
if (falloff <= 0.0f) continue;
f32 strength = l->intensity * falloff * ndotl;
light += strength;
2026-02-02 17:48:25 -06:00
}
}
if (light > 1.0f) light = 1.0f;
if (light < 0.0f) light = 0.0f;
f32 light_f = light * 255.0f;
u8 light_u8 = (u8)light_f;
if (uniforms && uniforms->dither) {
light_u8 = pxl8_gfx_dither(light_f, (u32)ctx->x, (u32)ctx->y);
}
colors_out[0] = pxl8_colormap_lookup(bindings, tex_idx, light_u8);
#endif
2026-02-02 17:48:25 -06:00
}