Use tiled atlas texture sampling; increase shader speed using a fast inverse square root

This commit is contained in:
asrael 2026-02-04 07:37:20 -06:00
parent e92748c2d9
commit c4226b36fe
34 changed files with 1045 additions and 520 deletions

View file

@ -1,13 +1,18 @@
#include "pxl8_macros.h"
#include "pxl8_shader.h"
#include "pxl8_shader_builtins.h"
u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
u8 pxl8_shader_lit(
pxl8_shader_ctx* ctx,
const pxl8_shader_bindings* bindings,
const pxl8_shader_uniforms* uniforms
) {
u8 tex_idx = 0;
if (ctx->bindings && ctx->bindings->texture) {
tex_idx = pxl8_sample_indexed(ctx, ctx->v_uv);
if (tex_idx == 0) return 0;
if (bindings && bindings->atlas) {
tex_idx = pxl8_sample_indexed(bindings, ctx->v_uv);
if (pxl8_unlikely(tex_idx == 0)) return 0;
} else {
if (ctx->uniforms && ctx->uniforms->dither) {
if (uniforms && uniforms->dither) {
tex_idx = pxl8_gfx_dither(ctx->v_color, (u32)ctx->x, (u32)ctx->y);
} else {
f32 clamped = pxl8_clamp(ctx->v_color, 0.0f, 255.0f);
@ -17,24 +22,16 @@ u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
f32 light = ctx->v_light;
const pxl8_shader_uniforms* u = ctx->uniforms;
if (u) {
f32 ambient = (f32)u->ambient / 255.0f;
if (uniforms) {
f32 ambient = (f32)uniforms->ambient / 255.0f;
if (ambient > light) light = ambient;
if (u->celestial_intensity > 0.0f) {
f32 dx = u->celestial_dir.x;
f32 dy = u->celestial_dir.y;
f32 dz = u->celestial_dir.z;
f32 len = pxl8_sqrt(dx * dx + dy * dy + dz * dz);
if (len > 0.0001f) {
dx /= len;
dy /= len;
dz /= len;
f32 ndotl = -(ctx->v_normal.x * dx + ctx->v_normal.y * dy + ctx->v_normal.z * dz);
if (ndotl > 0.0f) {
light += ndotl * u->celestial_intensity;
}
if (uniforms->celestial_intensity > 0.0f) {
f32 ndotl = -(ctx->v_normal.x * uniforms->celestial_dir.x +
ctx->v_normal.y * uniforms->celestial_dir.y +
ctx->v_normal.z * uniforms->celestial_dir.z);
if (ndotl > 0.0f) {
light += ndotl * uniforms->celestial_intensity;
}
}
@ -43,16 +40,15 @@ u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
f32 dyn_g = 0.0f;
f32 dyn_b = 0.0f;
for (u32 i = 0; i < u->lights_count; i++) {
const pxl8_light* l = &u->lights[i];
for (u32 i = 0; i < uniforms->lights_count; i++) {
const pxl8_light* l = &uniforms->lights[i];
f32 lx = l->position.x - ctx->v_world.x;
f32 ly = l->position.y - ctx->v_world.y;
f32 lz = l->position.z - ctx->v_world.z;
f32 dist_sq = lx * lx + ly * ly + lz * lz;
if (dist_sq >= l->radius_sq) continue;
f32 dist = pxl8_sqrt(dist_sq);
f32 inv_dist = dist > 0.0001f ? (1.0f / dist) : 0.0f;
f32 inv_dist = pxl8_fast_inv_sqrt(dist_sq);
f32 nx = lx * inv_dist;
f32 ny = ly * inv_dist;
f32 nz = lz * inv_dist;
@ -62,6 +58,10 @@ u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
f32 falloff = 1.0f - dist_sq * l->inv_radius_sq;
if (falloff <= 0.0f) continue;
if (uniforms->dither && falloff < 0.33f) {
f32 threshold = (PXL8_BAYER_4X4[((u32)ctx->y & 3) * 4 + ((u32)ctx->x & 3)] + 0.5f) * (1.0f / 16.0f);
if (falloff < threshold * 0.33f) continue;
}
f32 strength = ((f32)l->intensity / 255.0f) * falloff * ndotl;
if (strength <= 0.0f) continue;
@ -73,7 +73,7 @@ u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
}
if (dyn_strength > 0.0f) {
f32 inv = 1.0f / dyn_strength;
f32 inv = pxl8_fast_rcp(dyn_strength);
u8 r = (u8)pxl8_clamp(dyn_r * inv, 0.0f, 255.0f);
u8 g = (u8)pxl8_clamp(dyn_g * inv, 0.0f, 255.0f);
u8 b = (u8)pxl8_clamp(dyn_b * inv, 0.0f, 255.0f);
@ -88,16 +88,16 @@ u8 pxl8_shader_lit(pxl8_shader_ctx* ctx) {
f32 light_f = light * 255.0f;
u8 light_u8 = (u8)light_f;
if (ctx->uniforms && ctx->uniforms->dither) {
if (uniforms && uniforms->dither) {
light_u8 = pxl8_gfx_dither(light_f, (u32)ctx->x, (u32)ctx->y);
}
u8 shaded = pxl8_colormap_lookup(ctx, tex_idx, light_u8);
u8 shaded = pxl8_colormap_lookup(bindings, tex_idx, light_u8);
if (ctx->uniforms && ctx->uniforms->emissive) {
if (uniforms && uniforms->emissive) {
u32 rgb = 0x00FFFFFF;
if (ctx->bindings && ctx->bindings->palette) {
rgb = ctx->bindings->palette[tex_idx] & 0x00FFFFFF;
if (bindings && bindings->palette) {
rgb = bindings->palette[tex_idx] & 0x00FFFFFF;
}
pxl8_set_light_tint(ctx, rgb, 1.0f);
}