#include "pxl8_macros.h"
#include "pxl8_shader.h"
#include "pxl8_shader_builtins.h"

/*
 * Lit shader.
 *
 * For each pixel it picks a base index (sampled through
 * pxl8_sample_indexed() when an atlas is bound, otherwise derived from the
 * interpolated vertex color), computes a light level, and writes the result
 * of pxl8_colormap_lookup(bindings, base_index, light_level) to colors_out.
 *
 * Light level:
 *   - starts from the interpolated vertex light (ctx->v_light);
 *   - with uniforms present: is raised to at least ambient / 255, gains a
 *     celestial term when celestial_intensity > 0, and gains one term per
 *     point light that is in range;
 *   - is clamped to [0, 1], scaled by 255, then dithered (uniforms->dither)
 *     or truncated to u8.
 *
 * ctx        per-pixel inputs; SIMD values in the SIMD build, scalars otherwise.
 * bindings   may be NULL; is passed to pxl8_colormap_lookup() either way.
 * uniforms   may be NULL, in which case only ctx->v_light is used.
 * colors_out SIMD build: receives ctx->color_count entries.
 *            Scalar build: receives exactly one entry at index 0.
 */
void pxl8_shader_lit(
    const pxl8_shader_ctx* ctx,
    const pxl8_shader_bindings* bindings,
    const pxl8_shader_uniforms* uniforms,
    u8* colors_out
) {
    const int has_atlas = bindings && bindings->atlas;
    const int use_dither = uniforms && uniforms->dither;

#if defined(PXL8_SIMD_SSE) || defined(PXL8_SIMD_NEON)
    /* Spill the per-lane inputs so the index helpers can be called per pixel. */
    f32 lane_u[4];
    f32 lane_v[4];
    f32 lane_color[4];
    i32 lane_x[4];
    i32 lane_y[4];
    pxl8_f32_simd_store(lane_u, ctx->v_uv.x);
    pxl8_f32_simd_store(lane_v, ctx->v_uv.y);
    pxl8_f32_simd_store(lane_color, ctx->v_color);
    pxl8_i32_simd_store(lane_x, ctx->x);
    pxl8_i32_simd_store(lane_y, ctx->y);

    /* NOTE(review): the lane buffers hold 4 entries; this assumes
     * ctx->color_count <= 4 -- confirm against the caller. */
    u8 base_idx[4];
    for (u32 lane = 0; lane < ctx->color_count; lane++) {
        if (has_atlas) {
            base_idx[lane] = pxl8_sample_indexed(
                bindings, (pxl8_vec2){{ lane_u[lane], lane_v[lane] }}
            );
        } else if (use_dither) {
            base_idx[lane] = pxl8_gfx_dither(
                lane_color[lane], (u32)lane_x[lane], (u32)lane_y[lane]
            );
        } else {
            f32 bounded = pxl8_clamp(lane_color[lane], 0.0f, 255.0f);
            base_idx[lane] = (u8)(bounded);
        }
    }

    pxl8_f32_simd zero = pxl8_f32_simd_zero();
    pxl8_f32_simd one = pxl8_f32_simd_set(1.0f);

    pxl8_f32_simd lit = ctx->v_light;
    if (uniforms) {
        /* Ambient acts as a floor on the vertex light. */
        lit = pxl8_f32_simd_max(
            lit, pxl8_f32_simd_set((f32)uniforms->ambient / 255.0f)
        );

        if (uniforms->celestial_intensity > 0.0f) {
            /* Uses -dot(normal, celestial_dir), kept only where positive. */
            pxl8_vec3_simd sky_dir = pxl8_vec3_simd_set(uniforms->celestial_dir);
            pxl8_f32_simd facing = pxl8_f32_simd_sub(
                zero, pxl8_vec3_simd_dot(ctx->v_normal, sky_dir)
            );
            pxl8_f32_simd sky_term = pxl8_f32_simd_mul(
                pxl8_f32_simd_max(facing, zero),
                pxl8_f32_simd_set(uniforms->celestial_intensity)
            );
            lit = pxl8_f32_simd_add(lit, sky_term);
        }

        for (u32 li = 0; li < uniforms->lights_count; li++) {
            const pxl8_light* src = &uniforms->lights[li];

            pxl8_vec3_simd delta = pxl8_vec3_simd_sub(
                pxl8_vec3_simd_set(src->position), ctx->v_world
            );
            pxl8_f32_simd d2 = pxl8_vec3_simd_dot(delta, delta);

            /* Skip the light only when no lane is inside its radius. */
            pxl8_f32_simd reach = pxl8_f32_simd_cmpgt(
                pxl8_f32_simd_set(src->radius_sq), d2
            );
            if (!pxl8_f32_simd_movemask(reach)) {
                continue;
            }

            pxl8_f32_simd inv_len = pxl8_f32_simd_rsqrt(d2);
            pxl8_vec3_simd unit = pxl8_vec3_simd_scale(delta, inv_len);
            pxl8_f32_simd facing = pxl8_f32_simd_max(
                pxl8_vec3_simd_dot(ctx->v_normal, unit), zero
            );

            /* 1 - d2 * inv_radius_sq, floored at zero. Presumably
             * inv_radius_sq == 1 / radius_sq, which would zero the
             * out-of-range lanes here -- confirm. */
            pxl8_f32_simd atten = pxl8_f32_simd_max(
                pxl8_f32_simd_sub(
                    one,
                    pxl8_f32_simd_mul(d2, pxl8_f32_simd_set(src->inv_radius_sq))
                ),
                zero
            );

            pxl8_f32_simd term = pxl8_f32_simd_mul(
                pxl8_f32_simd_mul(pxl8_f32_simd_set(src->intensity), atten),
                facing
            );
            lit = pxl8_f32_simd_add(lit, term);
        }
    }

    lit = pxl8_f32_simd_clamp(lit, zero, one);

    f32 lane_light[4];
    pxl8_f32_simd_store(
        lane_light, pxl8_f32_simd_mul(lit, pxl8_f32_simd_set(255.0f))
    );

    for (u32 lane = 0; lane < ctx->color_count; lane++) {
        u8 shade = use_dither
            ? pxl8_gfx_dither(lane_light[lane], (u32)lane_x[lane], (u32)lane_y[lane])
            : (u8)lane_light[lane];
        colors_out[lane] = pxl8_colormap_lookup(bindings, base_idx[lane], shade);
    }
#else
    u8 base_idx;
    if (has_atlas) {
        base_idx = pxl8_sample_indexed(bindings, ctx->v_uv);
    } else if (use_dither) {
        base_idx = pxl8_gfx_dither(ctx->v_color, (u32)ctx->x, (u32)ctx->y);
    } else {
        f32 bounded = pxl8_clamp(ctx->v_color, 0.0f, 255.0f);
        base_idx = (u8)(bounded);
    }

    f32 lit = ctx->v_light;
    if (uniforms) {
        /* Ambient acts as a floor on the vertex light. */
        f32 floor_light = (f32)uniforms->ambient / 255.0f;
        if (floor_light > lit) {
            lit = floor_light;
        }

        if (uniforms->celestial_intensity > 0.0f) {
            /* Uses -dot(normal, celestial_dir), kept only where positive. */
            f32 facing = -(ctx->v_normal.x * uniforms->celestial_dir.x +
                           ctx->v_normal.y * uniforms->celestial_dir.y +
                           ctx->v_normal.z * uniforms->celestial_dir.z);
            if (facing > 0.0f) {
                lit += facing * uniforms->celestial_intensity;
            }
        }

        for (u32 li = 0; li < uniforms->lights_count; li++) {
            const pxl8_light* src = &uniforms->lights[li];

            f32 dx = src->position.x - ctx->v_world.x;
            f32 dy = src->position.y - ctx->v_world.y;
            f32 dz = src->position.z - ctx->v_world.z;
            f32 d2 = dx * dx + dy * dy + dz * dz;
            if (d2 >= src->radius_sq) {
                continue;
            }

            f32 inv_len = pxl8_fast_inv_sqrt(d2);
            f32 ux = dx * inv_len;
            f32 uy = dy * inv_len;
            f32 uz = dz * inv_len;

            f32 facing = ctx->v_normal.x * ux + ctx->v_normal.y * uy + ctx->v_normal.z * uz;
            if (facing <= 0.0f) {
                continue;
            }

            f32 atten = 1.0f - d2 * src->inv_radius_sq;
            if (atten <= 0.0f) {
                continue;
            }

            f32 term = src->intensity * atten * facing;
            lit += term;
        }
    }

    if (lit > 1.0f) {
        lit = 1.0f;
    } else if (lit < 0.0f) {
        lit = 0.0f;
    }

    f32 scaled = lit * 255.0f;
    u8 shade;
    if (use_dither) {
        shade = pxl8_gfx_dither(scaled, (u32)ctx->x, (u32)ctx->y);
    } else {
        shade = (u8)scaled;
    }

    colors_out[0] = pxl8_colormap_lookup(bindings, base_idx, shade);
#endif
}