refactor: reorganize pxl8 into client/src/ module structure

- core/: main entry, types, logging, I/O, RNG
- asset/: ase loader, cart, save, embed
- gfx/: graphics, animation, atlas, fonts, tilemap, transitions
- sfx/: audio
- script/: lua/fennel runtime, REPL
- hal/: platform abstraction (SDL3)
- world/: BSP, world, procedural gen
- math/: math utilities
- game/: GUI, replay
- lua/: Lua API modules
This commit is contained in:
asrael 2026-01-12 21:46:31 -06:00
parent 272e0bc615
commit 39b604b333
106 changed files with 6078 additions and 3715 deletions

302
client/src/math/pxl8_math.c Normal file
View file

@ -0,0 +1,302 @@
#include "pxl8_math.h"
/** Returns the component-wise sum a + b. */
pxl8_vec2 pxl8_vec2_add(pxl8_vec2 a, pxl8_vec2 b) {
    pxl8_vec2 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    return sum;
}
/** Returns the component-wise difference a - b. */
pxl8_vec2 pxl8_vec2_sub(pxl8_vec2 a, pxl8_vec2 b) {
    pxl8_vec2 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    return diff;
}
/** Returns v with both components multiplied by the scalar s. */
pxl8_vec2 pxl8_vec2_scale(pxl8_vec2 v, f32 s) {
    pxl8_vec2 scaled;
    scaled.x = v.x * s;
    scaled.y = v.y * s;
    return scaled;
}
/** Returns the dot product of a and b. */
f32 pxl8_vec2_dot(pxl8_vec2 a, pxl8_vec2 b) {
    f32 dot = (a.x * b.x) + (a.y * b.y);
    return dot;
}
/** Returns the Euclidean length of v. */
f32 pxl8_vec2_length(pxl8_vec2 v) {
    f32 length_sq = v.x * v.x + v.y * v.y;
    return sqrtf(length_sq);
}
/**
 * Returns v scaled to unit length.
 * Vectors shorter than 1e-6 yield the zero vector instead of dividing by ~0.
 */
pxl8_vec2 pxl8_vec2_normalize(pxl8_vec2 v) {
    pxl8_vec2 unit = {0};
    f32 length = pxl8_vec2_length(v);
    if (length < 1e-6f) {
        return unit;
    }
    f32 inv_length = 1.0f / length;
    unit.x = v.x * inv_length;
    unit.y = v.y * inv_length;
    return unit;
}
/** Returns the component-wise sum a + b. */
pxl8_vec3 pxl8_vec3_add(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
/** Returns the component-wise difference a - b. */
pxl8_vec3 pxl8_vec3_sub(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
/** Returns v with every component multiplied by the scalar s. */
pxl8_vec3 pxl8_vec3_scale(pxl8_vec3 v, f32 s) {
    pxl8_vec3 scaled;
    scaled.x = v.x * s;
    scaled.y = v.y * s;
    scaled.z = v.z * s;
    return scaled;
}
/** Returns the dot product of a and b. */
f32 pxl8_vec3_dot(pxl8_vec3 a, pxl8_vec3 b) {
    f32 dot = (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
    return dot;
}
/** Returns the cross product a x b. */
pxl8_vec3 pxl8_vec3_cross(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 perp;
    perp.x = a.y * b.z - a.z * b.y;
    perp.y = a.z * b.x - a.x * b.z;
    perp.z = a.x * b.y - a.y * b.x;
    return perp;
}
/** Returns the Euclidean length of v. */
f32 pxl8_vec3_length(pxl8_vec3 v) {
    f32 length_sq = pxl8_vec3_dot(v, v);
    return sqrtf(length_sq);
}
/**
 * Linearly interpolates from a to b.
 * t = 0 yields a and t = 1 yields b; t is not clamped.
 */
pxl8_vec3 pxl8_vec3_lerp(pxl8_vec3 a, pxl8_vec3 b, f32 t) {
    pxl8_vec3 mixed;
    mixed.x = a.x + (b.x - a.x) * t;
    mixed.y = a.y + (b.y - a.y) * t;
    mixed.z = a.z + (b.z - a.z) * t;
    return mixed;
}
/**
 * Returns v scaled to unit length.
 * Vectors shorter than 1e-6 yield the zero vector instead of dividing by ~0.
 */
pxl8_vec3 pxl8_vec3_normalize(pxl8_vec3 v) {
    pxl8_vec3 unit = {0};
    f32 length = pxl8_vec3_length(v);
    if (length < 1e-6f) {
        return unit;
    }
    f32 inv_length = 1.0f / length;
    unit.x = v.x * inv_length;
    unit.y = v.y * inv_length;
    unit.z = v.z * inv_length;
    return unit;
}
pxl8_mat4 pxl8_mat4_identity(void) {
pxl8_mat4 mat = {0};
mat.m[0] = mat.m[5] = mat.m[10] = mat.m[15] = 1.0f;
return mat;
}
/**
 * Returns the matrix product a * b.
 * Matrices are stored column-major: element (row, col) is m[col * 4 + row].
 */
pxl8_mat4 pxl8_mat4_mul(pxl8_mat4 a, pxl8_mat4 b) {
    pxl8_mat4 product = {0};
    for (i32 c = 0; c < 4; c++) {
        /* Column c of b; each output column is a combination of a's columns. */
        const f32 *b_col = &b.m[c * 4];
        f32 *out_col = &product.m[c * 4];
        for (i32 r = 0; r < 4; r++) {
            out_col[r] =
                a.m[r]      * b_col[0] +
                a.m[4 + r]  * b_col[1] +
                a.m[8 + r]  * b_col[2] +
                a.m[12 + r] * b_col[3];
        }
    }
    return product;
}
/** Returns m * v, treating v as a column vector and m as column-major. */
pxl8_vec4 pxl8_mat4_mul_vec4(pxl8_mat4 m, pxl8_vec4 v) {
    const f32 *e = m.m;
    pxl8_vec4 out;
    out.x = e[0] * v.x + e[4] * v.y + e[8]  * v.z + e[12] * v.w;
    out.y = e[1] * v.x + e[5] * v.y + e[9]  * v.z + e[13] * v.w;
    out.z = e[2] * v.x + e[6] * v.y + e[10] * v.z + e[14] * v.w;
    out.w = e[3] * v.x + e[7] * v.y + e[11] * v.z + e[15] * v.w;
    return out;
}
/** Returns a translation matrix; the offset is stored in m[12], m[13], m[14]. */
pxl8_mat4 pxl8_mat4_translate(f32 x, f32 y, f32 z) {
    pxl8_mat4 xlate = { .m = {
        [0]  = 1.0f,
        [5]  = 1.0f,
        [10] = 1.0f,
        [12] = x,
        [13] = y,
        [14] = z,
        [15] = 1.0f,
    } };
    return xlate;
}
/** Returns a rotation about the X axis; angle is in radians. */
pxl8_mat4 pxl8_mat4_rotate_x(f32 angle) {
    f32 cos_a = cosf(angle);
    f32 sin_a = sinf(angle);
    pxl8_mat4 rot = { .m = {
        [0]  = 1.0f,
        [5]  = cos_a,
        [6]  = sin_a,
        [9]  = -sin_a,
        [10] = cos_a,
        [15] = 1.0f,
    } };
    return rot;
}
/** Returns a rotation about the Y axis; angle is in radians. */
pxl8_mat4 pxl8_mat4_rotate_y(f32 angle) {
    f32 cos_a = cosf(angle);
    f32 sin_a = sinf(angle);
    pxl8_mat4 rot = { .m = {
        [0]  = cos_a,
        [2]  = -sin_a,
        [5]  = 1.0f,
        [8]  = sin_a,
        [10] = cos_a,
        [15] = 1.0f,
    } };
    return rot;
}
/** Returns a rotation about the Z axis; angle is in radians. */
pxl8_mat4 pxl8_mat4_rotate_z(f32 angle) {
    f32 cos_a = cosf(angle);
    f32 sin_a = sinf(angle);
    pxl8_mat4 rot = { .m = {
        [0]  = cos_a,
        [1]  = sin_a,
        [4]  = -sin_a,
        [5]  = cos_a,
        [10] = 1.0f,
        [15] = 1.0f,
    } };
    return rot;
}
/** Returns a scaling matrix with per-axis factors x, y, z on the diagonal. */
pxl8_mat4 pxl8_mat4_scale(f32 x, f32 y, f32 z) {
    pxl8_mat4 scaling = { .m = {
        [0]  = x,
        [5]  = y,
        [10] = z,
        [15] = 1.0f,
    } };
    return scaling;
}
/**
 * Returns an orthographic projection with the same layout as the classic
 * glOrtho matrix. No check is made for right == left, top == bottom or
 * far == near; those inputs divide by zero.
 */
pxl8_mat4 pxl8_mat4_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near, f32 far) {
    f32 width  = right - left;
    f32 height = top - bottom;
    f32 depth  = far - near;
    pxl8_mat4 proj = { .m = {
        [0]  = 2.0f / width,
        [5]  = 2.0f / height,
        [10] = -2.0f / depth,
        [12] = -(right + left) / width,
        [13] = -(top + bottom) / height,
        [14] = -(far + near) / depth,
        [15] = 1.0f,
    } };
    return proj;
}
/**
 * Returns a perspective projection with the same layout as the classic
 * gluPerspective matrix. fov is the vertical field of view in radians and
 * aspect is the factor that further divides the X scale.
 * No check is made for far == near or a zero tangent.
 */
pxl8_mat4 pxl8_mat4_perspective(f32 fov, f32 aspect, f32 near, f32 far) {
    f32 tan_half = tanf(fov / 2.0f);
    pxl8_mat4 proj = { .m = {
        [0]  = 1.0f / (aspect * tan_half),
        [5]  = 1.0f / tan_half,
        [10] = -(far + near) / (far - near),
        [11] = -1.0f,
        [14] = -(2.0f * far * near) / (far - near),
    } };
    return proj;
}
/**
 * Returns a view matrix for a camera at eye looking toward center.
 * The camera looks down its local -Z axis (the negated forward vector fills
 * the third row). If up is parallel to the view direction the side vector
 * degenerates to zero, because pxl8_vec3_normalize returns zero for tiny input.
 */
pxl8_mat4 pxl8_mat4_lookat(pxl8_vec3 eye, pxl8_vec3 center, pxl8_vec3 up) {
    pxl8_vec3 forward = pxl8_vec3_normalize(pxl8_vec3_sub(center, eye));
    pxl8_vec3 side = pxl8_vec3_normalize(pxl8_vec3_cross(forward, up));
    pxl8_vec3 true_up = pxl8_vec3_cross(side, forward);

    pxl8_mat4 view = { .m = {
        /* rotation part: rows are side, true_up, -forward */
        [0] = side.x,     [4] = side.y,     [8]  = side.z,
        [1] = true_up.x,  [5] = true_up.y,  [9]  = true_up.z,
        [2] = -forward.x, [6] = -forward.y, [10] = -forward.z,
        /* translation part: eye moved into the rotated frame */
        [12] = -pxl8_vec3_dot(side, eye),
        [13] = -pxl8_vec3_dot(true_up, eye),
        [14] = pxl8_vec3_dot(forward, eye),
        [15] = 1.0f,
    } };
    return view;
}
/**
 * Extracts six planes from a (column-major) matrix by combining its fourth
 * row with each of the first three rows, then normalizes every plane whose
 * normal is longer than 1e-6.
 *
 * Plane order: w-x, w+x, w+y, w-y, w-z, w+z.
 */
pxl8_frustum pxl8_frustum_from_matrix(pxl8_mat4 vp) {
    /* Which of the first three rows feeds each plane, and whether that row
       is subtracted from (true) or added to (false) the fourth row. */
    static const i32 plane_row[6] = {0, 0, 1, 1, 2, 2};
    static const bool plane_subtract[6] = {true, false, false, true, true, false};

    pxl8_frustum out;
    const f32 *w_row = &vp.m[3];

    for (i32 p = 0; p < 6; p++) {
        const f32 *row = &vp.m[plane_row[p]];
        pxl8_plane *plane = &out.planes[p];

        /* Row elements are 4 floats apart in column-major storage. */
        if (plane_subtract[p]) {
            plane->normal.x = w_row[0] - row[0];
            plane->normal.y = w_row[4] - row[4];
            plane->normal.z = w_row[8] - row[8];
            plane->distance = w_row[12] - row[12];
        } else {
            plane->normal.x = w_row[0] + row[0];
            plane->normal.y = w_row[4] + row[4];
            plane->normal.z = w_row[8] + row[8];
            plane->distance = w_row[12] + row[12];
        }

        f32 len = pxl8_vec3_length(plane->normal);
        if (len > 1e-6f) {
            f32 inv_len = 1.0f / len;
            plane->normal = pxl8_vec3_scale(plane->normal, inv_len);
            plane->distance *= inv_len;
        }
    }
    return out;
}
/**
 * Tests an axis-aligned box against the frustum.
 * For each plane the box corner furthest along the plane normal is checked;
 * if that corner is behind the plane (signed distance < 0) the whole box is
 * outside and false is returned. Returns true otherwise, so boxes that only
 * intersect the frustum also pass.
 */
bool pxl8_frustum_test_aabb(const pxl8_frustum* frustum, pxl8_vec3 min, pxl8_vec3 max) {
    const pxl8_plane *plane = frustum->planes;
    const pxl8_plane *end = plane + 6;

    for (; plane != end; plane++) {
        pxl8_vec3 corner;
        corner.x = (plane->normal.x >= 0.0f) ? max.x : min.x;
        corner.y = (plane->normal.y >= 0.0f) ? max.y : min.y;
        corner.z = (plane->normal.z >= 0.0f) ? max.z : min.z;

        f32 signed_dist = pxl8_vec3_dot(plane->normal, corner) + plane->distance;
        if (signed_dist < 0.0f) {
            return false;
        }
    }
    return true;
}

View file

@ -0,0 +1,72 @@
#pragma once
#include <math.h>
#include "pxl8_types.h"
/* Pi written as a single-precision literal. */
#define PXL8_PI 3.14159265358979323846f
/* Two pi: one full turn in radians. */
#define PXL8_TAU (PXL8_PI * 2.0f)
/** 2D vector of f32 components. */
typedef struct pxl8_vec2 {
    f32 x;
    f32 y;
} pxl8_vec2;
/** 3D vector of f32 components. */
typedef struct pxl8_vec3 {
    f32 x;
    f32 y;
    f32 z;
} pxl8_vec3;
/** 4D (homogeneous) vector of f32 components. */
typedef struct pxl8_vec4 {
    f32 x;
    f32 y;
    f32 z;
    f32 w;
} pxl8_vec4;
/*
 * 4x4 matrix of f32 values.
 * The pxl8_mat4_* functions index it column-major: element (row, col) is
 * m[col * 4 + row], so a translation lives in m[12], m[13], m[14].
 */
typedef struct pxl8_mat4 {
    f32 m[16];
} pxl8_mat4;
/*
 * Plane in normal/offset form. pxl8_frustum_test_aabb treats a point p as
 * being on the inner side when dot(normal, p) + distance >= 0.
 */
typedef struct pxl8_plane {
    pxl8_vec3 normal;   /* plane normal; unit length after pxl8_frustum_from_matrix unless degenerate */
    f32 distance;       /* offset term added to dot(normal, p) */
} pxl8_plane;
/*
 * Six bounding planes. pxl8_frustum_from_matrix fills them in the order
 * w-x, w+x, w+y, w-y, w-z, w+z (right, left, bottom, top, far, near when the
 * input is an OpenGL-style view-projection matrix).
 */
typedef struct pxl8_frustum {
    pxl8_plane planes[6];
} pxl8_frustum;
#ifdef __cplusplus
extern "C" {
#endif
/* ---- 2D vectors: all arguments and results are passed by value. ---- */
pxl8_vec2 pxl8_vec2_add(pxl8_vec2 a, pxl8_vec2 b);
f32 pxl8_vec2_dot(pxl8_vec2 a, pxl8_vec2 b);
f32 pxl8_vec2_length(pxl8_vec2 v);
/* Returns the zero vector when the length of v is below 1e-6. */
pxl8_vec2 pxl8_vec2_normalize(pxl8_vec2 v);
pxl8_vec2 pxl8_vec2_scale(pxl8_vec2 v, f32 s);
pxl8_vec2 pxl8_vec2_sub(pxl8_vec2 a, pxl8_vec2 b);
/* ---- 3D vectors ---- */
pxl8_vec3 pxl8_vec3_add(pxl8_vec3 a, pxl8_vec3 b);
pxl8_vec3 pxl8_vec3_cross(pxl8_vec3 a, pxl8_vec3 b);
f32 pxl8_vec3_dot(pxl8_vec3 a, pxl8_vec3 b);
f32 pxl8_vec3_length(pxl8_vec3 v);
/* a + (b - a) * t; t is not clamped. */
pxl8_vec3 pxl8_vec3_lerp(pxl8_vec3 a, pxl8_vec3 b, f32 t);
/* Returns the zero vector when the length of v is below 1e-6. */
pxl8_vec3 pxl8_vec3_normalize(pxl8_vec3 v);
pxl8_vec3 pxl8_vec3_scale(pxl8_vec3 v, f32 s);
pxl8_vec3 pxl8_vec3_sub(pxl8_vec3 a, pxl8_vec3 b);
/* ---- 4x4 matrices (column-major: element (row, col) is m[col * 4 + row]) ---- */
pxl8_mat4 pxl8_mat4_identity(void);
/* View matrix for a camera at eye looking toward center. */
pxl8_mat4 pxl8_mat4_lookat(pxl8_vec3 eye, pxl8_vec3 center, pxl8_vec3 up);
/* Matrix product a * b. */
pxl8_mat4 pxl8_mat4_mul(pxl8_mat4 a, pxl8_mat4 b);
/* m * v with v treated as a column vector. */
pxl8_vec4 pxl8_mat4_mul_vec4(pxl8_mat4 m, pxl8_vec4 v);
pxl8_mat4 pxl8_mat4_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near, f32 far);
/* fov is the vertical field of view in radians. */
pxl8_mat4 pxl8_mat4_perspective(f32 fov, f32 aspect, f32 near, f32 far);
/* Rotation angles are in radians. */
pxl8_mat4 pxl8_mat4_rotate_x(f32 angle);
pxl8_mat4 pxl8_mat4_rotate_y(f32 angle);
pxl8_mat4 pxl8_mat4_rotate_z(f32 angle);
pxl8_mat4 pxl8_mat4_scale(f32 x, f32 y, f32 z);
pxl8_mat4 pxl8_mat4_translate(f32 x, f32 y, f32 z);
/* ---- Frustum culling ---- */
/* Extracts and normalizes six planes from the given matrix. */
pxl8_frustum pxl8_frustum_from_matrix(pxl8_mat4 vp);
/* Returns false only when the box lies entirely behind one of the planes. */
bool pxl8_frustum_test_aabb(const pxl8_frustum* frustum, pxl8_vec3 min, pxl8_vec3 max);
#ifdef __cplusplus
}
#endif

299
client/src/math/pxl8_simd.h Normal file
View file

@ -0,0 +1,299 @@
#pragma once
#include "pxl8_types.h"
/*
 * Backend selection: exactly one of PXL8_SIMD_SSE2, PXL8_SIMD_NEON or
 * PXL8_SIMD_SCALAR is defined, based on the target architecture macros.
 */
#if defined(__x86_64__) || defined(_M_X64)
/* 64-bit x86: SSE2 intrinsics. */
#define PXL8_SIMD_SSE2 1
#include <emmintrin.h>
#elif defined(__aarch64__) || defined(_M_ARM64)
/* 64-bit ARM: NEON intrinsics. */
#define PXL8_SIMD_NEON 1
#include <arm_neon.h>
#else
/* Anything else: plain C fallback operating on arrays. */
#define PXL8_SIMD_SCALAR 1
#endif
#ifdef __cplusplus
extern "C" {
#endif
#if defined(PXL8_SIMD_SSE2)
/* SSE2 backend: thin struct wrappers so each lane type is a distinct C type. */
typedef struct { __m128 v; } pxl8_f32x4;   /* four f32 lanes */
typedef struct { __m128i v; } pxl8_i32x4;  /* four i32 lanes */
typedef struct { __m128i v; } pxl8_u16x8;  /* eight u16 lanes */
/** Broadcasts x into all four lanes. */
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    out.v = _mm_set1_ps(x);
    return out;
}
/** Builds a vector with a in lane 0, b in lane 1, c in lane 2, d in lane 3. */
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    pxl8_f32x4 out;
    /* _mm_setr_ps takes its arguments in memory (lane 0 first) order. */
    out.v = _mm_setr_ps(a, b, c, d);
    return out;
}
/** Lane-wise a + b. */
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_add_ps(a.v, b.v);
    return out;
}
/** Lane-wise a - b. */
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_sub_ps(a.v, b.v);
    return out;
}
/** Lane-wise a * b. */
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_mul_ps(a.v, b.v);
    return out;
}
/** Lane-wise a / b. */
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_div_ps(a.v, b.v);
    return out;
}
/** Lane-wise minimum of a and b. */
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_min_ps(a.v, b.v);
    return out;
}
/** Lane-wise maximum of a and b. */
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_max_ps(a.v, b.v);
    return out;
}
/** Lane-wise a < b; each lane becomes all-ones bits when true, zero otherwise. */
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 mask;
    mask.v = _mm_cmplt_ps(a.v, b.v);
    return mask;
}
/** Packs the sign bit of each lane into bits 0..3 of the result (lane 0 -> bit 0). */
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    i32 sign_bits = _mm_movemask_ps(a.v);
    return sign_bits;
}
/** Converts each f32 lane to i32, truncating toward zero. */
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    out.v = _mm_cvttps_epi32(a.v);
    return out;
}
/** Writes the four lanes to out[0..3]; out does not need to be 16-byte aligned. */
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    __m128 lanes = a.v;
    _mm_storeu_ps(out, lanes);
}
/** Broadcasts x into all four integer lanes. */
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    out.v = _mm_set1_epi32(x);
    return out;
}
/** Shifts every lane left by n bits, shifting in zeros. */
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    out.v = _mm_slli_epi32(a.v, n);
    return out;
}
/** Shifts every lane right by n bits, replicating the sign bit (arithmetic shift). */
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    out.v = _mm_srai_epi32(a.v, n);
    return out;
}
/** Bitwise AND of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = _mm_and_si128(a.v, b.v);
    return out;
}
/** Bitwise OR of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = _mm_or_si128(a.v, b.v);
    return out;
}
/** Writes the four lanes to out[0..3]; out does not need to be 16-byte aligned. */
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    __m128i *dst = (__m128i*)out;
    _mm_storeu_si128(dst, a.v);
}
/**
 * Lane-wise unsigned a < b; each 16-bit lane becomes 0xFFFF when true and 0
 * otherwise.
 *
 * SSE2 only provides a signed 16-bit compare, which orders values >= 0x8000
 * below small values. Flipping the top bit of both operands maps the unsigned
 * range onto the signed range while preserving order, so the signed compare
 * then gives the unsigned answer.
 */
static inline pxl8_u16x8 pxl8_u16x8_cmplt(pxl8_u16x8 a, pxl8_u16x8 b) {
    const __m128i sign_flip = _mm_set1_epi16((short)-32768);
    __m128i lhs = _mm_xor_si128(a.v, sign_flip);
    __m128i rhs = _mm_xor_si128(b.v, sign_flip);
    return (pxl8_u16x8){ _mm_cmplt_epi16(lhs, rhs) };
}
/**
 * Bitwise select: takes bits from b where mask is 1 and from a where mask is 0.
 * With a compare result as mask this picks whole lanes.
 */
static inline pxl8_u16x8 pxl8_u16x8_blend(pxl8_u16x8 a, pxl8_u16x8 b, pxl8_u16x8 mask) {
    __m128i keep_a = _mm_andnot_si128(mask.v, a.v);  /* ~mask & a */
    __m128i take_b = _mm_and_si128(mask.v, b.v);     /*  mask & b */
    pxl8_u16x8 out;
    out.v = _mm_or_si128(keep_a, take_b);
    return out;
}
/**
 * Packs the top bit of every byte into a 16-bit mask.
 * Note this is per byte, not per 16-bit lane, so each lane contributes two bits.
 */
static inline i32 pxl8_u16x8_movemask(pxl8_u16x8 a) {
    i32 byte_mask = _mm_movemask_epi8(a.v);
    return byte_mask;
}
#elif defined(PXL8_SIMD_NEON)
/* NEON backend: thin struct wrappers around the native vector types. */
typedef struct { float32x4_t v; } pxl8_f32x4;  /* four f32 lanes */
typedef struct { int32x4_t v; } pxl8_i32x4;    /* four i32 lanes */
typedef struct { uint16x8_t v; } pxl8_u16x8;   /* eight u16 lanes */
/** Broadcasts x into all four lanes. */
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    out.v = vdupq_n_f32(x);
    return out;
}
/** Builds a vector with a in lane 0, b in lane 1, c in lane 2, d in lane 3. */
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    f32 lanes[4];
    lanes[0] = a;
    lanes[1] = b;
    lanes[2] = c;
    lanes[3] = d;
    pxl8_f32x4 out;
    out.v = vld1q_f32(lanes);
    return out;
}
/** Lane-wise a + b. */
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vaddq_f32(a.v, b.v);
    return out;
}
/** Lane-wise a - b. */
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vsubq_f32(a.v, b.v);
    return out;
}
/** Lane-wise a * b. */
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vmulq_f32(a.v, b.v);
    return out;
}
/** Lane-wise a / b. */
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vdivq_f32(a.v, b.v);
    return out;
}
/** Lane-wise minimum of a and b. */
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vminq_f32(a.v, b.v);
    return out;
}
/** Lane-wise maximum of a and b. */
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vmaxq_f32(a.v, b.v);
    return out;
}
/** Lane-wise a < b; each lane becomes all-ones bits when true, zero otherwise. */
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 mask;
    /* The compare yields an integer mask; reinterpret it so it fits the f32 wrapper. */
    mask.v = vreinterpretq_f32_u32(vcltq_f32(a.v, b.v));
    return mask;
}
/**
 * Packs the sign bit of each lane into bits 0..3 of the result
 * (lane 0 -> bit 0), mirroring the SSE movemask operation.
 */
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    /* Move each lane's sign bit down to bit 0. */
    uint32x4_t sign_bits = vshrq_n_u32(vreinterpretq_u32_f32(a.v), 31);
    uint32_t mask = vgetq_lane_u32(sign_bits, 0)
                  | (vgetq_lane_u32(sign_bits, 1) << 1)
                  | (vgetq_lane_u32(sign_bits, 2) << 2)
                  | (vgetq_lane_u32(sign_bits, 3) << 3);
    return (i32)mask;
}
/** Converts each f32 lane to i32, rounding toward zero. */
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    out.v = vcvtq_s32_f32(a.v);
    return out;
}
/** Writes the four lanes to out[0..3]. */
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    float32x4_t lanes = a.v;
    vst1q_f32(out, lanes);
}
/** Broadcasts x into all four integer lanes. */
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    out.v = vdupq_n_s32(x);
    return out;
}
/** Shifts every lane left by n bits. */
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    int32x4_t counts = vdupq_n_s32(n);
    pxl8_i32x4 out;
    out.v = vshlq_s32(a.v, counts);
    return out;
}
/** Shifts every lane right by n bits, preserving the sign (arithmetic shift). */
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    /* vshlq_s32 with a negative per-lane count performs a right shift. */
    int32x4_t counts = vdupq_n_s32(-n);
    pxl8_i32x4 out;
    out.v = vshlq_s32(a.v, counts);
    return out;
}
/** Bitwise AND of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = vandq_s32(a.v, b.v);
    return out;
}
/** Bitwise OR of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = vorrq_s32(a.v, b.v);
    return out;
}
/** Writes the four lanes to out[0..3]. */
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    int32x4_t lanes = a.v;
    vst1q_s32(out, lanes);
}
#else
/* Scalar fallback: the same wrapper types backed by plain arrays. */
typedef struct { f32 v[4]; } pxl8_f32x4;  /* four f32 lanes */
typedef struct { i32 v[4]; } pxl8_i32x4;  /* four i32 lanes */
typedef struct { u16 v[8]; } pxl8_u16x8;  /* eight u16 lanes */
/** Broadcasts x into all four lanes. */
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = x;
    }
    return out;
}
/** Builds a vector with a in lane 0, b in lane 1, c in lane 2, d in lane 3. */
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    pxl8_f32x4 out;
    out.v[0] = a;
    out.v[1] = b;
    out.v[2] = c;
    out.v[3] = d;
    return out;
}
/** Lane-wise a + b. */
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] + b.v[lane];
    }
    return out;
}
/** Lane-wise a - b. */
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] - b.v[lane];
    }
    return out;
}
/** Lane-wise a * b. */
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] * b.v[lane];
    }
    return out;
}
/** Lane-wise a / b. */
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] / b.v[lane];
    }
    return out;
}
/** Lane-wise minimum: picks a's lane when a < b, otherwise b's lane. */
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        if (a.v[lane] < b.v[lane]) {
            out.v[lane] = a.v[lane];
        } else {
            out.v[lane] = b.v[lane];
        }
    }
    return out;
}
/** Lane-wise maximum: picks a's lane when a > b, otherwise b's lane. */
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        if (a.v[lane] > b.v[lane]) {
            out.v[lane] = a.v[lane];
        } else {
            out.v[lane] = b.v[lane];
        }
    }
    return out;
}
/**
 * Lane-wise a < b; each lane becomes all-ones bits (0xFFFFFFFF) when true and
 * zero otherwise, matching the SIMD backends.
 *
 * The bit patterns are written through a union rather than a cast pointer:
 * accessing f32 storage through a u32* violates the strict-aliasing rule.
 */
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    union { pxl8_f32x4 vec; u32 bits[4]; } mask;
    for (i32 lane = 0; lane < 4; lane++) {
        mask.bits[lane] = (a.v[lane] < b.v[lane]) ? 0xFFFFFFFFu : 0u;
    }
    return mask.vec;
}
/**
 * Packs the sign bit of each lane into bits 0..3 of the result
 * (lane 0 -> bit 0).
 *
 * The lanes are read through a union rather than a cast pointer: accessing
 * f32 storage through a u32* violates the strict-aliasing rule.
 */
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    union { pxl8_f32x4 vec; u32 bits[4]; } src;
    src.vec = a;
    u32 mask = 0;
    for (i32 lane = 0; lane < 4; lane++) {
        mask |= ((src.bits[lane] >> 31) & 1u) << lane;
    }
    return (i32)mask;
}
/** Converts each f32 lane to i32 with a C cast (truncation toward zero). */
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (i32)a.v[lane];
    }
    return out;
}
/** Writes the four lanes to out[0..3]. */
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    for (i32 lane = 0; lane < 4; lane++) {
        out[lane] = a.v[lane];
    }
}
/** Broadcasts x into all four integer lanes. */
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = x;
    }
    return out;
}
/**
 * Shifts every lane left by n bits, shifting in zeros.
 *
 * The shift is done on the unsigned bit pattern because left-shifting a
 * negative signed value is undefined behavior in C. Counts outside 0..31
 * (also undefined for a plain C shift) produce all-zero lanes, which is what
 * the SSE2 _mm_slli_epi32 intrinsic returns for oversized counts.
 */
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out = {{ 0, 0, 0, 0 }};
    if ((u32)n > 31u) {
        return out;
    }
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (i32)((u32)a.v[lane] << n);
    }
    return out;
}
/**
 * Shifts every lane right by n bits.
 *
 * Counts outside 0..31 are undefined for a plain C shift, so they are clamped
 * to 31; this fills the lane with its sign bit, which is what the SSE2
 * _mm_srai_epi32 intrinsic does for oversized counts.
 * NOTE: right-shifting a negative signed value is implementation-defined in
 * C; this relies on the compiler performing an arithmetic shift.
 */
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    i32 count = ((u32)n > 31u) ? 31 : n;
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] >> count;
    }
    return out;
}
/** Bitwise AND of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] & b.v[lane];
    }
    return out;
}
/** Bitwise OR of a and b. */
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] | b.v[lane];
    }
    return out;
}
/** Writes the four lanes to out[0..3]. */
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    for (i32 lane = 0; lane < 4; lane++) {
        out[lane] = a.v[lane];
    }
}
#endif
/**
 * Approximates 1 / sqrt(x) using the integer bit trick followed by one
 * Newton-Raphson refinement step.
 *
 * The float/int reinterpretation goes through a union rather than cast
 * pointers: reading an f32 through an i32* (and vice versa) violates the
 * strict-aliasing rule and may be miscompiled at higher optimization levels.
 * The result is only meaningful for positive, finite x.
 */
static inline f32 pxl8_fast_inv_sqrt(f32 x) {
    union { f32 f; i32 i; } bits;
    f32 half = 0.5f * x;

    bits.f = x;
    /* Initial guess from the bit pattern of x. */
    bits.i = 0x5f375a86 - (bits.i >> 1);
    f32 y = bits.f;

    /* One Newton-Raphson iteration: y * (3/2 - x/2 * y^2). */
    return y * (1.5f - half * y * y);
}
#ifdef __cplusplus
}
#endif