refactor: reorganize pxl8 into client/src/ module structure
- core/: main entry, types, logging, I/O, RNG - asset/: ase loader, cart, save, embed - gfx/: graphics, animation, atlas, fonts, tilemap, transitions - sfx/: audio - script/: lua/fennel runtime, REPL - hal/: platform abstraction (SDL3) - world/: BSP, world, procedural gen - math/: math utilities - game/: GUI, replay - lua/: Lua API modules
This commit is contained in:
parent
272e0bc615
commit
39b604b333
106 changed files with 6078 additions and 3715 deletions
302
client/src/math/pxl8_math.c
Normal file
302
client/src/math/pxl8_math.c
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
#include "pxl8_math.h"
|
||||
|
||||
// Component-wise sum of two 2D vectors.
pxl8_vec2 pxl8_vec2_add(pxl8_vec2 a, pxl8_vec2 b) {
    pxl8_vec2 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    return sum;
}
|
||||
|
||||
// Component-wise difference a - b of two 2D vectors.
pxl8_vec2 pxl8_vec2_sub(pxl8_vec2 a, pxl8_vec2 b) {
    pxl8_vec2 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    return diff;
}
|
||||
|
||||
// Multiplies both components of v by the scalar s.
pxl8_vec2 pxl8_vec2_scale(pxl8_vec2 v, f32 s) {
    pxl8_vec2 scaled;
    scaled.x = v.x * s;
    scaled.y = v.y * s;
    return scaled;
}
|
||||
|
||||
// Dot product of two 2D vectors.
f32 pxl8_vec2_dot(pxl8_vec2 a, pxl8_vec2 b) {
    f32 result = a.x * b.x + a.y * b.y;
    return result;
}
|
||||
|
||||
// Euclidean length of a 2D vector.
f32 pxl8_vec2_length(pxl8_vec2 v) {
    f32 len_sq = v.x * v.x + v.y * v.y;
    return sqrtf(len_sq);
}
|
||||
|
||||
// Returns v scaled to unit length. Vectors shorter than 1e-6 yield the
// zero vector instead of dividing by a near-zero length.
pxl8_vec2 pxl8_vec2_normalize(pxl8_vec2 v) {
    pxl8_vec2 unit = {0};
    f32 magnitude = pxl8_vec2_length(v);

    // Written as a negated '<' so a NaN length still takes the scaling path.
    if (!(magnitude < 1e-6f)) {
        unit = pxl8_vec2_scale(v, 1.0f / magnitude);
    }

    return unit;
}
|
||||
|
||||
// Component-wise sum of two 3D vectors.
pxl8_vec3 pxl8_vec3_add(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
|
||||
|
||||
// Component-wise difference a - b of two 3D vectors.
pxl8_vec3 pxl8_vec3_sub(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
|
||||
|
||||
// Multiplies all three components of v by the scalar s.
pxl8_vec3 pxl8_vec3_scale(pxl8_vec3 v, f32 s) {
    pxl8_vec3 scaled;
    scaled.x = v.x * s;
    scaled.y = v.y * s;
    scaled.z = v.z * s;
    return scaled;
}
|
||||
|
||||
// Dot product of two 3D vectors.
f32 pxl8_vec3_dot(pxl8_vec3 a, pxl8_vec3 b) {
    f32 result = a.x * b.x + a.y * b.y + a.z * b.z;
    return result;
}
|
||||
|
||||
// Cross product a x b.
pxl8_vec3 pxl8_vec3_cross(pxl8_vec3 a, pxl8_vec3 b) {
    pxl8_vec3 perp;
    perp.x = a.y * b.z - a.z * b.y;
    perp.y = a.z * b.x - a.x * b.z;
    perp.z = a.x * b.y - a.y * b.x;
    return perp;
}
|
||||
|
||||
// Euclidean length of a 3D vector, computed as sqrt(v . v).
f32 pxl8_vec3_length(pxl8_vec3 v) {
    f32 len_sq = pxl8_vec3_dot(v, v);
    return sqrtf(len_sq);
}
|
||||
|
||||
// Linear interpolation from a (t = 0) towards b (t = 1). t is not clamped.
pxl8_vec3 pxl8_vec3_lerp(pxl8_vec3 a, pxl8_vec3 b, f32 t) {
    pxl8_vec3 mixed;
    mixed.x = a.x + (b.x - a.x) * t;
    mixed.y = a.y + (b.y - a.y) * t;
    mixed.z = a.z + (b.z - a.z) * t;
    return mixed;
}
|
||||
|
||||
// Returns v scaled to unit length. Vectors shorter than 1e-6 yield the
// zero vector instead of dividing by a near-zero length.
pxl8_vec3 pxl8_vec3_normalize(pxl8_vec3 v) {
    pxl8_vec3 unit = {0};
    f32 magnitude = pxl8_vec3_length(v);

    // Written as a negated '<' so a NaN length still takes the scaling path.
    if (!(magnitude < 1e-6f)) {
        unit = pxl8_vec3_scale(v, 1.0f / magnitude);
    }

    return unit;
}
|
||||
|
||||
pxl8_mat4 pxl8_mat4_identity(void) {
|
||||
pxl8_mat4 mat = {0};
|
||||
|
||||
mat.m[0] = mat.m[5] = mat.m[10] = mat.m[15] = 1.0f;
|
||||
|
||||
return mat;
|
||||
}
|
||||
|
||||
// Matrix product a * b, with elements addressed as m[col * 4 + row].
pxl8_mat4 pxl8_mat4_mul(pxl8_mat4 a, pxl8_mat4 b) {
    pxl8_mat4 product = {0};

    for (i32 col = 0; col < 4; col++) {
        const f32* b_col = &b.m[col * 4];

        for (i32 row = 0; row < 4; row++) {
            // Row `row` of a dotted with column `col` of b.
            product.m[col * 4 + row] =
                a.m[0 + row] * b_col[0] +
                a.m[4 + row] * b_col[1] +
                a.m[8 + row] * b_col[2] +
                a.m[12 + row] * b_col[3];
        }
    }

    return product;
}
|
||||
|
||||
// Transforms the column vector v by m (m * v); each output component is
// one matrix row (stride 4 in m.m) dotted with v.
pxl8_vec4 pxl8_mat4_mul_vec4(pxl8_mat4 m, pxl8_vec4 v) {
    const f32* e = m.m;
    pxl8_vec4 out;

    out.x = e[0] * v.x + e[4] * v.y + e[8] * v.z + e[12] * v.w;
    out.y = e[1] * v.x + e[5] * v.y + e[9] * v.z + e[13] * v.w;
    out.z = e[2] * v.x + e[6] * v.y + e[10] * v.z + e[14] * v.w;
    out.w = e[3] * v.x + e[7] * v.y + e[11] * v.z + e[15] * v.w;

    return out;
}
|
||||
|
||||
// Identity matrix with (x, y, z) written into elements 12..14, i.e. the
// top three rows of the fourth column.
pxl8_mat4 pxl8_mat4_translate(f32 x, f32 y, f32 z) {
    pxl8_mat4 xlate = pxl8_mat4_identity();
    f32* offset = &xlate.m[12];

    offset[0] = x;
    offset[1] = y;
    offset[2] = z;

    return xlate;
}
|
||||
|
||||
// Rotation about the X axis. angle is passed straight to cosf/sinf, so it
// is in radians.
pxl8_mat4 pxl8_mat4_rotate_x(f32 angle) {
    const f32 cos_a = cosf(angle);
    const f32 sin_a = sinf(angle);
    pxl8_mat4 rot = pxl8_mat4_identity();

    rot.m[5] = cos_a;
    rot.m[6] = sin_a;
    rot.m[9] = -sin_a;
    rot.m[10] = cos_a;

    return rot;
}
|
||||
|
||||
// Rotation about the Y axis. angle is passed straight to cosf/sinf, so it
// is in radians.
pxl8_mat4 pxl8_mat4_rotate_y(f32 angle) {
    const f32 cos_a = cosf(angle);
    const f32 sin_a = sinf(angle);
    pxl8_mat4 rot = pxl8_mat4_identity();

    rot.m[0] = cos_a;
    rot.m[2] = -sin_a;
    rot.m[8] = sin_a;
    rot.m[10] = cos_a;

    return rot;
}
|
||||
|
||||
// Rotation about the Z axis. angle is passed straight to cosf/sinf, so it
// is in radians.
pxl8_mat4 pxl8_mat4_rotate_z(f32 angle) {
    const f32 cos_a = cosf(angle);
    const f32 sin_a = sinf(angle);
    pxl8_mat4 rot = pxl8_mat4_identity();

    rot.m[0] = cos_a;
    rot.m[1] = sin_a;
    rot.m[4] = -sin_a;
    rot.m[5] = cos_a;

    return rot;
}
|
||||
|
||||
// Scale matrix: identity with x, y, z on the first three diagonal slots.
pxl8_mat4 pxl8_mat4_scale(f32 x, f32 y, f32 z) {
    pxl8_mat4 scaling = pxl8_mat4_identity();

    scaling.m[0] = x;
    scaling.m[5] = y;
    scaling.m[10] = z;

    return scaling;
}
|
||||
|
||||
// Orthographic projection for the box [left, right] x [bottom, top] x
// [near, far]. No guard is applied for zero-sized extents (right == left,
// top == bottom or far == near), which divide by zero.
pxl8_mat4 pxl8_mat4_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near, f32 far) {
    const f32 width = right - left;
    const f32 height = top - bottom;
    const f32 depth = far - near;
    pxl8_mat4 proj = {0};

    // Scale terms on the diagonal.
    proj.m[0] = 2.0f / width;
    proj.m[5] = 2.0f / height;
    proj.m[10] = -2.0f / depth;

    // Offset terms in the fourth column.
    proj.m[12] = -(right + left) / width;
    proj.m[13] = -(top + bottom) / height;
    proj.m[14] = -(far + near) / depth;
    proj.m[15] = 1.0f;

    return proj;
}
|
||||
|
||||
// Perspective projection. fov is halved and passed to tanf (radians);
// aspect divides the X scale. m[11] = -1 places -z_view into clip-space w.
// No guard is applied for far == near, aspect == 0 or fov == 0.
pxl8_mat4 pxl8_mat4_perspective(f32 fov, f32 aspect, f32 near, f32 far) {
    const f32 tan_half = tanf(fov / 2.0f);
    const f32 depth = far - near;
    pxl8_mat4 proj = {0};

    proj.m[0] = 1.0f / (aspect * tan_half);
    proj.m[5] = 1.0f / tan_half;
    proj.m[10] = -(far + near) / depth;
    proj.m[11] = -1.0f;
    proj.m[14] = -(2.0f * far * near) / depth;

    return proj;
}
|
||||
|
||||
// Builds a view matrix for a camera at `eye` looking towards `center`.
// The basis is forward = normalize(center - eye),
// side = normalize(forward x up), and a recomputed up = side x forward.
pxl8_mat4 pxl8_mat4_lookat(pxl8_vec3 eye, pxl8_vec3 center, pxl8_vec3 up) {
    pxl8_vec3 forward = pxl8_vec3_normalize(pxl8_vec3_sub(center, eye));
    pxl8_vec3 side = pxl8_vec3_normalize(pxl8_vec3_cross(forward, up));
    pxl8_vec3 true_up = pxl8_vec3_cross(side, forward);
    pxl8_mat4 view = pxl8_mat4_identity();

    // First matrix row: side axis.
    view.m[0] = side.x;
    view.m[4] = side.y;
    view.m[8] = side.z;

    // Second matrix row: up axis.
    view.m[1] = true_up.x;
    view.m[5] = true_up.y;
    view.m[9] = true_up.z;

    // Third matrix row: negated forward axis.
    view.m[2] = -forward.x;
    view.m[6] = -forward.y;
    view.m[10] = -forward.z;

    // Fourth column: eye position expressed in the new basis, negated.
    view.m[12] = -pxl8_vec3_dot(side, eye);
    view.m[13] = -pxl8_vec3_dot(true_up, eye);
    view.m[14] = pxl8_vec3_dot(forward, eye);

    return view;
}
|
||||
|
||||
// Extracts six clipping planes from vp. Plane i is the fourth matrix row
// plus or minus one of the first three rows:
//   0: row3 - row0   1: row3 + row0
//   2: row3 + row1   3: row3 - row1
//   4: row3 - row2   5: row3 + row2
// Each plane is then scaled so its normal has unit length; planes whose
// normal is shorter than 1e-6 are left unscaled.
// NOTE(review): for an OpenGL-style view-projection matrix these would be
// right, left, bottom, top, far, near - confirm against callers.
pxl8_frustum pxl8_frustum_from_matrix(pxl8_mat4 vp) {
    static const i32 src_row[6] = {0, 0, 1, 1, 2, 2};
    static const bool subtract[6] = {true, false, false, true, true, false};

    pxl8_frustum out;

    for (i32 i = 0; i < 6; i++) {
        const i32 r = src_row[i];
        pxl8_plane* plane = &out.planes[i];

        // Matrix rows are strided by 4 because elements are m[col * 4 + row].
        if (subtract[i]) {
            plane->normal.x = vp.m[3] - vp.m[r];
            plane->normal.y = vp.m[7] - vp.m[4 + r];
            plane->normal.z = vp.m[11] - vp.m[8 + r];
            plane->distance = vp.m[15] - vp.m[12 + r];
        } else {
            plane->normal.x = vp.m[3] + vp.m[r];
            plane->normal.y = vp.m[7] + vp.m[4 + r];
            plane->normal.z = vp.m[11] + vp.m[8 + r];
            plane->distance = vp.m[15] + vp.m[12 + r];
        }

        f32 len = pxl8_vec3_length(plane->normal);
        if (len > 1e-6f) {
            f32 inv_len = 1.0f / len;
            plane->normal = pxl8_vec3_scale(plane->normal, inv_len);
            plane->distance *= inv_len;
        }
    }

    return out;
}
|
||||
|
||||
// Returns false as soon as the box [min, max] lies entirely on the
// negative side of any of the six planes, true otherwise. Only the box
// corner furthest along each plane normal is tested, so the result is
// conservative: a box outside the frustum near a corner can still pass.
// frustum must not be NULL.
bool pxl8_frustum_test_aabb(const pxl8_frustum* frustum, pxl8_vec3 min, pxl8_vec3 max) {
    for (i32 i = 0; i < 6; i++) {
        const pxl8_plane* plane = &frustum->planes[i];
        pxl8_vec3 n = plane->normal;

        // Pick, per axis, the corner coordinate furthest along the normal.
        pxl8_vec3 corner;
        corner.x = (n.x >= 0.0f) ? max.x : min.x;
        corner.y = (n.y >= 0.0f) ? max.y : min.y;
        corner.z = (n.z >= 0.0f) ? max.z : min.z;

        f32 signed_dist = pxl8_vec3_dot(n, corner) + plane->distance;
        if (signed_dist < 0.0f) {
            return false;
        }
    }

    return true;
}
|
||||
72
client/src/math/pxl8_math.h
Normal file
72
client/src/math/pxl8_math.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "pxl8_types.h"
|
||||
|
||||
#define PXL8_PI 3.14159265358979323846f
|
||||
#define PXL8_TAU (PXL8_PI * 2.0f)
|
||||
|
||||
typedef struct pxl8_vec2 {
|
||||
f32 x, y;
|
||||
} pxl8_vec2;
|
||||
|
||||
typedef struct pxl8_vec3 {
|
||||
f32 x, y, z;
|
||||
} pxl8_vec3;
|
||||
|
||||
typedef struct pxl8_vec4 {
|
||||
f32 x, y, z, w;
|
||||
} pxl8_vec4;
|
||||
|
||||
typedef struct pxl8_mat4 {
|
||||
f32 m[16];
|
||||
} pxl8_mat4;
|
||||
|
||||
typedef struct pxl8_plane {
|
||||
pxl8_vec3 normal;
|
||||
f32 distance;
|
||||
} pxl8_plane;
|
||||
|
||||
typedef struct pxl8_frustum {
|
||||
pxl8_plane planes[6];
|
||||
} pxl8_frustum;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
pxl8_vec2 pxl8_vec2_add(pxl8_vec2 a, pxl8_vec2 b);
|
||||
f32 pxl8_vec2_dot(pxl8_vec2 a, pxl8_vec2 b);
|
||||
f32 pxl8_vec2_length(pxl8_vec2 v);
|
||||
pxl8_vec2 pxl8_vec2_normalize(pxl8_vec2 v);
|
||||
pxl8_vec2 pxl8_vec2_scale(pxl8_vec2 v, f32 s);
|
||||
pxl8_vec2 pxl8_vec2_sub(pxl8_vec2 a, pxl8_vec2 b);
|
||||
|
||||
pxl8_vec3 pxl8_vec3_add(pxl8_vec3 a, pxl8_vec3 b);
|
||||
pxl8_vec3 pxl8_vec3_cross(pxl8_vec3 a, pxl8_vec3 b);
|
||||
f32 pxl8_vec3_dot(pxl8_vec3 a, pxl8_vec3 b);
|
||||
f32 pxl8_vec3_length(pxl8_vec3 v);
|
||||
pxl8_vec3 pxl8_vec3_lerp(pxl8_vec3 a, pxl8_vec3 b, f32 t);
|
||||
pxl8_vec3 pxl8_vec3_normalize(pxl8_vec3 v);
|
||||
pxl8_vec3 pxl8_vec3_scale(pxl8_vec3 v, f32 s);
|
||||
pxl8_vec3 pxl8_vec3_sub(pxl8_vec3 a, pxl8_vec3 b);
|
||||
|
||||
pxl8_mat4 pxl8_mat4_identity(void);
|
||||
pxl8_mat4 pxl8_mat4_lookat(pxl8_vec3 eye, pxl8_vec3 center, pxl8_vec3 up);
|
||||
pxl8_mat4 pxl8_mat4_mul(pxl8_mat4 a, pxl8_mat4 b);
|
||||
pxl8_vec4 pxl8_mat4_mul_vec4(pxl8_mat4 m, pxl8_vec4 v);
|
||||
pxl8_mat4 pxl8_mat4_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near, f32 far);
|
||||
pxl8_mat4 pxl8_mat4_perspective(f32 fov, f32 aspect, f32 near, f32 far);
|
||||
pxl8_mat4 pxl8_mat4_rotate_x(f32 angle);
|
||||
pxl8_mat4 pxl8_mat4_rotate_y(f32 angle);
|
||||
pxl8_mat4 pxl8_mat4_rotate_z(f32 angle);
|
||||
pxl8_mat4 pxl8_mat4_scale(f32 x, f32 y, f32 z);
|
||||
pxl8_mat4 pxl8_mat4_translate(f32 x, f32 y, f32 z);
|
||||
|
||||
pxl8_frustum pxl8_frustum_from_matrix(pxl8_mat4 vp);
|
||||
bool pxl8_frustum_test_aabb(const pxl8_frustum* frustum, pxl8_vec3 min, pxl8_vec3 max);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
299
client/src/math/pxl8_simd.h
Normal file
299
client/src/math/pxl8_simd.h
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
#pragma once
|
||||
|
||||
#include "pxl8_types.h"
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#define PXL8_SIMD_SSE2 1
|
||||
#include <emmintrin.h>
|
||||
#elif defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define PXL8_SIMD_NEON 1
|
||||
#include <arm_neon.h>
|
||||
#else
|
||||
#define PXL8_SIMD_SCALAR 1
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(PXL8_SIMD_SSE2)
|
||||
|
||||
// SSE2 backend: each vector type wraps one 128-bit register.
typedef struct {
    __m128 v;
} pxl8_f32x4;

typedef struct {
    __m128i v;
} pxl8_i32x4;

typedef struct {
    __m128i v;
} pxl8_u16x8;
|
||||
|
||||
// Broadcasts x into all four float lanes.
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    out.v = _mm_set1_ps(x);
    return out;
}
|
||||
|
||||
// Builds a vector with a in lane 0 through d in lane 3. _mm_set_ps takes
// its arguments highest lane first, hence the reversed order.
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    pxl8_f32x4 out;
    out.v = _mm_set_ps(d, c, b, a);
    return out;
}
|
||||
|
||||
// Lane-wise a + b.
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_add_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a - b.
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_sub_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a * b.
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_mul_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a / b.
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_div_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise minimum of a and b.
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_min_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise maximum of a and b.
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = _mm_max_ps(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a < b. Each result lane is all-ones bits when true and
// all-zero bits when false.
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 mask;
    mask.v = _mm_cmplt_ps(a.v, b.v);
    return mask;
}
|
||||
|
||||
// Packs the sign bit of each lane into bits 0..3 of the result.
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    i32 sign_bits = _mm_movemask_ps(a.v);
    return sign_bits;
}
|
||||
|
||||
// Converts each float lane to a 32-bit integer, truncating toward zero.
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    out.v = _mm_cvttps_epi32(a.v);
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3]. Uses the unaligned store, so out
// needs no 16-byte alignment.
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    __m128 lanes = a.v;
    _mm_storeu_ps(out, lanes);
}
|
||||
|
||||
// Broadcasts x into all four 32-bit integer lanes.
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    out.v = _mm_set1_epi32(x);
    return out;
}
|
||||
|
||||
// Shifts every lane left by n bits, filling with zeros.
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    out.v = _mm_slli_epi32(a.v, n);
    return out;
}
|
||||
|
||||
// Shifts every lane right by n bits, replicating the sign bit.
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    out.v = _mm_srai_epi32(a.v, n);
    return out;
}
|
||||
|
||||
// Bitwise AND of a and b.
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = _mm_and_si128(a.v, b.v);
    return out;
}
|
||||
|
||||
// Bitwise OR of a and b.
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = _mm_or_si128(a.v, b.v);
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3]. Uses the unaligned store, so out
// needs no 16-byte alignment.
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    __m128i* dst = (__m128i*)out;
    _mm_storeu_si128(dst, a.v);
}
|
||||
|
||||
// Lane-wise unsigned a < b on eight 16-bit lanes. Each result lane is
// 0xFFFF when true and 0 when false.
//
// SSE2 only provides a signed 16-bit compare, which orders lanes with the
// top bit set (>= 0x8000) below all others. Flipping the top bit of both
// operands maps unsigned order onto signed order, so the signed compare
// then gives the unsigned answer.
static inline pxl8_u16x8 pxl8_u16x8_cmplt(pxl8_u16x8 a, pxl8_u16x8 b) {
    const __m128i bias = _mm_set1_epi16((short)-32768);
    __m128i lhs = _mm_xor_si128(a.v, bias);
    __m128i rhs = _mm_xor_si128(b.v, bias);
    return (pxl8_u16x8){ _mm_cmplt_epi16(lhs, rhs) };
}
|
||||
|
||||
// Bitwise select: takes bits from b where mask is set and from a where it
// is clear, i.e. (a & ~mask) | (b & mask).
static inline pxl8_u16x8 pxl8_u16x8_blend(pxl8_u16x8 a, pxl8_u16x8 b, pxl8_u16x8 mask) {
    __m128i keep_a = _mm_andnot_si128(mask.v, a.v);
    __m128i take_b = _mm_and_si128(mask.v, b.v);
    pxl8_u16x8 out;
    out.v = _mm_or_si128(keep_a, take_b);
    return out;
}
|
||||
|
||||
// Packs the top bit of every byte into a 16-bit mask, so each 16-bit lane
// contributes two adjacent bits (low byte, then high byte).
static inline i32 pxl8_u16x8_movemask(pxl8_u16x8 a) {
    i32 byte_mask = _mm_movemask_epi8(a.v);
    return byte_mask;
}
|
||||
|
||||
#elif defined(PXL8_SIMD_NEON)
|
||||
|
||||
// NEON backend: each vector type wraps one 128-bit register.
typedef struct {
    float32x4_t v;
} pxl8_f32x4;

typedef struct {
    int32x4_t v;
} pxl8_i32x4;

typedef struct {
    uint16x8_t v;
} pxl8_u16x8;
|
||||
|
||||
// Broadcasts x into all four float lanes.
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    out.v = vdupq_n_f32(x);
    return out;
}
|
||||
|
||||
// Builds a vector with a in lane 0 through d in lane 3 by loading from a
// temporary array.
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    f32 lanes[4] = {a, b, c, d};
    pxl8_f32x4 out;
    out.v = vld1q_f32(lanes);
    return out;
}
|
||||
|
||||
// Lane-wise a + b.
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vaddq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a - b.
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vsubq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a * b.
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vmulq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a / b.
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vdivq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise minimum of a and b.
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vminq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise maximum of a and b.
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    out.v = vmaxq_f32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Lane-wise a < b. The integer mask from the compare (all-ones or
// all-zero per lane) is reinterpreted as floats without changing its bits.
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    uint32x4_t lane_mask = vcltq_f32(a.v, b.v);
    pxl8_f32x4 out;
    out.v = vreinterpretq_f32_u32(lane_mask);
    return out;
}
|
||||
|
||||
// Packs the sign bit of each lane into bits 0..3 of the result (lane 0 in
// bit 0). NEON has no direct movemask, so each lane is shifted down to
// its sign bit and the four bits are combined by hand.
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    uint32x4_t sign_bits = vshrq_n_u32(vreinterpretq_u32_f32(a.v), 31);

    // Each lane is now 0 or 1; the cast makes the unsigned-to-i32
    // conversion of the 4-bit result explicit.
    return (i32)(vgetq_lane_u32(sign_bits, 0) |
                 (vgetq_lane_u32(sign_bits, 1) << 1) |
                 (vgetq_lane_u32(sign_bits, 2) << 2) |
                 (vgetq_lane_u32(sign_bits, 3) << 3));
}
|
||||
|
||||
// Converts each float lane to a 32-bit integer, rounding toward zero.
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    out.v = vcvtq_s32_f32(a.v);
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3].
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    float32x4_t lanes = a.v;
    vst1q_f32(out, lanes);
}
|
||||
|
||||
// Broadcasts x into all four 32-bit integer lanes.
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    out.v = vdupq_n_s32(x);
    return out;
}
|
||||
|
||||
// Shifts every lane left by n bits, using a per-lane shift count of n.
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    int32x4_t counts = vdupq_n_s32(n);
    pxl8_i32x4 out;
    out.v = vshlq_s32(a.v, counts);
    return out;
}
|
||||
|
||||
// Arithmetic right shift of every lane by n bits. vshlq_s32 shifts right
// when given a negative count, so the count is negated.
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    int32x4_t counts = vdupq_n_s32(-n);
    pxl8_i32x4 out;
    out.v = vshlq_s32(a.v, counts);
    return out;
}
|
||||
|
||||
// Bitwise AND of a and b.
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = vandq_s32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Bitwise OR of a and b.
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    out.v = vorrq_s32(a.v, b.v);
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3].
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    int32x4_t lanes = a.v;
    vst1q_s32(out, lanes);
}
|
||||
|
||||
#else
|
||||
|
||||
typedef struct { f32 v[4]; } pxl8_f32x4;
|
||||
typedef struct { i32 v[4]; } pxl8_i32x4;
|
||||
typedef struct { u16 v[8]; } pxl8_u16x8;
|
||||
|
||||
// Broadcasts x into all four float lanes.
static inline pxl8_f32x4 pxl8_f32x4_splat(f32 x) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = x;
    }
    return out;
}
|
||||
|
||||
// Builds a vector with a in lane 0 through d in lane 3.
static inline pxl8_f32x4 pxl8_f32x4_new(f32 a, f32 b, f32 c, f32 d) {
    pxl8_f32x4 out;
    out.v[0] = a;
    out.v[1] = b;
    out.v[2] = c;
    out.v[3] = d;
    return out;
}
|
||||
|
||||
// Lane-wise a + b.
static inline pxl8_f32x4 pxl8_f32x4_add(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] + b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise a - b.
static inline pxl8_f32x4 pxl8_f32x4_sub(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] - b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise a * b.
static inline pxl8_f32x4 pxl8_f32x4_mul(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] * b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise a / b.
static inline pxl8_f32x4 pxl8_f32x4_div(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] / b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise minimum: picks a when a < b, otherwise b.
static inline pxl8_f32x4 pxl8_f32x4_min(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (a.v[lane] < b.v[lane]) ? a.v[lane] : b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise maximum: picks a when a > b, otherwise b.
static inline pxl8_f32x4 pxl8_f32x4_max(pxl8_f32x4 a, pxl8_f32x4 b) {
    pxl8_f32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (a.v[lane] > b.v[lane]) ? a.v[lane] : b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Lane-wise a < b. Each result lane holds the bit pattern 0xFFFFFFFF when
// true and 0 when false, matching the SIMD backends.
//
// The mask bits are written through a union rather than a u32* cast onto
// the float array: accessing f32 storage through a u32 pointer violates
// the strict-aliasing rule, whereas reading a different union member is
// permitted in C.
static inline pxl8_f32x4 pxl8_f32x4_cmplt(pxl8_f32x4 a, pxl8_f32x4 b) {
    union {
        pxl8_f32x4 vec;
        u32 bits[4];
    } mask;

    for (i32 lane = 0; lane < 4; lane++) {
        mask.bits[lane] = (a.v[lane] < b.v[lane]) ? 0xFFFFFFFFu : 0u;
    }

    return mask.vec;
}
|
||||
|
||||
// Packs the sign bit (bit 31) of each lane into bits 0..3 of the result,
// lane 0 in bit 0.
//
// The lane bits are read through a union rather than a u32* cast onto the
// float array, which would violate the strict-aliasing rule.
static inline i32 pxl8_f32x4_movemask(pxl8_f32x4 a) {
    union {
        pxl8_f32x4 vec;
        u32 bits[4];
    } in;
    in.vec = a;

    return (i32)(((in.bits[0] >> 31) & 1u) |
                 (((in.bits[1] >> 31) & 1u) << 1) |
                 (((in.bits[2] >> 31) & 1u) << 2) |
                 (((in.bits[3] >> 31) & 1u) << 3));
}
|
||||
|
||||
// Converts each float lane to i32 with a C cast (truncates toward zero).
static inline pxl8_i32x4 pxl8_f32x4_to_i32x4(pxl8_f32x4 a) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (i32)a.v[lane];
    }
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3].
static inline void pxl8_f32x4_store(pxl8_f32x4 a, f32* out) {
    for (i32 lane = 0; lane < 4; lane++) {
        out[lane] = a.v[lane];
    }
}
|
||||
|
||||
// Broadcasts x into all four 32-bit integer lanes.
static inline pxl8_i32x4 pxl8_i32x4_splat(i32 x) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = x;
    }
    return out;
}
|
||||
|
||||
// Shifts every lane left by n bits, filling with zeros.
//
// The shift is done on the unsigned bit pattern: left-shifting a negative
// signed value (or shifting a set bit into the sign position) is undefined
// behavior in C, while the SIMD backends simply shift the bits. n must be
// in [0, 31].
static inline pxl8_i32x4 pxl8_i32x4_slli(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = (i32)((u32)a.v[lane] << n);
    }
    return out;
}
|
||||
|
||||
// Shifts every lane right by n bits using the signed >> operator.
// NOTE(review): >> on a negative value is implementation-defined in C;
// this relies on the compiler performing an arithmetic shift.
static inline pxl8_i32x4 pxl8_i32x4_srai(pxl8_i32x4 a, i32 n) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] >> n;
    }
    return out;
}
|
||||
|
||||
// Bitwise AND of a and b.
static inline pxl8_i32x4 pxl8_i32x4_and(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] & b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Bitwise OR of a and b.
static inline pxl8_i32x4 pxl8_i32x4_or(pxl8_i32x4 a, pxl8_i32x4 b) {
    pxl8_i32x4 out;
    for (i32 lane = 0; lane < 4; lane++) {
        out.v[lane] = a.v[lane] | b.v[lane];
    }
    return out;
}
|
||||
|
||||
// Writes the four lanes to out[0..3].
static inline void pxl8_i32x4_store(pxl8_i32x4 a, i32* out) {
    for (i32 lane = 0; lane < 4; lane++) {
        out[lane] = a.v[lane];
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
// Approximates 1 / sqrt(x) with the bit-level trick: an initial guess is
// formed by subtracting half the float's integer bit pattern from the
// constant 0x5f375a86, then refined with a single Newton-Raphson step.
// The result is an approximation only; x is expected to be positive.
//
// The float/integer reinterpretation goes through a union instead of
// pointer casts (*(i32*)&x), which violate the strict-aliasing rule and
// may be miscompiled at higher optimisation levels.
static inline f32 pxl8_fast_inv_sqrt(f32 x) {
    union {
        f32 f;
        i32 i;
    } bits;

    f32 half = 0.5f * x;

    bits.f = x;
    bits.i = 0x5f375a86 - (bits.i >> 1);
    f32 y = bits.f;

    // One Newton-Raphson iteration: y = y * (1.5 - 0.5 * x * y * y).
    return y * (1.5f - half * y * y);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue