Remove SIMD: scalar math plus compiler optimizations are good enough
This commit is contained in:
parent
e2c7998663
commit
4d84122ef3
8 changed files with 41 additions and 509 deletions
|
|
@ -1,7 +1,6 @@
|
|||
#include <math.h>
|
||||
|
||||
#include "pxl8_math.h"
|
||||
#include "pxl8_simd.h"
|
||||
|
||||
pxl8_vec2 pxl8_vec2_add(pxl8_vec2 a, pxl8_vec2 b) {
|
||||
return (pxl8_vec2){
|
||||
|
|
@ -41,45 +40,31 @@ pxl8_vec2 pxl8_vec2_normalize(pxl8_vec2 v) {
|
|||
}
|
||||
|
||||
pxl8_vec3 pxl8_vec3_add(pxl8_vec3 a, pxl8_vec3 b) {
|
||||
pxl8_simd_vec_f32 va = pxl8_simd_set_f32(a.x, a.y, a.z, 0);
|
||||
pxl8_simd_vec_f32 vb = pxl8_simd_set_f32(b.x, b.y, b.z, 0);
|
||||
pxl8_simd_vec_f32 result = pxl8_simd_add_f32(va, vb);
|
||||
|
||||
return (pxl8_vec3){
|
||||
.x = result.f32_array[0],
|
||||
.y = result.f32_array[1],
|
||||
.z = result.f32_array[2],
|
||||
.x = a.x + b.x,
|
||||
.y = a.y + b.y,
|
||||
.z = a.z + b.z,
|
||||
};
|
||||
}
|
||||
|
||||
pxl8_vec3 pxl8_vec3_sub(pxl8_vec3 a, pxl8_vec3 b) {
|
||||
pxl8_simd_vec_f32 va = pxl8_simd_set_f32(a.x, a.y, a.z, 0);
|
||||
pxl8_simd_vec_f32 vb = pxl8_simd_set_f32(b.x, b.y, b.z, 0);
|
||||
pxl8_simd_vec_f32 result = pxl8_simd_sub_f32(va, vb);
|
||||
|
||||
return (pxl8_vec3){
|
||||
.x = result.f32_array[0],
|
||||
.y = result.f32_array[1],
|
||||
.z = result.f32_array[2],
|
||||
.x = a.x - b.x,
|
||||
.y = a.y - b.y,
|
||||
.z = a.z - b.z,
|
||||
};
|
||||
}
|
||||
|
||||
pxl8_vec3 pxl8_vec3_scale(pxl8_vec3 v, f32 s) {
|
||||
pxl8_simd_vec_f32 vv = pxl8_simd_set_f32(v.x, v.y, v.z, 0);
|
||||
pxl8_simd_vec_f32 result = pxl8_simd_scale_f32(vv, s);
|
||||
|
||||
return (pxl8_vec3){
|
||||
.x = result.f32_array[0],
|
||||
.y = result.f32_array[1],
|
||||
.z = result.f32_array[2],
|
||||
.x = v.x * s,
|
||||
.y = v.y * s,
|
||||
.z = v.z * s,
|
||||
};
|
||||
}
|
||||
|
||||
/*
 * Dot product of two 3D vectors.
 *
 * a, b: the operands, passed by value.
 * Returns a.x * b.x + a.y * b.y + a.z * b.z.
 *
 * There is a single return path; the products are summed left to right in
 * scalar code rather than through a SIMD helper.
 */
f32 pxl8_vec3_dot(pxl8_vec3 a, pxl8_vec3 b) {
    return a.x * b.x + a.y * b.y + a.z * b.z;
}
|
||||
|
||||
pxl8_vec3 pxl8_vec3_cross(pxl8_vec3 a, pxl8_vec3 b) {
|
||||
|
|
@ -115,13 +100,11 @@ pxl8_mat4 pxl8_mat4_multiply(pxl8_mat4 a, pxl8_mat4 b) {
|
|||
|
||||
for (i32 i = 0; i < 4; i++) {
|
||||
for (i32 j = 0; j < 4; j++) {
|
||||
pxl8_simd_vec_f32 row = pxl8_simd_set_f32(
|
||||
a.m[i * 4 + 0], a.m[i * 4 + 1], a.m[i * 4 + 2], a.m[i * 4 + 3]
|
||||
);
|
||||
pxl8_simd_vec_f32 col = pxl8_simd_set_f32(
|
||||
b.m[0 * 4 + j], b.m[1 * 4 + j], b.m[2 * 4 + j], b.m[3 * 4 + j]
|
||||
);
|
||||
mat.m[i * 4 + j] = pxl8_simd_dot4_f32(row, col);
|
||||
mat.m[i * 4 + j] =
|
||||
a.m[i * 4 + 0] * b.m[0 * 4 + j] +
|
||||
a.m[i * 4 + 1] * b.m[1 * 4 + j] +
|
||||
a.m[i * 4 + 2] * b.m[2 * 4 + j] +
|
||||
a.m[i * 4 + 3] * b.m[3 * 4 + j];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -129,17 +112,11 @@ pxl8_mat4 pxl8_mat4_multiply(pxl8_mat4 a, pxl8_mat4 b) {
|
|||
}
|
||||
|
||||
/*
 * Applies a 4x4 matrix to a 4-component vector.
 *
 * m: the matrix, read through its 16-element m.m array.
 * v: the vector (x, y, z, w).
 *
 * Each output component is the dot product of four consecutive elements of
 * m.m with (v.x, v.y, v.z, v.w): elements 0..3 produce x, 4..7 produce y,
 * 8..11 produce z and 12..15 produce w.
 *
 * Plain scalar arithmetic: no SIMD row/vector temporaries are built and every
 * field is initialized exactly once.
 */
pxl8_vec4 pxl8_mat4_multiply_vec4(pxl8_mat4 m, pxl8_vec4 v) {
    return (pxl8_vec4){
        .x = m.m[0] * v.x + m.m[1] * v.y + m.m[2] * v.z + m.m[3] * v.w,
        .y = m.m[4] * v.x + m.m[5] * v.y + m.m[6] * v.z + m.m[7] * v.w,
        .z = m.m[8] * v.x + m.m[9] * v.y + m.m[10] * v.z + m.m[11] * v.w,
        .w = m.m[12] * v.x + m.m[13] * v.y + m.m[14] * v.z + m.m[15] * v.w,
    };
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue