cleanup some more f32 simd
This commit is contained in:
parent
670cd3c97e
commit
e2c7998663
2 changed files with 34 additions and 22 deletions
|
|
@ -810,29 +810,11 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
|
||||||
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
|
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
|
||||||
const f32 far_z = 1e30f;
|
const f32 far_z = 1e30f;
|
||||||
|
|
||||||
#if defined(PXL8_SIMD_AVX2)
|
#if !defined(PXL8_SIMD_SCALAR)
|
||||||
__m256 far_vec = _mm256_set1_ps(far_z);
|
pxl8_simd_vec_f32 far_vec = pxl8_simd_set1_f32(far_z);
|
||||||
i32 i = 0;
|
i32 i = 0;
|
||||||
for (; i + 7 < count; i += 8) {
|
for (; i + PXL8_SIMD_WIDTH_F32 <= count; i += PXL8_SIMD_WIDTH_F32) {
|
||||||
_mm256_storeu_ps(&gfx->zbuffer[i], far_vec);
|
pxl8_simd_store_f32(&gfx->zbuffer[i], far_vec);
|
||||||
}
|
|
||||||
for (; i < count; i++) {
|
|
||||||
gfx->zbuffer[i] = far_z;
|
|
||||||
}
|
|
||||||
#elif defined(PXL8_SIMD_SSE2)
|
|
||||||
__m128 far_vec = _mm_set1_ps(far_z);
|
|
||||||
i32 i = 0;
|
|
||||||
for (; i + 3 < count; i += 4) {
|
|
||||||
_mm_storeu_ps(&gfx->zbuffer[i], far_vec);
|
|
||||||
}
|
|
||||||
for (; i < count; i++) {
|
|
||||||
gfx->zbuffer[i] = far_z;
|
|
||||||
}
|
|
||||||
#elif defined(PXL8_SIMD_NEON)
|
|
||||||
float32x4_t far_vec = vdupq_n_f32(far_z);
|
|
||||||
i32 i = 0;
|
|
||||||
for (; i + 3 < count; i += 4) {
|
|
||||||
vst1q_f32(&gfx->zbuffer[i], far_vec);
|
|
||||||
}
|
}
|
||||||
for (; i < count; i++) {
|
for (; i < count; i++) {
|
||||||
gfx->zbuffer[i] = far_z;
|
gfx->zbuffer[i] = far_z;
|
||||||
|
|
|
||||||
|
|
@ -7,20 +7,24 @@
|
||||||
#define PXL8_SIMD_AVX2 1
|
#define PXL8_SIMD_AVX2 1
|
||||||
#define PXL8_SIMD_WIDTH_U8 32
|
#define PXL8_SIMD_WIDTH_U8 32
|
||||||
#define PXL8_SIMD_WIDTH_U32 8
|
#define PXL8_SIMD_WIDTH_U32 8
|
||||||
|
#define PXL8_SIMD_WIDTH_F32 8
|
||||||
#elif defined(__SSE2__)
|
#elif defined(__SSE2__)
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#define PXL8_SIMD_SSE2 1
|
#define PXL8_SIMD_SSE2 1
|
||||||
#define PXL8_SIMD_WIDTH_U8 16
|
#define PXL8_SIMD_WIDTH_U8 16
|
||||||
#define PXL8_SIMD_WIDTH_U32 4
|
#define PXL8_SIMD_WIDTH_U32 4
|
||||||
|
#define PXL8_SIMD_WIDTH_F32 4
|
||||||
#elif defined(__ARM_NEON)
|
#elif defined(__ARM_NEON)
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#define PXL8_SIMD_NEON 1
|
#define PXL8_SIMD_NEON 1
|
||||||
#define PXL8_SIMD_WIDTH_U8 16
|
#define PXL8_SIMD_WIDTH_U8 16
|
||||||
#define PXL8_SIMD_WIDTH_U32 4
|
#define PXL8_SIMD_WIDTH_U32 4
|
||||||
|
#define PXL8_SIMD_WIDTH_F32 4
|
||||||
#else
|
#else
|
||||||
#define PXL8_SIMD_SCALAR 1
|
#define PXL8_SIMD_SCALAR 1
|
||||||
#define PXL8_SIMD_WIDTH_U8 1
|
#define PXL8_SIMD_WIDTH_U8 1
|
||||||
#define PXL8_SIMD_WIDTH_U32 1
|
#define PXL8_SIMD_WIDTH_U32 1
|
||||||
|
#define PXL8_SIMD_WIDTH_F32 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
|
|
@ -218,6 +222,32 @@ static inline pxl8_simd_vec_f32 pxl8_simd_set_f32(f32 x, f32 y, f32 z, f32 w) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build an f32 SIMD vector holding `value` in every lane.
 *
 * AVX2, SSE2 and NEON builds use the native broadcast intrinsic. The
 * fallback build fills every element of `f32_array`, so no lane of the
 * returned vector is left indeterminate if it is later read lane by lane.
 */
static inline pxl8_simd_vec_f32 pxl8_simd_set1_f32(f32 value) {
    pxl8_simd_vec_f32 result;
#if defined(PXL8_SIMD_AVX2)
    result.avx2 = _mm256_set1_ps(value);
#elif defined(PXL8_SIMD_SSE2)
    result.sse = _mm_set1_ps(value);
#elif defined(PXL8_SIMD_NEON)
    result.neon = vdupq_n_f32(value);
#else
    /* Lane count is taken from the member itself; this assumes f32_array is
     * an array member of the union (TODO confirm against the typedef). */
    const unsigned lane_count =
        (unsigned)(sizeof(result.f32_array) / sizeof(result.f32_array[0]));
    for (unsigned lane = 0; lane < lane_count; lane++) {
        result.f32_array[lane] = value;
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Write the contents of `vec` to memory starting at `dest`.
 *
 * AVX2 and SSE2 builds use the unaligned-store intrinsics and NEON builds
 * use vst1q_f32; the fallback build copies only the first element of
 * `f32_array` into dest[0].
 */
static inline void pxl8_simd_store_f32(f32* dest, pxl8_simd_vec_f32 vec) {
#if defined(PXL8_SIMD_AVX2)
    _mm256_storeu_ps(dest, vec.avx2);
#elif defined(PXL8_SIMD_SSE2)
    _mm_storeu_ps(dest, vec.sse);
#elif defined(PXL8_SIMD_NEON)
    vst1q_f32(dest, vec.neon);
#else
    /* Single-lane fallback: only the first element is stored. */
    *dest = vec.f32_array[0];
#endif
}
|
||||||
|
|
||||||
static inline pxl8_simd_vec_f32 pxl8_simd_add_f32(pxl8_simd_vec_f32 a, pxl8_simd_vec_f32 b) {
|
static inline pxl8_simd_vec_f32 pxl8_simd_add_f32(pxl8_simd_vec_f32 a, pxl8_simd_vec_f32 b) {
|
||||||
pxl8_simd_vec_f32 result;
|
pxl8_simd_vec_f32 result;
|
||||||
#if defined(PXL8_SIMD_AVX2)
|
#if defined(PXL8_SIMD_AVX2)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue