Clean up some more f32 SIMD code
This commit is contained in:
parent
670cd3c97e
commit
e2c7998663
2 changed files with 34 additions and 22 deletions
|
|
@ -810,29 +810,11 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
|
|||
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
|
||||
const f32 far_z = 1e30f;
|
||||
|
||||
#if defined(PXL8_SIMD_AVX2)
|
||||
__m256 far_vec = _mm256_set1_ps(far_z);
|
||||
#if !defined(PXL8_SIMD_SCALAR)
|
||||
pxl8_simd_vec_f32 far_vec = pxl8_simd_set1_f32(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 7 < count; i += 8) {
|
||||
_mm256_storeu_ps(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
}
|
||||
#elif defined(PXL8_SIMD_SSE2)
|
||||
__m128 far_vec = _mm_set1_ps(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 3 < count; i += 4) {
|
||||
_mm_storeu_ps(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
}
|
||||
#elif defined(PXL8_SIMD_NEON)
|
||||
float32x4_t far_vec = vdupq_n_f32(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 3 < count; i += 4) {
|
||||
vst1q_f32(&gfx->zbuffer[i], far_vec);
|
||||
for (; i + PXL8_SIMD_WIDTH_F32 <= count; i += PXL8_SIMD_WIDTH_F32) {
|
||||
pxl8_simd_store_f32(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
|
|
|
|||
|
|
@ -7,20 +7,24 @@
|
|||
#define PXL8_SIMD_AVX2 1
|
||||
#define PXL8_SIMD_WIDTH_U8 32
|
||||
#define PXL8_SIMD_WIDTH_U32 8
|
||||
#define PXL8_SIMD_WIDTH_F32 8
|
||||
#elif defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#define PXL8_SIMD_SSE2 1
|
||||
#define PXL8_SIMD_WIDTH_U8 16
|
||||
#define PXL8_SIMD_WIDTH_U32 4
|
||||
#define PXL8_SIMD_WIDTH_F32 4
|
||||
#elif defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#define PXL8_SIMD_NEON 1
|
||||
#define PXL8_SIMD_WIDTH_U8 16
|
||||
#define PXL8_SIMD_WIDTH_U32 4
|
||||
#define PXL8_SIMD_WIDTH_F32 4
|
||||
#else
|
||||
#define PXL8_SIMD_SCALAR 1
|
||||
#define PXL8_SIMD_WIDTH_U8 1
|
||||
#define PXL8_SIMD_WIDTH_U32 1
|
||||
#define PXL8_SIMD_WIDTH_F32 1
|
||||
#endif
|
||||
|
||||
typedef union {
|
||||
|
|
@ -218,6 +222,32 @@ static inline pxl8_simd_vec_f32 pxl8_simd_set_f32(f32 x, f32 y, f32 z, f32 w) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Broadcast a single f32 into every lane of a SIMD vector.
//
// value:   scalar to replicate.
// returns: a vector in which every lane holds `value`.
//
// The AVX2, SSE2 and NEON paths use the backend's native broadcast intrinsic.
static inline pxl8_simd_vec_f32 pxl8_simd_set1_f32(f32 value) {
    pxl8_simd_vec_f32 result;
#if defined(PXL8_SIMD_AVX2)
    result.avx2 = _mm256_set1_ps(value);
#elif defined(PXL8_SIMD_SSE2)
    result.sse = _mm_set1_ps(value);
#elif defined(PXL8_SIMD_NEON)
    result.neon = vdupq_n_f32(value);
#else
    // Scalar fallback: fill every element of the backing array rather than
    // only lane 0, so no part of the returned union is left indeterminate.
    // The element count is taken from the array itself, so this makes no
    // assumption about how wide the union is declared.
    const unsigned int lane_count =
        (unsigned int)(sizeof result.f32_array / sizeof result.f32_array[0]);
    for (unsigned int lane = 0; lane < lane_count; lane++) {
        result.f32_array[lane] = value;
    }
#endif
    return result;
}
|
||||
|
||||
// Write the lanes of `vec` to memory beginning at `dest`.
//
// dest: destination buffer; the SIMD paths use the unaligned-store
//       intrinsics, so no vector alignment is required of it.
// vec:  vector whose lanes are written out.
//
// The SIMD backends write one full vector register; the scalar fallback
// writes exactly one f32.
static inline void pxl8_simd_store_f32(f32* dest, pxl8_simd_vec_f32 vec) {
#if !defined(PXL8_SIMD_AVX2) && !defined(PXL8_SIMD_SSE2) && !defined(PXL8_SIMD_NEON)
    // No SIMD backend selected: a "vector" is a single float.
    *dest = vec.f32_array[0];
#elif defined(PXL8_SIMD_AVX2)
    _mm256_storeu_ps(dest, vec.avx2);
#elif defined(PXL8_SIMD_SSE2)
    _mm_storeu_ps(dest, vec.sse);
#else
    // Only NEON remains once AVX2 and SSE2 have been ruled out.
    vst1q_f32(dest, vec.neon);
#endif
}
|
||||
|
||||
static inline pxl8_simd_vec_f32 pxl8_simd_add_f32(pxl8_simd_vec_f32 a, pxl8_simd_vec_f32 b) {
|
||||
pxl8_simd_vec_f32 result;
|
||||
#if defined(PXL8_SIMD_AVX2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue