Use AVX2 when available; clean up SIMD code paths
This commit is contained in:
parent
2abc6c9486
commit
670cd3c97e
2 changed files with 40 additions and 13 deletions
|
|
@@ -810,11 +810,11 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
|
|||
i32 count = gfx->zbuffer_width * gfx->zbuffer_height;
|
||||
const f32 far_z = 1e30f;
|
||||
|
||||
#if defined(PXL8_SIMD_NEON)
|
||||
float32x4_t far_vec = vdupq_n_f32(far_z);
|
||||
#if defined(PXL8_SIMD_AVX2)
|
||||
__m256 far_vec = _mm256_set1_ps(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 3 < count; i += 4) {
|
||||
vst1q_f32(&gfx->zbuffer[i], far_vec);
|
||||
for (; i + 7 < count; i += 8) {
|
||||
_mm256_storeu_ps(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
|
|
@@ -823,7 +823,16 @@ void pxl8_3d_clear_zbuffer(pxl8_gfx* gfx) {
|
|||
__m128 far_vec = _mm_set1_ps(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 3 < count; i += 4) {
|
||||
_mm_store_ps(&gfx->zbuffer[i], far_vec);
|
||||
_mm_storeu_ps(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
}
|
||||
#elif defined(PXL8_SIMD_NEON)
|
||||
float32x4_t far_vec = vdupq_n_f32(far_z);
|
||||
i32 i = 0;
|
||||
for (; i + 3 < count; i += 4) {
|
||||
vst1q_f32(&gfx->zbuffer[i], far_vec);
|
||||
}
|
||||
for (; i < count; i++) {
|
||||
gfx->zbuffer[i] = far_z;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue