mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
swr/rast: Use gather instruction for i32gather_ps on simd16/avx512
Speed up avx512 platforms; fixes performance regression caused
by the switch to simdlib.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 439904847e)
This commit is contained in:
parent
f3caa303cf
commit
c798200543
1 changed files with 1 additions and 11 deletions
|
|
@ -484,17 +484,7 @@ SIMD_WRAPPER_2(unpacklo_ps);
|
|||
template<ScaleFactor ScaleT>
|
||||
static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
|
||||
{
|
||||
uint32_t *pOffsets = (uint32_t*)&idx;
|
||||
Float vResult;
|
||||
float* pResult = (float*)&vResult;
|
||||
for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
|
||||
{
|
||||
uint32_t offset = pOffsets[i];
|
||||
offset = offset * static_cast<uint32_t>(ScaleT);
|
||||
pResult[i] = *(float const*)(((uint8_t const*)p + offset));
|
||||
}
|
||||
|
||||
return vResult;
|
||||
return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT));
|
||||
}
|
||||
|
||||
static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue