mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 07:58:07 +02:00
radv: improve LDS alignment check for load/store vectorization
Previously, this could vectorize two scalar 16-bit loads into a u8vec4 load.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11113>
This commit is contained in:
parent
4870d7d829
commit
d2b9c7e982
1 changed file with 16 additions and 6 deletions
|
|
@ -3124,13 +3124,23 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_s
|
|||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_store_shared:
|
||||
if (bit_size * num_components ==
|
||||
96) /* 96 bit loads require 128 bit alignment and are split otherwise */
|
||||
96) { /* 96 bit loads require 128 bit alignment and are split otherwise */
|
||||
return align % 16 == 0;
|
||||
else if (bit_size * num_components ==
|
||||
128) /* 128 bit loads require 64 bit alignment and are split otherwise */
|
||||
return align % 8 == 0;
|
||||
else
|
||||
return align % (bit_size == 8 ? 2 : 4) == 0;
|
||||
} else if (bit_size == 16 && (align % 4)) {
|
||||
/* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU
|
||||
* vectorization, because our vectorizer requires the scalar IR to already contain vectors.
|
||||
*/
|
||||
return (align % 2 == 0) && num_components <= 2;
|
||||
} else {
|
||||
if (num_components == 3) {
|
||||
/* AMD hardware can't do 3-component loads except for 96-bit loads, handled above. */
|
||||
return false;
|
||||
}
|
||||
unsigned req = bit_size * num_components;
|
||||
if (req == 64 || req == 128) /* 64-bit and 128-bit loads can use ds_read2_b{32,64} */
|
||||
req /= 2u;
|
||||
return align % (req / 8u) == 0;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue