radv: improve LDS alignment check for load/store vectorization

Previously, this could vectorize two scalar 16-bit loads into a u8vec4
load.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11113>
This commit is contained in:
Author: Rhys Perry — 2021-06-01 11:20:00 +01:00 (committed by Marge Bot)
parent 4870d7d829
commit d2b9c7e982

View file

@ -3124,13 +3124,23 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_s
case nir_intrinsic_load_shared:
case nir_intrinsic_store_shared:
if (bit_size * num_components ==
96) /* 96 bit loads require 128 bit alignment and are split otherwise */
96) { /* 96 bit loads require 128 bit alignment and are split otherwise */
return align % 16 == 0;
else if (bit_size * num_components ==
128) /* 128 bit loads require 64 bit alignment and are split otherwise */
return align % 8 == 0;
else
return align % (bit_size == 8 ? 2 : 4) == 0;
} else if (bit_size == 16 && (align % 4)) {
/* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU
* vectorization, because our vectorizer requires the scalar IR to already contain vectors.
*/
return (align % 2 == 0) && num_components <= 2;
} else {
if (num_components == 3) {
/* AMD hardware can't do 3-component loads except for 96-bit loads, handled above. */
return false;
}
unsigned req = bit_size * num_components;
if (req == 64 || req == 128) /* 64-bit and 128-bit loads can use ds_read2_b{32,64} */
req /= 2u;
return align % (req / 8u) == 0;
}
default:
return false;
}