nir: reject unsupported component counts from all vectorize callbacks

If a callback accepts an unsupported component count for loads,
nir_opt_load_store_vectorize rounds num_components up to the next supported
vector size, essentially overfetching.

This changes all callbacks to reject such counts. AMD will enable them in a
later commit.
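
For illustration only (not part of this change), a callback following the
pattern added here might look like the sketch below. The function name and the
32-bit/vec4 backend limits are made up for the example; nir_num_components_valid()
is the NIR helper used in the hunks that follow:

    static bool
    example_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                                 unsigned bit_size, unsigned num_components,
                                 unsigned hole_size, nir_intrinsic_instr *low,
                                 nir_intrinsic_instr *high, void *data)
    {
       /* Reject holes and any component count NIR cannot represent, so the
        * vectorizer never rounds num_components up and overfetches.
        */
       if (hole_size || !nir_num_components_valid(num_components))
          return false;

       /* Hypothetical backend limits: 32-bit loads/stores up to vec4. */
       return bit_size == 32 && num_components <= 4;
    }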

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29398>
Marek Olšák 2024-06-15 01:11:12 -04:00 committed by Marge Bot
parent 02923e237d
commit 65ace5649b
6 changed files with 6 additions and 6 deletions

@@ -2089,7 +2089,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
 nir_intrinsic_instr *high,
 void *data)
 {
-if (hole_size)
+if (hole_size || !nir_num_components_valid(num_components))
 return false;
 /* TMU general access only supports 32-bit vectors */

@@ -340,7 +340,7 @@ bool nir_load_store_vectorize_test::mem_vectorize_callback(
 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
 void *data)
 {
-if (hole_size)
+if (hole_size || !nir_num_components_valid(num_components))
 return false;
 /* Calculate a simple alignment, like how nir_intrinsic_align() does. */

@@ -113,7 +113,7 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
 unsigned hole_size, nir_intrinsic_instr *low,
 nir_intrinsic_instr *high, void *data)
 {
-if (hole_size)
+if (hole_size || !nir_num_components_valid(num_components))
 return false;
 struct ir3_compiler *compiler = data;

@@ -3273,7 +3273,7 @@ ntt_should_vectorize_io(unsigned align, unsigned bit_size,
 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
 void *data)
 {
-if (bit_size != 32 || hole_size)
+if (bit_size != 32 || hole_size || !nir_num_components_valid(num_components))
 return false;
 /* Our offset alignment should aways be at least 4 bytes */

@@ -1420,7 +1420,7 @@ brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
 * those back into 32-bit ones anyway and UBO loads aren't split in NIR so
 * we don't want to make a mess for the back-end.
 */
-if (bit_size > 32 || hole_size)
+if (bit_size > 32 || hole_size || !nir_num_components_valid(num_components))
 return false;
 if (low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||

@@ -1137,7 +1137,7 @@ elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
 * those back into 32-bit ones anyway and UBO loads aren't split in NIR so
 * we don't want to make a mess for the back-end.
 */
-if (bit_size > 32 || hole_size)
+if (bit_size > 32 || hole_size || !nir_num_components_valid(num_components))
 return false;
 if (low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||