From 82919e2dcb1cc07745a920a928e269cc00bcb27f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 29 Oct 2022 17:29:37 -0400
Subject: [PATCH] amd: lower subdword UBO loads in NIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes broken subdword UBO loads with LLVM.

It's only needed for LLVM, but it's done for both LLVM and ACO because
the pass can be fully validated only with ACO and the Vulkan CTS right now.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19399>
---
 src/amd/llvm/ac_nir_to_llvm.c                | 27 ++++----------------
 src/amd/vulkan/radv_pipeline.c               |  6 +++++
 src/gallium/drivers/radeonsi/si_shader_nir.c |  6 +++++
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index f721e218541..0389682ff0f 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2324,34 +2324,17 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrin
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    int num_components = instr->num_components;
 
+   assert(instr->dest.ssa.bit_size >= 32 && instr->dest.ssa.bit_size % 32 == 0);
+
    if (ctx->abi->load_ubo)
       rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
 
-   /* Convert to a scalar 32-bit load. */
+   /* Convert to a 32-bit load. */
    if (instr->dest.ssa.bit_size == 64)
       num_components *= 2;
-   else if (instr->dest.ssa.bit_size == 16)
-      num_components = DIV_ROUND_UP(num_components, 2);
-   else if (instr->dest.ssa.bit_size == 8)
-      num_components = DIV_ROUND_UP(num_components, 4);
 
-   ret =
-      ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL,
-                           ctx->ac.f32, 0, true, true);
-
-   /* Convert to the original type. */
-   if (instr->dest.ssa.bit_size == 64) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i64, num_components / 2), "");
-   } else if (instr->dest.ssa.bit_size == 16) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i16, num_components * 2), "");
-   } else if (instr->dest.ssa.bit_size == 8) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i8, num_components * 4), "");
-   }
-
-   ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);
+   ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL,
+                              ctx->ac.f32, 0, true, true);
    ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
 
    return exit_waterfall(ctx, &wctx, ret);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 3382e416679..f16b21706a4 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3182,6 +3182,12 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
       }
    }
 
+   NIR_PASS(_, stage->nir, ac_nir_lower_subdword_loads,
+            (ac_nir_lower_subdword_options) {
+               .modes_1_comp = nir_var_mem_ubo,
+               .modes_N_comps = nir_var_mem_ubo
+            });
+
    progress = false;
    NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, pipeline_layout);
    /* Gather info in the case that nir_vk_lower_ycbcr_tex might have emitted resinfo instructions. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 19786d0dcbb..624e4116472 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -25,6 +25,7 @@
 #include "nir_builder.h"
 #include "nir_xfb_info.h"
 #include "si_pipe.h"
+#include "ac_nir.h"
 
 
 static bool si_alu_to_scalar_filter(const nir_instr *instr, const void *data)
@@ -358,6 +359,11 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr)
 
    nir_lower_io_passes(nir);
 
+   NIR_PASS_V(nir, ac_nir_lower_subdword_loads,
+              (ac_nir_lower_subdword_options) {
+                 .modes_1_comp = nir_var_mem_ubo,
+                 .modes_N_comps = nir_var_mem_ubo
+              });
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
 
    /* Remove dead derefs, so that we can remove uniforms. */