diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index f721e218541..0389682ff0f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2324,34 +2324,17 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrin LLVMValueRef offset = get_src(ctx, instr->src[1]); int num_components = instr->num_components; + assert(instr->dest.ssa.bit_size >= 32 && instr->dest.ssa.bit_size % 32 == 0); + if (ctx->abi->load_ubo) rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); - /* Convert to a scalar 32-bit load. */ + /* Convert to a 32-bit load. */ if (instr->dest.ssa.bit_size == 64) num_components *= 2; - else if (instr->dest.ssa.bit_size == 16) - num_components = DIV_ROUND_UP(num_components, 2); - else if (instr->dest.ssa.bit_size == 8) - num_components = DIV_ROUND_UP(num_components, 4); - ret = - ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, - ctx->ac.f32, 0, true, true); - - /* Convert to the original type. */ - if (instr->dest.ssa.bit_size == 64) { - ret = LLVMBuildBitCast(ctx->ac.builder, ret, - LLVMVectorType(ctx->ac.i64, num_components / 2), ""); - } else if (instr->dest.ssa.bit_size == 16) { - ret = LLVMBuildBitCast(ctx->ac.builder, ret, - LLVMVectorType(ctx->ac.i16, num_components * 2), ""); - } else if (instr->dest.ssa.bit_size == 8) { - ret = LLVMBuildBitCast(ctx->ac.builder, ret, - LLVMVectorType(ctx->ac.i8, num_components * 4), ""); - } - - ret = ac_trim_vector(&ctx->ac, ret, instr->num_components); + ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, + ctx->ac.f32, 0, true, true); ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); return exit_waterfall(ctx, &wctx, ret); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 3382e416679..f16b21706a4 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3182,6 +3182,12 @@ radv_postprocess_nir(struct radv_pipeline *pipeline, } } + NIR_PASS(_, stage->nir, ac_nir_lower_subdword_loads, + (ac_nir_lower_subdword_options) { + .modes_1_comp = nir_var_mem_ubo, + .modes_N_comps = nir_var_mem_ubo + }); + progress = false; NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, pipeline_layout); /* Gather info in the case that nir_vk_lower_ycbcr_tex might have emitted resinfo instructions. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 19786d0dcbb..624e4116472 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -25,6 +25,7 @@ #include "nir_builder.h" #include "nir_xfb_info.h" #include "si_pipe.h" +#include "ac_nir.h" static bool si_alu_to_scalar_filter(const nir_instr *instr, const void *data) @@ -358,6 +359,11 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr) nir_lower_io_passes(nir); + NIR_PASS_V(nir, ac_nir_lower_subdword_loads, + (ac_nir_lower_subdword_options) { + .modes_1_comp = nir_var_mem_ubo, + .modes_N_comps = nir_var_mem_ubo + }); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); /* Remove dead derefs, so that we can remove uniforms. */