From 44bc1e6bf4b60fd87a2edc88fc6e13bfc293c395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 26 Jan 2026 13:03:00 -0500 Subject: [PATCH] nir: add dest_type to load_buffer_amd for lowering the result to 16 bits Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Georg Lehmann Part-of: --- .../nir/ac_nir_lower_image_opcodes_cdna.c | 28 +++++++++++-------- src/compiler/nir/nir_intrinsics.py | 2 +- src/compiler/nir/nir_validate.c | 10 +++++++ .../drivers/radeonsi/si_nir_lower_vs_inputs.c | 3 +- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/amd/common/nir/ac_nir_lower_image_opcodes_cdna.c b/src/amd/common/nir/ac_nir_lower_image_opcodes_cdna.c index eb5df454e60..8a94ff49e22 100644 --- a/src/amd/common/nir/ac_nir_lower_image_opcodes_cdna.c +++ b/src/amd/common/nir/ac_nir_lower_image_opcodes_cdna.c @@ -108,9 +108,9 @@ static nir_def *lower_image_coords(nir_builder *b, nir_def *desc, nir_def *coord } static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size, - nir_def *desc, nir_def *coord, - enum gl_access_qualifier access, enum glsl_sampler_dim dim, - bool is_array, bool handle_out_of_bounds) + nir_def *desc, nir_def *coord, + enum gl_access_qualifier access, enum glsl_sampler_dim dim, + bool is_array, bool handle_out_of_bounds, nir_alu_type dest_type) { nir_def *zero = nir_imm_int(b, 0); @@ -120,7 +120,8 @@ static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, uns handle_out_of_bounds), .base = 0, .memory_modes = nir_var_image, - .access = access | ACCESS_USES_FORMAT_AMD); + .access = access | ACCESS_USES_FORMAT_AMD, + .dest_type = dest_type); } static void emulated_image_store(nir_builder *b, nir_def *desc, nir_def *coord, @@ -146,9 +147,10 @@ static nir_def *get_dim(nir_builder *b, nir_def *desc, unsigned dim) * This basically converts the tex opcode into 1 or more image_load opcodes. */ static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components, - unsigned bit_size, nir_def *desc, - nir_def *sampler_desc, nir_def *coord_vec, - enum glsl_sampler_dim sampler_dim, bool is_array) + unsigned bit_size, nir_def *desc, + nir_def *sampler_desc, nir_def *coord_vec, + enum glsl_sampler_dim sampler_dim, bool is_array, + nir_alu_type dest_type) { const enum gl_access_qualifier access = ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER; @@ -206,7 +208,7 @@ static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components, /* Load the texel. */ result_nearest = emulated_image_load(b, num_components, bit_size, desc, nir_vec(b, coord0, num_coord_components), - access, sampler_dim, is_array, false); + access, sampler_dim, is_array, false, dest_type); } nir_push_else(b, if_nearest); { @@ -278,7 +280,7 @@ static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components, /* Load the linear filter texel. */ texel[i] = emulated_image_load(b, num_components, bit_size, desc, nir_vec(b, texel_coord, num_coord_components), - access, sampler_dim, is_array, false); + access, sampler_dim, is_array, false, dest_type); /* Multiply the texel by the weight. */ texel[i] = nir_fmul(b, texel[i], texel_weight); @@ -389,7 +391,8 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data) case nir_intrinsic_image_deref_load: case nir_intrinsic_bindless_image_load: result = emulated_image_load(b, intr->def.num_components, intr->def.bit_size, - desc, intr->src[1].ssa, access, dim, is_array, true); + desc, intr->src[1].ssa, access, dim, is_array, true, + nir_intrinsic_dest_type(intr)); nir_def_rewrite_uses_after_instr(dst, result, instr); nir_instr_remove(instr); return true; @@ -479,7 +482,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data) result = emulated_image_load(b, tex->def.num_components, tex->def.bit_size, desc, coord, ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, - tex->sampler_dim, tex->is_array, true); + tex->sampler_dim, tex->is_array, true, tex->dest_type); nir_def_rewrite_uses_after_instr(dst, result, instr); nir_instr_remove(instr); return true; @@ -487,7 +490,8 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data) case nir_texop_tex: case nir_texop_txl: result = emulated_tex_level_zero(b, tex->def.num_components, tex->def.bit_size, - desc, sampler_desc, coord, tex->sampler_dim, tex->is_array); + desc, sampler_desc, coord, tex->sampler_dim, tex->is_array, + tex->dest_type); nir_def_rewrite_uses_after_instr(dst, result, instr); nir_instr_remove(instr); return true; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 7514ce05116..cd369bc002f 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1804,7 +1804,7 @@ intrinsic("unit_test_assert_eq", src_comp=[0, 0], flags=[]) # src[] = { descriptor, vector byte offset, scalar byte offset, index offset } # The index offset is multiplied by the stride in the descriptor. # The vector/scalar offsets are in bytes, BASE is a constant byte offset. -intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) +intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS, ALIGN_MUL, ALIGN_OFFSET, DEST_TYPE], flags=[CAN_ELIMINATE]) # src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset } intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, MEMORY_MODES, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index dfc728f405d..ae563ae1677 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -708,6 +708,16 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) break; } + case nir_intrinsic_load_buffer_amd: + if (nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD) { + nir_alu_type dest_type = nir_intrinsic_dest_type(instr); + validate_assert(state, nir_alu_type_get_type_size(dest_type) && + nir_alu_type_get_base_type(dest_type)); + validate_assert(state, nir_alu_type_get_type_size(dest_type) == + instr->def.bit_size); + } + break; + case nir_intrinsic_store_buffer_amd: if (nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD) { unsigned writemask = nir_intrinsic_write_mask(instr); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c index 111a526f550..fedc4c90a5f 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c @@ -494,7 +494,8 @@ load_vs_input_from_vertex_buffer(nir_builder *b, unsigned input_index, zero, zero, vertex_index, .base = fetch_stride * i, .access = ACCESS_USES_FORMAT_AMD | ACCESS_CAN_REORDER | - ACCESS_CAN_SPECULATE); + ACCESS_CAN_SPECULATE, + .dest_type = nir_intrinsic_dest_type(intr)); } if (num_fetches == 1 && channels_per_fetch > 1) {