diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index a9e5d80fec8..8b591c73a0b 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -754,7 +754,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_vulkan_descriptor: case nir_intrinsic_load_input_attachment_target_pan: case nir_intrinsic_load_input_attachment_conv_pan: - case nir_intrinsic_load_converted_mem_pan: + case nir_intrinsic_load_global_cvt_pan: case nir_intrinsic_atomic_counter_read: case nir_intrinsic_atomic_counter_read_deref: case nir_intrinsic_is_sparse_texels_resident: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 50a821e531b..13df1a0c800 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1727,11 +1727,14 @@ store("tile_pan", [1, 1, 1], indices=[ACCESS, SRC_TYPE, IO_SEMANTICS]) # Load converted memory given an address and a conversion descriptor # src[] = { address, conversion } -load("converted_mem_pan", [1, 1], indices=[DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE]) +load("global_cvt_pan", [1, 1], indices=[DEST_TYPE, ACCESS], flags=[CAN_ELIMINATE]) -# Store a value to memory with conversion given an address and a conversion descriptor +# Store a value to memory with conversion given an address and a conversion +# descriptor. The hardware also supports AUTO32, meaning a global store without +# any conversion for 32-bit values; this behaviour can be enabled by setting +# SRC_TYPE = `32` (using nir_type_invalid instead of real types). # src[] = { value, address, conversion } -store("converted_mem_pan", [1, 1], indices=[SRC_TYPE, ACCESS]) +store("global_cvt_pan", [1, 1], indices=[SRC_TYPE, ACCESS]) # Load the address and potentially the conversion descriptor for a texel buffer index. 
# The 64 bit address is always in the first two channels, while the 32 bit diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index cbdedca1b77..6ff661e2447 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1185,7 +1185,7 @@ nir_get_io_data_src_number(const nir_intrinsic_instr *intr) case nir_intrinsic_store_raw_output_pan: case nir_intrinsic_store_combined_output_pan: case nir_intrinsic_store_tile_pan: - case nir_intrinsic_store_converted_mem_pan: + case nir_intrinsic_store_global_cvt_pan: case nir_intrinsic_store_tlb_sample_color_v3d: case nir_intrinsic_store_uvs_agx: case nir_intrinsic_store_local_pixel_agx: diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index c890030da51..858ed816e69 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -1741,8 +1741,7 @@ va_emit_load_texel_buf_index_address(bi_builder *b, bi_index dst, } static void -bi_emit_load_converted_mem(bi_builder *b, bi_index dst, - nir_intrinsic_instr *instr) +bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr) { bi_index addr = bi_src_index(&instr->src[0]); bi_index icd = bi_src_index(&instr->src[1]); @@ -1754,14 +1753,25 @@ bi_emit_load_converted_mem(bi_builder *b, bi_index dst, } static void -bi_emit_store_converted_mem(bi_builder *b, nir_intrinsic_instr *instr) +bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr) { bi_index value = bi_src_index(&instr->src[0]); bi_index addr = bi_src_index(&instr->src[1]); bi_index icd = bi_src_index(&instr->src[2]); + const nir_alu_type src_type = nir_intrinsic_src_type(instr); + enum bi_register_format regfmt; + if (src_type == 32) { + assert(nir_src_bit_size(instr->src[0]) == 32); + regfmt = BI_REGISTER_FORMAT_AUTO; + } else { + assert(nir_src_bit_size(instr->src[0]) == + nir_alu_type_get_type_size(src_type)); + regfmt = 
bi_reg_fmt_for_nir(src_type); + } + bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd, - BI_REGISTER_FORMAT_AUTO, instr->num_components - 1); + regfmt, instr->num_components - 1); } static void @@ -2287,12 +2297,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_load_texel_buf_index_address(b, dst, instr); break; - case nir_intrinsic_load_converted_mem_pan: - bi_emit_load_converted_mem(b, dst, instr); + case nir_intrinsic_load_global_cvt_pan: + bi_emit_load_cvt(b, dst, instr); break; - case nir_intrinsic_store_converted_mem_pan: - bi_emit_store_converted_mem(b, instr); + case nir_intrinsic_store_global_cvt_pan: + bi_emit_store_cvt(b, instr); break; case nir_intrinsic_load_tile_pan: @@ -6453,12 +6463,12 @@ lower_texel_buffer_fetch(nir_builder *b, nir_tex_instr *tex, void *data) nir_def *loaded_mem; if (*arch >= 9) { nir_def *icd = nir_load_texel_buf_conv_pan(b, res_handle); - loaded_mem = nir_load_converted_mem_pan(b, tex->def.num_components, + loaded_mem = nir_load_global_cvt_pan(b, tex->def.num_components, tex->def.bit_size, texel_addr, icd, tex->dest_type); } else { nir_def *icd = nir_channel(b, loaded_texel_addr, 2); - loaded_mem = nir_load_converted_mem_pan(b, tex->def.num_components, + loaded_mem = nir_load_global_cvt_pan(b, tex->def.num_components, tex->def.bit_size, texel_addr, icd, tex->dest_type); } @@ -6508,9 +6518,9 @@ lower_buf_image_access(nir_builder *b, nir_intrinsic_instr *intr, void *data) icd = nir_load_texel_buf_conv_pan(b, res_handle); else icd = nir_channel(b, loaded_texel_addr, 2); - nir_def *loaded_mem = nir_load_converted_mem_pan( + nir_def *loaded_mem = nir_load_global_cvt_pan( b, intr->def.num_components, intr->def.bit_size, texel_addr, icd, - nir_intrinsic_dest_type(intr)); + .dest_type = nir_intrinsic_dest_type(intr)); nir_def_replace(&intr->def, loaded_mem); break; } @@ -6530,7 +6540,7 @@ lower_buf_image_access(nir_builder *b, nir_intrinsic_instr *intr, void *data) icd = 
nir_load_texel_buf_conv_pan(b, res_handle); else icd = nir_channel(b, loaded_texel_addr, 2); - nir_store_converted_mem_pan(b, value, texel_addr, icd); + nir_store_global_cvt_pan(b, value, texel_addr, icd, .src_type = 32); nir_instr_remove(&intr->instr); break; }