pan,nir: Rework converted_mem_pan intrinsics

First, rename them to make them a bit more clear.  They act on global
memory so they should be _global and they map to ld/st_cvt so _cvt is
nice and obvious.  Second, they don't need IO semantics as they're not
IO.  But they do need ACCESS so that we can better control things like
CAN_REORDER.  Third, add a src_type to store_global_cvt even though it
won't be used just yet because we'll want it for lowering VS stores.

Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40391>
This commit is contained in:
Faith Ekstrand 2025-11-29 02:44:18 -05:00 committed by Marge Bot
parent 8541dca8ed
commit de338dc908
4 changed files with 31 additions and 18 deletions

View file

@ -754,7 +754,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_load_input_attachment_target_pan:
case nir_intrinsic_load_input_attachment_conv_pan:
case nir_intrinsic_load_converted_mem_pan:
case nir_intrinsic_load_global_cvt_pan:
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_atomic_counter_read_deref:
case nir_intrinsic_is_sparse_texels_resident:

View file

@ -1727,11 +1727,14 @@ store("tile_pan", [1, 1, 1], indices=[ACCESS, SRC_TYPE, IO_SEMANTICS])
# Load converted memory given an address and a conversion descriptor
# src[] = { address, conversion }
load("converted_mem_pan", [1, 1], indices=[DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE])
load("global_cvt_pan", [1, 1], indices=[DEST_TYPE, ACCESS], flags=[CAN_ELIMINATE])
# Store a value to memory with conversion given an address and a conversion descriptor
# Store a value to memory with conversion given an address and a conversion
# descriptor. The hardware also supports AUTO32, meaning a global store without
# any conversion for 32-bit values, this behaviour can be enabled by setting
# SRC_TYPE = `32` (using nir_type_invalid instead of real types).
# src[] = { value, address, conversion }
store("converted_mem_pan", [1, 1], indices=[IO_SEMANTICS])
store("global_cvt_pan", [1, 1], indices=[SRC_TYPE, ACCESS])
# Load the address and potentially the conversion descriptor for a texel buffer index.
# The 64 bit address is always in the first two channels, while the 32 bit

View file

@ -1185,7 +1185,7 @@ nir_get_io_data_src_number(const nir_intrinsic_instr *intr)
case nir_intrinsic_store_raw_output_pan:
case nir_intrinsic_store_combined_output_pan:
case nir_intrinsic_store_tile_pan:
case nir_intrinsic_store_converted_mem_pan:
case nir_intrinsic_store_global_cvt_pan:
case nir_intrinsic_store_tlb_sample_color_v3d:
case nir_intrinsic_store_uvs_agx:
case nir_intrinsic_store_local_pixel_agx:

View file

@ -1741,8 +1741,7 @@ va_emit_load_texel_buf_index_address(bi_builder *b, bi_index dst,
}
static void
bi_emit_load_converted_mem(bi_builder *b, bi_index dst,
nir_intrinsic_instr *instr)
bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr)
{
bi_index addr = bi_src_index(&instr->src[0]);
bi_index icd = bi_src_index(&instr->src[1]);
@ -1754,14 +1753,25 @@ bi_emit_load_converted_mem(bi_builder *b, bi_index dst,
}
static void
bi_emit_store_converted_mem(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr)
{
bi_index value = bi_src_index(&instr->src[0]);
bi_index addr = bi_src_index(&instr->src[1]);
bi_index icd = bi_src_index(&instr->src[2]);
const nir_alu_type src_type = nir_intrinsic_src_type(instr);
enum bi_register_format regfmt;
if (src_type == 32) {
assert(nir_src_bit_size(instr->src[0]) == 32);
regfmt = BI_REGISTER_FORMAT_AUTO;
} else {
assert(nir_src_bit_size(instr->src[0]) ==
nir_alu_type_get_type_size(src_type));
regfmt = bi_reg_fmt_for_nir(src_type);
}
bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
BI_REGISTER_FORMAT_AUTO, instr->num_components - 1);
regfmt, instr->num_components - 1);
}
static void
@ -2287,12 +2297,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_load_texel_buf_index_address(b, dst, instr);
break;
case nir_intrinsic_load_converted_mem_pan:
bi_emit_load_converted_mem(b, dst, instr);
case nir_intrinsic_load_global_cvt_pan:
bi_emit_load_cvt(b, dst, instr);
break;
case nir_intrinsic_store_converted_mem_pan:
bi_emit_store_converted_mem(b, instr);
case nir_intrinsic_store_global_cvt_pan:
bi_emit_store_cvt(b, instr);
break;
case nir_intrinsic_load_tile_pan:
@ -6453,12 +6463,12 @@ lower_texel_buffer_fetch(nir_builder *b, nir_tex_instr *tex, void *data)
nir_def *loaded_mem;
if (*arch >= 9) {
nir_def *icd = nir_load_texel_buf_conv_pan(b, res_handle);
loaded_mem = nir_load_converted_mem_pan(b, tex->def.num_components,
loaded_mem = nir_load_global_cvt_pan(b, tex->def.num_components,
tex->def.bit_size, texel_addr,
icd, tex->dest_type);
} else {
nir_def *icd = nir_channel(b, loaded_texel_addr, 2);
loaded_mem = nir_load_converted_mem_pan(b, tex->def.num_components,
loaded_mem = nir_load_global_cvt_pan(b, tex->def.num_components,
tex->def.bit_size, texel_addr,
icd, tex->dest_type);
}
@ -6508,9 +6518,9 @@ lower_buf_image_access(nir_builder *b, nir_intrinsic_instr *intr, void *data)
icd = nir_load_texel_buf_conv_pan(b, res_handle);
else
icd = nir_channel(b, loaded_texel_addr, 2);
nir_def *loaded_mem = nir_load_converted_mem_pan(
nir_def *loaded_mem = nir_load_global_cvt_pan(
b, intr->def.num_components, intr->def.bit_size, texel_addr, icd,
nir_intrinsic_dest_type(intr));
.dest_type = nir_intrinsic_dest_type(intr));
nir_def_replace(&intr->def, loaded_mem);
break;
}
@ -6530,7 +6540,7 @@ lower_buf_image_access(nir_builder *b, nir_intrinsic_instr *intr, void *data)
icd = nir_load_texel_buf_conv_pan(b, res_handle);
else
icd = nir_channel(b, loaded_texel_addr, 2);
nir_store_converted_mem_pan(b, value, texel_addr, icd);
nir_store_global_cvt_pan(b, value, texel_addr, icd, .src_type = 32);
nir_instr_remove(&intr->instr);
break;
}