ac/nir/tess: don't pass nir_intrinsic_instr to VMEM IO calc helpers

These will be used without intrinsics.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34863>
This commit is contained in:
Marek Olšák 2025-04-19 07:08:54 -04:00 committed by Marge Bot
parent 360494f50d
commit 4bbe497d9b
4 changed files with 48 additions and 47 deletions

View file

@ -133,11 +133,8 @@ ac_nir_store_parameters_to_attr_ring(nir_builder *b,
nir_def *num_export_threads_in_wave);
nir_def *
ac_nir_calc_io_off(nir_builder *b,
nir_intrinsic_instr *intrin,
nir_def *base_stride,
unsigned component_stride,
unsigned mapped_location);
ac_nir_calc_io_off(nir_builder *b, unsigned component, nir_def *io_offset, nir_def *base_stride,
unsigned component_stride, unsigned mapped_driver_location);
unsigned
ac_nir_map_io_location(unsigned location,

View file

@ -161,7 +161,9 @@ lower_es_output_store(nir_builder *b,
b->cursor = nir_before_instr(&intrin->instr);
unsigned mapped = ac_nir_map_io_location(io_sem.location, st->gs_inputs_read, st->map_io);
nir_def *io_off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4u, mapped);
nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin),
nir_get_io_offset_src(intrin)->ssa,
nir_imm_int(b, 16u), 4u, mapped);
nir_def *store_val = intrin->src[0].ssa;
if (st->gfx_level <= GFX8) {
@ -292,7 +294,9 @@ gs_per_vertex_input_offset(nir_builder *b,
unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(instr);
unsigned mapped = ac_nir_map_io_location(io_sem.location, st->gs_inputs_read, st->map_io);
nir_def *io_off = ac_nir_calc_io_off(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, mapped);
nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(instr),
nir_get_io_offset_src(instr)->ssa,
nir_imm_int(b, base_stride * 4u), base_stride, mapped);
nir_def *off = nir_iadd(b, io_off, vertex_offset);
return nir_imul_imm(b, off, 4u);
}

View file

@ -280,7 +280,9 @@ lower_ls_output_store(nir_builder *b,
nir_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_via_lds, st->map_io);
nir_def *io_off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4u, mapped);
nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin),
nir_get_io_offset_src(intrin)->ssa,
nir_imm_int(b, 16u), 4u, mapped);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
nir_def *off = nir_iadd_nuw(b, base_off_var, io_off);
@ -348,7 +350,9 @@ hs_per_vertex_input_lds_offset(nir_builder *b,
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(instr);
const unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_via_lds, st->map_io);
nir_def *io_offset = ac_nir_calc_io_off(b, instr, nir_imm_int(b, 16u), 4u, mapped);
nir_def *io_offset = ac_nir_calc_io_off(b, nir_intrinsic_component(instr),
nir_get_io_offset_src(instr)->ssa,
nir_imm_int(b, 16u), 4u, mapped);
nir_def *lds_offset = nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
/* The first LDS vec4 is reserved for the tf0/1 shader message group vote. */
@ -403,7 +407,8 @@ hs_output_lds_offset(nir_builder *b,
if (intrin) {
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
const unsigned mapped = hs_output_lds_map_io_location(b->shader, per_vertex, io_sem.location, st);
off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4, mapped);
off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin), nir_get_io_offset_src(intrin)->ssa,
nir_imm_int(b, 16u), 4, mapped);
} else {
off = nir_imm_int(b, 0);
}
@ -469,44 +474,38 @@ hs_output_vram_map_io_location(nir_shader *shader,
}
/* Compute the VMEM (off-chip ring) byte offset of a TCS per-vertex output.
 *
 * The attribute stride is tcs_num_patches * out_vertices_per_patch * 16 bytes
 * (one vec4 slot per vertex per patch).  The final offset is:
 *   patch_offset + vertex_index * 16 + per-slot/component offset.
 *
 * location/component/vertex_index/io_offset are passed explicitly (instead of
 * a nir_intrinsic_instr) so this helper can also be used without intrinsics.
 */
static nir_def *
hs_per_vertex_output_vmem_offset(nir_builder *b, lower_tess_io_state *st, unsigned location,
                                 unsigned component, nir_def *vertex_index, nir_def *io_offset)
{
   /* In TCS the output vertex count is a compile-time constant; in other
    * stages (TES reading these outputs) it must be loaded dynamically.
    */
   nir_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
                                        ? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out)
                                        : nir_load_patch_vertices_in(b);

   nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
   /* Stride between attribute slots: one vec4 (16B) per vertex per patch. */
   nir_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
   nir_def *off =
      ac_nir_calc_io_off(b, component, io_offset, attr_stride, 4u,
                         hs_output_vram_map_io_location(b->shader, true, location, st));

   nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
   nir_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));

   nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);

   return nir_iadd_nuw(b, nir_iadd_nuw(b, patch_offset, vertex_index_off), off);
}
static nir_def *
hs_per_patch_output_vmem_offset(nir_builder *b,
lower_tess_io_state *st,
nir_intrinsic_instr *intrin,
unsigned const_base_offset)
hs_per_patch_output_vmem_offset(nir_builder *b, lower_tess_io_state *st, unsigned location,
unsigned component, nir_def *io_offset, unsigned const_base_offset)
{
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
nir_def * off =
intrin
? ac_nir_calc_io_off(b, intrin, nir_imul_imm(b, tcs_num_patches, 16u), 4u,
hs_output_vram_map_io_location(b->shader, false, nir_intrinsic_io_semantics(intrin).location, st))
nir_def *off =
io_offset
? ac_nir_calc_io_off(b, component, io_offset, nir_imul_imm(b, tcs_num_patches, 16u), 4u,
hs_output_vram_map_io_location(b->shader, false, location, st))
: nir_imm_int(b, 0);
if (const_base_offset)
@ -545,6 +544,7 @@ lower_hs_output_store(nir_builder *b,
intrin->intrinsic == nir_intrinsic_store_output);
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
const unsigned component = nir_intrinsic_component(intrin);
nir_def *store_val = intrin->src[0].ssa;
const unsigned write_mask = nir_intrinsic_write_mask(intrin);
const bool write_to_vmem = tcs_output_needs_vmem(intrin, b->shader, st);
@ -552,8 +552,11 @@ lower_hs_output_store(nir_builder *b,
if (write_to_vmem) {
nir_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
? hs_per_vertex_output_vmem_offset(b, st, intrin)
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);
? hs_per_vertex_output_vmem_offset(b, st, semantics.location, component,
nir_get_io_arrayed_index_src(intrin)->ssa,
nir_get_io_offset_src(intrin)->ssa)
: hs_per_patch_output_vmem_offset(b, st, semantics.location, component,
nir_get_io_offset_src(intrin)->ssa, 0);
nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
@ -575,8 +578,6 @@ lower_hs_output_store(nir_builder *b,
*/
if (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER) {
const unsigned component = nir_intrinsic_component(intrin);
if (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER) {
st->tcs_tess_level_inner_mask |= write_mask << component;
@ -1023,7 +1024,7 @@ hs_store_tess_factors_for_tes(nir_builder *b, tess_levels tessfactors, lower_tes
if (st->tcs_tess_level_outer_mask && tes_reads_outer) {
const unsigned tf_outer_loc = hs_output_vram_map_io_location(b->shader, false, VARYING_SLOT_TESS_LEVEL_OUTER, st);
nir_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, NULL, tf_outer_loc * 16);
nir_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, 0, 0, NULL, tf_outer_loc * 16);
nir_store_buffer_amd(b, tessfactors.outer, hs_ring_tess_offchip,
vmem_off_outer, offchip_offset, zero,
@ -1033,7 +1034,7 @@ hs_store_tess_factors_for_tes(nir_builder *b, tess_levels tessfactors, lower_tes
if (tessfactors.inner && st->tcs_tess_level_inner_mask && tes_reads_inner) {
const unsigned tf_inner_loc = hs_output_vram_map_io_location(b->shader, false, VARYING_SLOT_TESS_LEVEL_INNER, st);
nir_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, NULL, tf_inner_loc * 16);
nir_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, 0, 0, NULL, tf_inner_loc * 16);
nir_store_buffer_amd(b, tessfactors.inner, hs_ring_tess_offchip,
vmem_off_inner, offchip_offset, zero,
@ -1126,8 +1127,13 @@ lower_tes_input_load(nir_builder *b,
nir_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
? hs_per_vertex_output_vmem_offset(b, st, intrin)
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);
? hs_per_vertex_output_vmem_offset(b, st, io_sem.location,
nir_intrinsic_component(intrin),
nir_get_io_arrayed_index_src(intrin)->ssa,
nir_get_io_offset_src(intrin)->ssa)
: hs_per_patch_output_vmem_offset(b, st, io_sem.location,
nir_intrinsic_component(intrin),
nir_get_io_offset_src(intrin)->ssa, 0);
nir_def *zero = nir_imm_int(b, 0);
nir_def *load = NULL;

View file

@ -57,16 +57,11 @@ ac_nir_map_io_location(unsigned location,
}
/**
* This function takes an I/O intrinsic like load/store_input,
* and emits a sequence that calculates the full offset of that instruction,
* including a stride to the base and component offsets.
* This function calculates the full offset of an input/output.
*/
/**
 * This function calculates the full offset of an input/output.
 *
 * \param component               component index within the slot (in units of
 *                                component_stride bytes)
 * \param io_offset               indirect (per-slot) offset SSA value, usually
 *                                taken from nir_get_io_offset_src()
 * \param base_stride             stride (in bytes) between consecutive slots
 * \param component_stride        size of one component in bytes
 * \param mapped_driver_location  remapped driver_location of the slot
 */
nir_def *
ac_nir_calc_io_off(nir_builder *b, unsigned component, nir_def *io_offset, nir_def *base_stride,
                   unsigned component_stride, unsigned mapped_driver_location)
{
   /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
   nir_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);

   /* the indirect offset is in slots as well,
    * so the instruction effectively reads/writes another input/output
    * when it has an offset
    */
   nir_def *offset_op = nir_imul(b, base_stride, io_offset);

   /* component is in bytes */
   unsigned const_op = component * component_stride;

   return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op);
}