From c61eb5480674e922a512c036455b00f30c12037c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?=
Date: Sat, 30 Mar 2024 23:43:46 +0100
Subject: [PATCH] ac/nir/tess: Map TCS LDS IO locations without gaps.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Timur Kristóf
Reviewed-by: Marek Olšák
Part-of:
---
 src/amd/common/ac_nir_lower_tess_io_to_mem.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/ac_nir_lower_tess_io_to_mem.c
index 93f05a1dce2..2521ed97ec3 100644
--- a/src/amd/common/ac_nir_lower_tess_io_to_mem.c
+++ b/src/amd/common/ac_nir_lower_tess_io_to_mem.c
@@ -362,6 +362,51 @@ hs_per_vertex_input_lds_offset(nir_builder *b,
    return nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
 }
 
+/**
+ * Map the IO location of a TCS output intrinsic to a gapless LDS slot index.
+ *
+ * - load/store_output: tess levels come first, ordered by location among
+ *   the bits of the TF mask; every other per-patch output follows them,
+ *   ordered by its index relative to VARYING_SLOT_PATCH0.
+ * - load/store_per_vertex_output: the slot is the number of per-vertex
+ *   outputs in the LDS mask that have a lower location.
+ *
+ * Any other intrinsic is invalid here.
+ */
+static unsigned
+hs_output_lds_map_io_location(nir_shader *shader,
+                              nir_intrinsic_instr *intrin,
+                              lower_tess_io_state *st)
+{
+   const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
+   const unsigned loc = io_sem.location;
+
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_load_output: {
+      const uint64_t tf_mask = tcs_lds_tf_out_mask(shader, st);
+
+      /* Only form the 64-bit location bit when loc fits in the mask:
+       * shifting a 64-bit value by 64 or more is undefined behavior, and
+       * per-patch locations are expected to start at 64 (TODO confirm).
+       */
+      if (loc < 64 && (BITFIELD64_BIT(loc) & TESS_LVL_MASK))
+         return util_bitcount64(tf_mask & BITFIELD64_MASK(loc));
+
+      const uint32_t patch_out_mask = tcs_lds_per_patch_out_mask(shader);
+      return util_bitcount64(tf_mask) +
+             util_bitcount(patch_out_mask & BITFIELD_MASK(loc - VARYING_SLOT_PATCH0));
+   }
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_load_per_vertex_output: {
+      const uint64_t per_vertex_mask = tcs_lds_per_vtx_out_mask(shader);
+      return util_bitcount64(per_vertex_mask & BITFIELD64_MASK(loc));
+   }
+   default:
+      unreachable("invalid TCS IO intrinsic");
+   }
+}
+
 static nir_def *
 hs_output_lds_offset(nir_builder *b,
                      lower_tess_io_state *st,
@@ -376,7 +421,8 @@
hs_output_lds_offset(nir_builder *b, unsigned output_patch_stride = pervertex_output_patch_size + st->tcs_num_reserved_patch_outputs * 16u; nir_def *off = intrin - ? ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io) + ? ac_nir_calc_io_offset_mapped(b, intrin, nir_imm_int(b, 16u), 4u, + hs_output_lds_map_io_location(b->shader, intrin, st)) : nir_imm_int(b, 0); nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);