From 508f293b14cd6c1a7320ba0a1f62326b14dde2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 27 Mar 2024 01:29:28 +0100 Subject: [PATCH] ac/nir/tess: Emit tess factor stores based on new intrinsics. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the TCS to read the primitive mode and whether TES reads the tess factors, from an SGPR arg, which lets it decide how to store them at runtime. For linked shaders, the conditions will be constant and NIR optimizations can delete the dead CF. Signed-off-by: Timur Kristóf Reviewed-by: Marek Olšák Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_nir.h | 1 - src/amd/common/ac_nir_lower_tess_io_to_mem.c | 57 +++++++++++++++----- src/amd/vulkan/nir/radv_nir_lower_io.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 2 - 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index 25b80c6fb34..f9202b53f07 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -123,7 +123,6 @@ void ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, - bool tes_reads_tessfactors, uint64_t tes_inputs_read, uint64_t tes_patch_inputs_read, unsigned num_reserved_tcs_outputs, diff --git a/src/amd/common/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/ac_nir_lower_tess_io_to_mem.c index e582c8dc23f..4d619fef394 100644 --- a/src/amd/common/ac_nir_lower_tess_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_tess_io_to_mem.c @@ -122,9 +122,6 @@ typedef struct { uint64_t tes_inputs_read; uint64_t tes_patch_inputs_read; - /* Whether TES reads the tess factors. */ - bool tes_reads_tessfactors; - unsigned tcs_num_reserved_outputs; unsigned tcs_num_reserved_patch_outputs; @@ -599,6 +596,21 @@ hs_store_dynamic_control_word_gfx6(nir_builder *b) nir_pop_if(b, rel_patch_id_zero); } +static nir_def * +hs_resize_tess_factor(nir_builder *b, nir_def *tf, unsigned comps) +{ + if (!comps) + return NULL; + else if (!tf) + return nir_imm_zero(b, comps, 32); + else if (comps > tf->num_components) + return nir_pad_vector_imm_int(b, tf, 0, comps); + else if (comps < tf->num_components) + return nir_trim_vector(b, tf, comps); + else + return tf; +} + static void hs_store_tess_factors_for_tessellator(nir_builder *b, enum amd_gfx_level gfx_level, enum tess_primitive_mode prim_mode, @@ -617,21 +629,24 @@ hs_store_tess_factors_for_tessellator(nir_builder *b, enum amd_gfx_level gfx_lev nir_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u); + nir_def *tf_outer = hs_resize_tess_factor(b, tessfactors.outer, outer_comps); + nir_def *tf_inner = hs_resize_tess_factor(b, tessfactors.inner, inner_comps); + /* Store tess factors for the tessellator */ if (prim_mode == TESS_PRIMITIVE_ISOLINES) { /* LINES reversal */ - nir_def *t = nir_vec2(b, nir_channel(b, tessfactors.outer, 1), nir_channel(b, tessfactors.outer, 0)); + nir_def *t = nir_vec2(b, nir_channel(b, tf_outer, 1), nir_channel(b, tf_outer, 0)); nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, .base = tess_factors_const_offset, .access = ACCESS_COHERENT); } else if (prim_mode == TESS_PRIMITIVE_TRIANGLES) { - nir_def *t = nir_vec4(b, nir_channel(b, tessfactors.outer, 0), nir_channel(b, tessfactors.outer, 1), - nir_channel(b, tessfactors.outer, 2), nir_channel(b, tessfactors.inner, 0)); + nir_def *t = nir_vec4(b, nir_channel(b, tf_outer, 0), nir_channel(b, tf_outer, 1), + nir_channel(b, tf_outer, 2), nir_channel(b, tf_inner, 0)); nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, .base = tess_factors_const_offset, .access = ACCESS_COHERENT); } else { - nir_store_buffer_amd(b, tessfactors.outer, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, + nir_store_buffer_amd(b, tf_outer, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, .base = tess_factors_const_offset, .access = ACCESS_COHERENT); - nir_store_buffer_amd(b, tessfactors.inner, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, + nir_store_buffer_amd(b, tf_inner, tessfactor_ring, tess_factors_offset, tess_factors_base, zero, .base = tess_factors_const_offset + 4u * outer_comps, .access = ACCESS_COHERENT); } } @@ -719,10 +734,30 @@ hs_finale(nir_shader *shader, if (st->gfx_level <= GFX8) hs_store_dynamic_control_word_gfx6(b); - hs_store_tess_factors_for_tessellator(b, st->gfx_level, b->shader->info.tess._primitive_mode, tessfactors); + nir_def *prim_mode = nir_load_tcs_primitive_mode_amd(b); + nir_if *if_triangles = nir_push_if(b, nir_ieq_imm(b, prim_mode, TESS_PRIMITIVE_TRIANGLES)); + { + hs_store_tess_factors_for_tessellator(b, st->gfx_level, TESS_PRIMITIVE_TRIANGLES, tessfactors); + } + nir_push_else(b, if_triangles); + { + nir_if *if_isolines = nir_push_if(b, nir_ieq_imm(b, prim_mode, TESS_PRIMITIVE_ISOLINES)); + { + hs_store_tess_factors_for_tessellator(b, st->gfx_level, TESS_PRIMITIVE_ISOLINES, tessfactors); + } + nir_push_else(b, if_isolines); + { + hs_store_tess_factors_for_tessellator(b, st->gfx_level, TESS_PRIMITIVE_QUADS, tessfactors); + } + nir_pop_if(b, if_isolines); + } + nir_pop_if(b, if_triangles); - if (st->tes_reads_tessfactors) + nir_if *if_tes_reads_tf = nir_push_if(b, nir_load_tcs_tess_levels_to_tes_amd(b)); + { hs_store_tess_factors_for_tes(b, tessfactors, st); + } + nir_pop_if(b, if_tes_reads_tf); } if (if_invocation_id_zero) { @@ -851,7 +886,6 @@ void ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, - bool tes_reads_tessfactors, uint64_t tes_inputs_read, uint64_t tes_patch_inputs_read, unsigned num_reserved_tcs_outputs, @@ -866,7 +900,6 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, lower_tess_io_state state = { .gfx_level = gfx_level, - .tes_reads_tessfactors = tes_reads_tessfactors, .tes_inputs_read = tes_inputs_read, .tes_patch_inputs_read = tes_patch_inputs_read, .tcs_num_reserved_outputs = num_reserved_tcs_outputs, diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index dafbc0a71b7..080a7673417 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -151,7 +151,7 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq); NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, device->physical_device->rad_info.gfx_level, - info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, + info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size, false, false, !info->has_epilog, info->has_epilog); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 07ade6dc212..45a3dba6a9d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1823,8 +1823,6 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir, NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location, sel->screen->info.gfx_level, - /* Used by hs_emit_write_tess_factors() when monolithic shader. */ - key->ge.part.tcs.epilog.tes_reads_tess_factors, ~0ULL, ~0ULL, /* no TES inputs filter */ util_last_bit64(sel->info.outputs_written_before_tes_gs), util_last_bit64(sel->info.patch_outputs_written),