From fb6184f89c440e666281feff85de7fb225fba0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 1 Oct 2024 16:05:12 -0400 Subject: [PATCH] nir: add shader_info::tess::tcs_same_invocation_inputs_read(_indirect) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need both the same-invocation usage mask and cross-invocation usage mask. The AMD reason is below. Cross-invocation TCS input access doesn't prevent the same-invocation fast path in AMD hw because it's just a different way to load the same data, and we want to use both paths for the same TCS input based on the load instruction. The fast path can't be used for indirect access, which is gathered separately for same-invocation access. Reviewed-by: Rhys Perry Reviewed-by: Timur Kristóf Part-of: --- src/compiler/nir/nir_gather_info.c | 18 +++++++++++++----- src/compiler/nir/nir_print.c | 2 ++ src/compiler/shader_info.h | 8 ++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 27db703ee86..a5932cf3b30 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -147,8 +147,12 @@ set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len, shader->info.inputs_read_indirectly |= bitfield; } - if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL) - shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield; + if (shader->info.stage == MESA_SHADER_TESS_CTRL) { + if (cross_invocation) + shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield; + else + shader->info.tess.tcs_same_invocation_inputs_read |= bitfield; + } if (shader->info.stage == MESA_SHADER_FRAGMENT) { shader->info.fs.uses_sample_qualifier |= var->data.sample; @@ -564,9 +568,12 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, } if (shader->info.stage == MESA_SHADER_TESS_CTRL && - instr->intrinsic == nir_intrinsic_load_per_vertex_input && - !src_is_invocation_id(nir_get_io_arrayed_index_src(instr))) - shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask; + instr->intrinsic == nir_intrinsic_load_per_vertex_input) { + if (src_is_invocation_id(nir_get_io_arrayed_index_src(instr))) + shader->info.tess.tcs_same_invocation_inputs_read |= slot_mask; + else + shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask; + } break; case nir_intrinsic_load_output: @@ -1022,6 +1029,7 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) shader->info.fs.needs_quad_helper_invocations = false; } if (shader->info.stage == MESA_SHADER_TESS_CTRL) { + shader->info.tess.tcs_same_invocation_inputs_read = 0; shader->info.tess.tcs_cross_invocation_inputs_read = 0; shader->info.tess.tcs_cross_invocation_outputs_read = 0; } diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 45f9347823f..41e02e5fa03 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -2587,6 +2587,8 @@ print_shader_info(const struct shader_info *info, FILE *fp) print_nz_bool(fp, "ccw", info->tess.ccw); print_nz_bool(fp, "point_mode", info->tess.point_mode); + print_nz_x64(fp, "tcs_same_invocation_inputs_read", + info->tess.tcs_same_invocation_inputs_read); print_nz_x64(fp, "tcs_cross_invocation_inputs_read", info->tess.tcs_cross_invocation_inputs_read); print_nz_x64(fp, "tcs_cross_invocation_outputs_read", info->tess.tcs_cross_invocation_outputs_read); break; diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index 2289bb23654..b1ddcce99bd 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -490,6 +490,14 @@ typedef struct shader_info { bool ccw:1; bool point_mode:1; + /* Bit mask of TCS per-vertex inputs (VS outputs) that are used + * with a vertex index that is equal to the invocation id. + * + * Not mutually exclusive with tcs_cross_invocation_inputs_read, i.e. + * both input[0] and input[invocation_id] can be present. + */ + uint64_t tcs_same_invocation_inputs_read; + /* Bit mask of TCS per-vertex inputs (VS outputs) that are used * with a vertex index that is NOT the invocation id */