From 45ccd6487f6197ec681d03c193033efd5f275726 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 23 Jan 2025 10:35:51 +0100 Subject: [PATCH] aco/sched_ilp: only remove WaW/WaR for inter clause dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Direct RaW shouldn't be removed, because the clause is split by a wait anyway. Foz-DB Navi21: Totals from 52 (0.07% of 79206) affected shaders: Instrs: 1603523 -> 1603485 (-0.00%); split: -0.00%, +0.00% CodeSize: 8223048 -> 8222788 (-0.00%) Latency: 9741674 -> 9738884 (-0.03%); split: -0.03%, +0.00% InvThroughput: 2322621 -> 2322010 (-0.03%); split: -0.03%, +0.00% SClause: 31325 -> 31320 (-0.02%); split: -0.02%, +0.01% Reviewed-by: Daniel Schürmann Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_scheduler_ilp.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp index 3e11eaa82f5..1adfd43a47a 100644 --- a/src/amd/compiler/aco_scheduler_ilp.cpp +++ b/src/amd/compiler/aco_scheduler_ilp.cpp @@ -388,12 +388,13 @@ add_entry(SchedILPContext& ctx, Instruction* const instr, const uint32_t idx) } } + mask_t write_dep_mask = 0; for (const Definition& def : instr->definitions) { for (unsigned i = 0; i < def.size(); i++) { RegisterInfo& reg_info = ctx.regs[def.physReg().reg() + i]; /* Add all previous register reads and writes to the dependencies. */ - entry.dependency_mask |= reg_info.read_mask; + write_dep_mask |= reg_info.read_mask; reg_info.read_mask = mask; /* This register write is a direct dependency for all following reads. */ @@ -423,19 +424,23 @@ add_entry(SchedILPContext& ctx, Instruction* const instr, const uint32_t idx) if (!is_memory_instr(instr) || instr->definitions.empty() || get_sync_info(instr).semantics & semantic_volatile || ctx.is_vopd) { /* Add all previous instructions as dependencies. 
*/ - entry.dependency_mask = ctx.active_mask; + entry.dependency_mask = ctx.active_mask & ~ctx.non_reorder_mask; } /* Remove non-reorderable instructions from dependencies, since WaR dependencies can interfere * with clause formation. This should be fine, since these are always scheduled in-order and * any cases that are actually a concern for clause formation are added as transitive * dependencies. */ - entry.dependency_mask &= ~ctx.non_reorder_mask; + write_dep_mask &= ~ctx.non_reorder_mask; + /* Ignore RaW for VINTRP. */ + if (instr->isVINTRP()) + entry.dependency_mask &= ~ctx.non_reorder_mask; ctx.potential_partial_clause = true; } else if (ctx.last_non_reorderable != UINT8_MAX) { ctx.potential_partial_clause = false; } + entry.dependency_mask |= write_dep_mask; entry.dependency_mask &= ~mask; for (unsigned i = 0; i < num_nodes; i++) {