mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 01:30:08 +01:00
aco/sched_ilp: reorder VINTRP
VINTRP (gfx6-gfx10.3) is mostly just VALU, but we treated it like memory instructions as an afterthought. This had issues, as VINTRP was never reordered with itself or with other memory instructions. Reordering VINTRP in clauses increases ILP. We don't really need collect_clause_dependencies for VINTRP either, because VINTRP instructions usually have the same dependencies already. That means we can still form VINTRP clauses by preferring to select a VINTRP instruction after a previous one. Foz-DB Navi21: Totals from 34184 (43.16% of 79206) affected shaders: Instrs: 18811270 -> 18812046 (+0.00%); split: -0.01%, +0.02% CodeSize: 103627276 -> 103630056 (+0.00%); split: -0.01%, +0.01% Latency: 188379364 -> 187936731 (-0.23%); split: -0.27%, +0.03% InvThroughput: 42600163 -> 42590608 (-0.02%); split: -0.03%, +0.00% VClause: 378960 -> 378912 (-0.01%); split: -0.02%, +0.00% SClause: 727560 -> 720573 (-0.96%); split: -1.08%, +0.12% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Reviewed-by: Daniel Schürmann <None> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33111>
This commit is contained in:
parent
45ccd6487f
commit
df1de388a3
2 changed files with 14 additions and 7 deletions
|
|
@ -80,7 +80,7 @@ struct SchedILPContext {
|
|||
bool
|
||||
can_reorder(const Instruction* const instr)
|
||||
{
|
||||
if (instr->isVALU())
|
||||
if (instr->isVALU() || instr->isVINTRP())
|
||||
return true;
|
||||
if (!instr->isSALU() || instr->isSOPP())
|
||||
return false;
|
||||
|
|
@ -285,7 +285,7 @@ unsigned
|
|||
get_latency(const Instruction* const instr)
|
||||
{
|
||||
/* Note, that these are not accurate latency estimations. */
|
||||
if (instr->isVALU())
|
||||
if (instr->isVALU() || instr->isVINTRP())
|
||||
return 5;
|
||||
if (instr->isSALU())
|
||||
return 2;
|
||||
|
|
@ -432,9 +432,6 @@ add_entry(SchedILPContext& ctx, Instruction* const instr, const uint32_t idx)
|
|||
* any cases that are actually a concern for clause formation are added as transitive
|
||||
* dependencies. */
|
||||
write_dep_mask &= ~ctx.non_reorder_mask;
|
||||
/* Ignore RaW for VINTERP. */
|
||||
if (instr->isVINTRP())
|
||||
entry.dependency_mask &= ~ctx.non_reorder_mask;
|
||||
ctx.potential_partial_clause = true;
|
||||
} else if (ctx.last_non_reorderable != UINT8_MAX) {
|
||||
ctx.potential_partial_clause = false;
|
||||
|
|
@ -558,8 +555,14 @@ select_instruction_ilp(const SchedILPContext& ctx)
|
|||
mask = collect_clause_dependencies(ctx, ctx.next_non_reorderable, 0);
|
||||
}
|
||||
|
||||
/* VINTRP(gfx6-10.3) can be handled like alu, but switching between VINTRP and other
|
||||
* alu has a cost. So if the previous instr was VINTRP, try to keep selecting VINTRP.
|
||||
*/
|
||||
bool prefer_vintrp = ctx.prev_info.instr && ctx.prev_info.instr->isVINTRP();
|
||||
|
||||
/* Select the instruction with highest priority of all candidates. */
|
||||
unsigned idx = -1u;
|
||||
bool idx_vintrp = false;
|
||||
int32_t priority = INT32_MIN;
|
||||
u_foreach_bit (i, mask) {
|
||||
const InstrInfo& candidate = ctx.nodes[i];
|
||||
|
|
@ -568,8 +571,12 @@ select_instruction_ilp(const SchedILPContext& ctx)
|
|||
if (candidate.dependency_mask)
|
||||
continue;
|
||||
|
||||
if (idx == -1u || candidate.priority > priority) {
|
||||
bool is_vintrp = prefer_vintrp && candidate.instr->isVINTRP();
|
||||
|
||||
if (idx == -1u || (is_vintrp && !idx_vintrp) ||
|
||||
(is_vintrp == idx_vintrp && candidate.priority > priority)) {
|
||||
idx = i;
|
||||
idx_vintrp = is_vintrp;
|
||||
priority = candidate.priority;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -378,8 +378,8 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
|
|||
|
||||
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
|
||||
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
||||
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
||||
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
|
||||
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
||||
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
||||
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
||||
//>> BB1:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue