aco: calculate all p_as_uniform and v_readfirstlane_b32 sources in WQM

We should avoid a situation where a v_readfirstlane_b32 is in WQM but its
source is calculated in Exact.

Fixes hang when running Assassin's Creed: Valhalla benchmark.

fossil-db (GFX10.3):
Totals from 1021 (0.70% of 146267) affected shaders:
CodeSize: 7835228 -> 7842992 (+0.10%); split: -0.00%, +0.10%
Instrs: 1519208 -> 1521149 (+0.13%); split: -0.00%, +0.13%
SClause: 78921 -> 78920 (-0.00%)
Copies: 44456 -> 45421 (+2.17%); split: -0.05%, +2.22%
Branches: 12987 -> 13933 (+7.28%)
PreSGPRs: 47599 -> 47813 (+0.45%)
Cycles: 10037540 -> 10045304 (+0.08%); split: -0.00%, +0.08%
VMEM: 538381 -> 538777 (+0.07%); split: +0.11%, -0.03%
SMEM: 84553 -> 84554 (+0.00%); split: +0.01%, -0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9557>
This commit is contained in:
Rhys Perry 2021-02-25 15:37:17 +00:00 committed by Marge Bot
parent 5bd75e6835
commit b7d1f00d3f
2 changed files with 8 additions and 3 deletions

View file

@ -214,7 +214,8 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
aco_ptr<Instruction>& instr = block->instructions[i];
WQMState needs = needs_exact(instr) ? Exact : Unspecified;
bool propagate_wqm = instr->opcode == aco_opcode::p_wqm;
bool propagate_wqm = instr->opcode == aco_opcode::p_wqm ||
instr->opcode == aco_opcode::p_as_uniform;
bool preserve_wqm = instr->opcode == aco_opcode::p_discard_if;
bool pred_by_exec = pred_by_exec_mask(instr);
for (const Definition& definition : instr->definitions) {

View file

@ -713,8 +713,10 @@ Temp convert_pointer_to_64_bit(isel_context *ctx, Temp ptr)
if (ptr.size() == 2)
return ptr;
Builder bld(ctx->program, ctx->block);
if (ptr.type() == RegType::vgpr)
if (ptr.type() == RegType::vgpr) {
ptr = bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), ptr);
ptr = emit_wqm(ctx, ptr);
}
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
ptr, Operand((unsigned)ctx->options->address32_hi));
}
@ -5498,8 +5500,10 @@ Temp get_sampler_desc(isel_context *ctx, nir_deref_instr *deref_instr,
constant_index += array_size * const_value->u32;
} else {
Temp indirect = get_ssa_temp(ctx, deref_instr->arr.index.ssa);
if (indirect.type() == RegType::vgpr)
if (indirect.type() == RegType::vgpr) {
indirect = bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), indirect);
indirect = emit_wqm(ctx, indirect);
}
if (array_size != 1)
indirect = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(array_size), indirect);