mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
aco/optimizer_postRA: don't combine DPP across exec on GFX8/9
GFX8/9 seem to use FI=0 behaviour.

fossil-db (vega10):
Totals from 1 (0.00% of 63053) affected shaders:
Instrs: 542 -> 570 (+5.17%)
CodeSize: 2928 -> 3040 (+3.83%)
Latency: 2087 -> 2118 (+1.49%)
InvThroughput: 1103 -> 1143 (+3.63%)

Affected shader is from Cyberpunk 2077 fossil.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Cc: 23.2 <mesa-stable>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9784
(cherry picked from commit e64f895e08f39e0f2c42df1f2aac9f92f94cefd1)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25479>
This commit is contained in:
parent
444dda215e
commit
3aa5e03ca1
3 changed files with 34 additions and 1 deletions
|
|
@ -858,7 +858,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= dpp.abs[0] << 21;
|
||||
encoding |= dpp.neg[0] << 20;
|
||||
if (ctx.gfx_level >= GFX10)
|
||||
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
|
||||
encoding |= 1 << 18; /* set Fetch Inactive */
|
||||
encoding |= dpp.bound_ctrl << 19;
|
||||
encoding |= dpp.dpp_ctrl << 8;
|
||||
encoding |= reg(ctx, dpp_op, 8);
|
||||
|
|
|
|||
|
|
@ -510,6 +510,11 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (is_overwritten_since(ctx, mov->operands[0], op_instr_idx))
|
||||
continue;
|
||||
|
||||
/* GFX8/9 don't have fetch-inactive. */
|
||||
if (ctx.program->gfx_level < GFX10 &&
|
||||
is_overwritten_since(ctx, Operand(exec, ctx.program->lane_mask), op_instr_idx))
|
||||
continue;
|
||||
|
||||
if (i && !can_swap_operands(instr, &instr->opcode))
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -468,6 +468,34 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
|||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
/* Exercises the post-RA optimizer on a DPP v_mov_b32 (row_mirror) whose result
 * is consumed by a v_add_f32 only after exec has been rewritten by an s_not in
 * between. The case is run once for GFX9 and once for GFX10.
 *
 * Per the expected-output lines below:
 *  - gfx9:  the v_mov_b32 DPP stays a separate instruction and the v_add_f32
 *           reads %tmp0.
 *  - gfx10: no separate v_mov_b32 is expected; the v_add_f32 reads %a directly
 *           and carries row_mirror bound_ctrl:1 itself.
 *
 * NOTE(review): the lines starting with //>>, //! and //~gfxN! look like
 * expected-output patterns consumed by the test framework rather than ordinary
 * comments - confirm before editing them.
 */
BEGIN_TEST(optimizer_postRA.dpp_across_exec)
|
||||
for (amd_gfx_level gfx : {GFX9, GFX10}) {
|
||||
//>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
|
||||
if (!setup_cs("v1 v1", gfx))
|
||||
continue;
|
||||
|
||||
bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
|
||||
bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
|
||||
|
||||
PhysReg reg_v2(258);
|
||||
Operand a(inputs[0], PhysReg(256));
|
||||
Operand b(inputs[1], PhysReg(257));
|
||||
|
||||
//~gfx9! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
|
||||
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
|
||||
//~gfx9! v1: %res0:v[2] = v_add_f32 %tmp0:v[2], %b:v[1]
|
||||
//~gfx10! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
|
||||
//! p_unit_test 0, %res0:v[2]
|
||||
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1),
|
||||
Operand(exec, bld.lm));
|
||||
Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
|
||||
writeout(0, Operand(res0, reg_v2));
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
//>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1] = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v1 s2", GFX10_3))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue