mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-22 10:40:22 +01:00
aco: don't modify exec in p_interp_gfx11
The RDNA3 ISA docs say that lds_param_load writes the entire quad regardless of exec, so saving exec, switching it to whole-quad mode (s_wqm) around the load, and restoring it afterwards isn't needed. fossil-db (gfx1100): Totals from 5291 (3.93% of 134574) affected shaders: Instrs: 4891396 -> 4789628 (-2.08%) CodeSize: 25519032 -> 25111960 (-1.60%) Latency: 36122982 -> 36074300 (-0.13%); split: -0.14%, +0.00% InvThroughput: 4162436 -> 4161424 (-0.02%); split: -0.02%, +0.00% Copies: 263862 -> 263838 (-0.01%) PreSGPRs: 225012 -> 224179 (-0.37%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21171>
This commit is contained in:
parent
6e4598f7b9
commit
b4383821e7
3 changed files with 6 additions and 15 deletions
|
|
@ -530,7 +530,7 @@ public:
|
|||
}
|
||||
<%
|
||||
import itertools
|
||||
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3,6)]),
|
||||
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3, 6), (1, 6)]),
|
||||
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
|
||||
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
|
||||
|
|
|
|||
|
|
@ -5335,9 +5335,8 @@ emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Tem
|
|||
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
|
||||
Operand coord2_op(coord2);
|
||||
coord2_op.setLateKill(true); /* we re-use the destination reg in the middle */
|
||||
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), bld.def(bld.lm), bld.def(s1, scc),
|
||||
Operand(v1.as_linear()), Operand::c32(idx), Operand::c32(component), coord1,
|
||||
coord2_op, prim_mask_op);
|
||||
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), Operand(v1.as_linear()),
|
||||
Operand::c32(idx), Operand::c32(component), coord1, coord2_op, prim_mask_op);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5416,9 +5415,9 @@ emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component, unsig
|
|||
if (in_exec_divergent_or_in_loop(ctx)) {
|
||||
Operand prim_mask_op = bld.m0(prim_mask);
|
||||
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
|
||||
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), bld.def(bld.lm), bld.def(s1, scc),
|
||||
Operand(v1.as_linear()), Operand::c32(idx), Operand::c32(component),
|
||||
Operand::c32(dpp_ctrl), prim_mask_op);
|
||||
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), Operand(v1.as_linear()),
|
||||
Operand::c32(idx), Operand::c32(component), Operand::c32(dpp_ctrl),
|
||||
prim_mask_op);
|
||||
} else {
|
||||
Temp p =
|
||||
bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
|
||||
|
|
|
|||
|
|
@ -2507,15 +2507,11 @@ lower_to_hw_instr(Program* program)
|
|||
case aco_opcode::p_interp_gfx11: {
|
||||
assert(instr->definitions[0].regClass() == v1 ||
|
||||
instr->definitions[0].regClass() == v2b);
|
||||
assert(instr->definitions[1].regClass() == bld.lm);
|
||||
assert(instr->definitions[2].isFixed() && instr->definitions[2].physReg() == scc);
|
||||
assert(instr->operands[0].regClass() == v1.as_linear());
|
||||
assert(instr->operands[1].isConstant());
|
||||
assert(instr->operands[2].isConstant());
|
||||
assert(instr->operands.back().physReg() == m0);
|
||||
Definition dst = instr->definitions[0];
|
||||
PhysReg exec_tmp = instr->definitions[1].physReg();
|
||||
Definition clobber_scc = instr->definitions[2];
|
||||
PhysReg lin_vgpr = instr->operands[0].physReg();
|
||||
unsigned attribute = instr->operands[1].constantValue();
|
||||
unsigned component = instr->operands[2].constantValue();
|
||||
|
|
@ -2531,12 +2527,8 @@ lower_to_hw_instr(Program* program)
|
|||
dpp_ctrl = instr->operands[3].constantValue();
|
||||
}
|
||||
|
||||
bld.sop1(Builder::s_mov, Definition(exec_tmp, bld.lm), Operand(exec, bld.lm));
|
||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), clobber_scc,
|
||||
Operand(exec, bld.lm));
|
||||
bld.ldsdir(aco_opcode::lds_param_load, Definition(lin_vgpr, v1), Operand(m0, s1),
|
||||
attribute, component);
|
||||
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(exec_tmp, bld.lm));
|
||||
|
||||
Operand p(lin_vgpr, v1);
|
||||
Operand dst_op(dst.physReg(), v1);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue