mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 09:00:10 +01:00
aco: fix p_interp_gfx11 to not overwrite SCC
s_wqm_b64 clobbers SCC.
Found this while working on dual source blending.
Fixes: 6113ee650a ("aco/gfx11: fix FS input loads in quad-divergent control flow")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19747>
This commit is contained in:
parent
3012e85e36
commit
369c9b6425
4 changed files with 8 additions and 5 deletions
|
|
@@ -522,7 +522,7 @@ public:
|
||||||
}
|
}
|
||||||
<%
|
<%
|
||||||
import itertools
|
import itertools
|
||||||
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6)]),
|
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3,6)]),
|
||||||
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
|
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
|
||||||
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
|
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
|
||||||
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
|
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
|
||||||
|
|
|
||||||
|
|
@@ -5329,7 +5329,7 @@ emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Tem
|
||||||
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
|
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
|
||||||
Operand coord2_op(coord2);
|
Operand coord2_op(coord2);
|
||||||
coord2_op.setLateKill(true); /* we re-use the destination reg in the middle */
|
coord2_op.setLateKill(true); /* we re-use the destination reg in the middle */
|
||||||
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), bld.def(bld.lm),
|
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), bld.def(bld.lm), bld.def(s1, scc),
|
||||||
Operand(v1.as_linear()), Operand::c32(idx), Operand::c32(component), coord1,
|
Operand(v1.as_linear()), Operand::c32(idx), Operand::c32(component), coord1,
|
||||||
coord2_op, prim_mask_op);
|
coord2_op, prim_mask_op);
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
|
|
@@ -2384,12 +2384,14 @@ lower_to_hw_instr(Program* program)
|
||||||
assert(instr->definitions[0].regClass() == v1 ||
|
assert(instr->definitions[0].regClass() == v1 ||
|
||||||
instr->definitions[0].regClass() == v2b);
|
instr->definitions[0].regClass() == v2b);
|
||||||
assert(instr->definitions[1].regClass() == bld.lm);
|
assert(instr->definitions[1].regClass() == bld.lm);
|
||||||
|
assert(instr->definitions[2].isFixed() && instr->definitions[2].physReg() == scc);
|
||||||
assert(instr->operands[0].regClass() == v1.as_linear());
|
assert(instr->operands[0].regClass() == v1.as_linear());
|
||||||
assert(instr->operands[1].isConstant());
|
assert(instr->operands[1].isConstant());
|
||||||
assert(instr->operands[2].isConstant());
|
assert(instr->operands[2].isConstant());
|
||||||
assert(instr->operands.back().physReg() == m0);
|
assert(instr->operands.back().physReg() == m0);
|
||||||
Definition dst = instr->definitions[0];
|
Definition dst = instr->definitions[0];
|
||||||
PhysReg exec_tmp = instr->definitions[1].physReg();
|
PhysReg exec_tmp = instr->definitions[1].physReg();
|
||||||
|
Definition clobber_scc = instr->definitions[2];
|
||||||
PhysReg lin_vgpr = instr->operands[0].physReg();
|
PhysReg lin_vgpr = instr->operands[0].physReg();
|
||||||
unsigned attribute = instr->operands[1].constantValue();
|
unsigned attribute = instr->operands[1].constantValue();
|
||||||
unsigned component = instr->operands[2].constantValue();
|
unsigned component = instr->operands[2].constantValue();
|
||||||
|
|
@@ -2406,7 +2408,8 @@ lower_to_hw_instr(Program* program)
|
||||||
}
|
}
|
||||||
|
|
||||||
bld.sop1(Builder::s_mov, Definition(exec_tmp, bld.lm), Operand(exec, bld.lm));
|
bld.sop1(Builder::s_mov, Definition(exec_tmp, bld.lm), Operand(exec, bld.lm));
|
||||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), Operand(exec, bld.lm));
|
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), clobber_scc,
|
||||||
|
Operand(exec, bld.lm));
|
||||||
bld.ldsdir(aco_opcode::lds_param_load, Definition(lin_vgpr, v1), Operand(m0, s1),
|
bld.ldsdir(aco_opcode::lds_param_load, Definition(lin_vgpr, v1), Operand(m0, s1),
|
||||||
attribute, component);
|
attribute, component);
|
||||||
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(exec_tmp, bld.lm));
|
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(exec_tmp, bld.lm));
|
||||||
|
|
|
||||||
|
|
@@ -336,8 +336,8 @@ opcode("p_init_scratch")
|
||||||
opcode("p_jump_to_epilog")
|
opcode("p_jump_to_epilog")
|
||||||
|
|
||||||
# loads and interpolates a fragment shader input with a correct exec mask
|
# loads and interpolates a fragment shader input with a correct exec mask
|
||||||
#dst0=result, dst1=exec_tmp, src0=linear_vgpr, src1=attribute, src2=component, src3=coord1, src4=coord2, src5=m0
|
#dst0=result, dst1=exec_tmp, dst2=clobber_scc, src0=linear_vgpr, src1=attribute, src2=component, src3=coord1, src4=coord2, src5=m0
|
||||||
#dst0=result, dst1=exec_tmp, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
|
#dst0=result, dst1=exec_tmp, dst2=clobber_scc, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
|
||||||
opcode("p_interp_gfx11")
|
opcode("p_interp_gfx11")
|
||||||
|
|
||||||
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
|
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue