mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 06:30:10 +01:00
aco: implement linear vgpr copies
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12172>
This commit is contained in:
parent
b1e4794f0f
commit
8d50385bbd
3 changed files with 116 additions and 2 deletions
|
|
@ -1113,6 +1113,54 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
copy_linear_vgpr(Builder& bld, Definition def, Operand op, bool preserve_scc, PhysReg scratch_sgpr)
|
||||
{
|
||||
if (preserve_scc)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1), Operand(scc, s1));
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (def.size() == 2)
|
||||
bld.vop3(aco_opcode::v_lshrrev_b64, def, Operand::zero(), op);
|
||||
else
|
||||
bld.vop1(aco_opcode::v_mov_b32, def, op);
|
||||
|
||||
bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1),
|
||||
Operand(exec, bld.lm));
|
||||
}
|
||||
|
||||
if (preserve_scc)
|
||||
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(scratch_sgpr, s1),
|
||||
Operand::zero());
|
||||
}
|
||||
|
||||
void
|
||||
swap_linear_vgpr(Builder& bld, Definition def, Operand op, bool preserve_scc, PhysReg scratch_sgpr)
|
||||
{
|
||||
if (preserve_scc)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1), Operand(scc, s1));
|
||||
|
||||
Operand def_as_op = Operand(def.physReg(), def.regClass());
|
||||
Definition op_as_def = Definition(op.physReg(), op.regClass());
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (bld.program->chip_class >= GFX9) {
|
||||
bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
|
||||
} else {
|
||||
bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
|
||||
bld.vop2(aco_opcode::v_xor_b32, def, op, def_as_op);
|
||||
bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
|
||||
}
|
||||
|
||||
bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1),
|
||||
Operand(exec, bld.lm));
|
||||
}
|
||||
|
||||
if (preserve_scc)
|
||||
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(scratch_sgpr, s1),
|
||||
Operand::zero());
|
||||
}
|
||||
|
||||
bool
|
||||
do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* preserve_scc,
|
||||
PhysReg scratch_sgpr)
|
||||
|
|
@ -1133,6 +1181,8 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
|
|||
*preserve_scc = true;
|
||||
} else if (op.isConstant()) {
|
||||
copy_constant(ctx, bld, def, op);
|
||||
} else if (def.regClass().is_linear_vgpr()) {
|
||||
copy_linear_vgpr(bld, def, op, *preserve_scc, scratch_sgpr);
|
||||
} else if (def.regClass() == v1) {
|
||||
bld.vop1(aco_opcode::v_mov_b32, def, op);
|
||||
} else if (def.regClass() == v2) {
|
||||
|
|
@ -1232,7 +1282,9 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
|
|||
assert(op.regClass() == def.regClass());
|
||||
Operand def_as_op = Operand(def.physReg(), def.regClass());
|
||||
Definition op_as_def = Definition(op.physReg(), op.regClass());
|
||||
if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
|
||||
if (def.regClass().is_linear_vgpr()) {
|
||||
swap_linear_vgpr(bld, def, op, preserve_scc, pi->scratch_sgpr);
|
||||
} else if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
|
||||
bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
|
||||
} else if (def.regClass() == v1) {
|
||||
assert(def.physReg().byte() == 0 && op.physReg().byte() == 0);
|
||||
|
|
|
|||
|
|
@ -422,12 +422,20 @@ validate_ir(Program* program)
|
|||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
|
||||
"Operand and Definition size must match", instr.get());
|
||||
if (instr->operands[i].isTemp())
|
||||
if (instr->operands[i].isTemp()) {
|
||||
check((instr->definitions[i].getTemp().type() ==
|
||||
instr->operands[i].regClass().type()) ||
|
||||
(instr->definitions[i].getTemp().type() == RegType::vgpr &&
|
||||
instr->operands[i].regClass().type() == RegType::sgpr),
|
||||
"Operand and Definition types do not match", instr.get());
|
||||
check(instr->definitions[i].regClass().is_linear_vgpr() ==
|
||||
instr->operands[i].regClass().is_linear_vgpr(),
|
||||
"Operand and Definition types do not match", instr.get());
|
||||
} else {
|
||||
check(!instr->definitions[i].regClass().is_linear_vgpr(),
|
||||
"Can only copy linear VGPRs into linear VGPRs, not constant/undef",
|
||||
instr.get());
|
||||
}
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_phi) {
|
||||
check(instr->operands.size() == block.logical_preds.size(),
|
||||
|
|
|
|||
|
|
@ -668,3 +668,57 @@ BEGIN_TEST(to_hw_instr.insert)
|
|||
//! s_endpgm
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
|
||||
if (!setup_cs(NULL, GFX10))
|
||||
return;
|
||||
|
||||
PhysReg reg_s0{0};
|
||||
PhysReg reg_s1{1};
|
||||
PhysReg v0_lo{256};
|
||||
PhysReg v0_b3{256};
|
||||
v0_b3.reg_b += 3;
|
||||
PhysReg v1_lo{257};
|
||||
|
||||
//>> p_unit_test 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
|
||||
/* It would be better if the scc=s0 copy was done later, but handle_operands() is complex
|
||||
* enough
|
||||
*/
|
||||
|
||||
//! s1: %0:scc = s_cmp_lg_i32 %0:s[0], 0
|
||||
//! s1: %0:m0 = s_mov_b32 %0:scc
|
||||
//! lv1: %0:v[0] = v_mov_b32 %0:v[1]
|
||||
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
|
||||
//! lv1: %0:v[0] = v_mov_b32 %0:v[1]
|
||||
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
|
||||
//! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
|
||||
Instruction *instr = bld.pseudo(
|
||||
aco_opcode::p_parallelcopy,
|
||||
Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
|
||||
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
|
||||
instr->pseudo().scratch_sgpr = m0;
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
|
||||
if (!setup_cs(NULL, GFX10))
|
||||
return;
|
||||
|
||||
PhysReg reg_v0{256};
|
||||
PhysReg reg_v1{257};
|
||||
RegClass v1_linear = v1.as_linear();
|
||||
|
||||
//>> p_unit_test 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
|
||||
Instruction *instr = bld.pseudo(
|
||||
aco_opcode::p_parallelcopy,
|
||||
Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
|
||||
Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
|
||||
instr->pseudo().scratch_sgpr = m0;
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue