mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
aco: handle SGPR limitations when applying extract
We were already doing this, but missing it in a few places.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31762>
This commit is contained in:
parent
07e28dad75
commit
d3ac69f79b
2 changed files with 31 additions and 1 deletion
|
|
@@ -1057,6 +1057,10 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
(instr->opcode == aco_opcode::s_pack_hl_b32_b16 && idx == 1))) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
if (ctx.program->gfx_level < GFX9 && !info.instr->operands[0].isOfType(RegType::vgpr) &&
|
||||
instr->definitions[0].regClass().is_subdword())
|
||||
return false;
|
||||
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
|
||||
/* the outer offset must be within extracted range */
|
||||
|
|
@@ -1088,7 +1092,7 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
|
||||
ctx.info[tmp.id()].label &= ~label_insert;
|
||||
|
||||
- if (sel.size() == 4) {
|
||||
+ if (sel.size() == 4 && tmp.type() == instr->operands[idx].regClass().type()) {
|
||||
/* full dword selection */
|
||||
} else if ((instr->opcode == aco_opcode::v_cvt_f32_u32 ||
|
||||
instr->opcode == aco_opcode::v_cvt_f32_i32) &&
|
||||
|
|
@@ -1104,6 +1108,8 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
|
||||
(sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
|
||||
/* The undesirable upper bits are already shifted out. */
|
||||
if (!instr->isVOP3() && !info.instr->operands[0].isOfType(RegType::vgpr))
|
||||
instr->format = asVOP3(instr->format);
|
||||
return;
|
||||
} else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
|
||||
!instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
|
|
|
|||
|
|
@@ -634,3 +634,27 @@ BEGIN_TEST(optimize.sdwa.special_case_valu)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
/*
 * Optimizer test: applying an extract whose underlying source is an SGPR, on GFX8.
 *
 * The single program input %a is an s1 (scalar) value. It is copied into a VGPR
 * and then fed through ext_ushort() (presumably a zero-extending 16-bit
 * p_extract, judging by the "p_extract %a, 0, 16, 0" expectation below). The
 * expected-output patterns show the copy being propagated, so the consumers end
 * up reading the scalar %a directly.
 *
 * Two cases are checked:
 *   0: the extract feeding a 16-bit left shift is folded away entirely.
 *   1: the extract feeding a sub-dword (v1b) p_extract is NOT folded.
 */
BEGIN_TEST(optimize.sdwa.extract_sgpr_limits)
|
||||
//>> s1: %a = p_startpgm
|
||||
if (!setup_cs("s1", GFX8))
|
||||
return;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a_vgpr = bld.copy(bld.def(v1), a);
|
||||
|
||||
/* The optimizer should make this VOP3: shifting left by 16 already discards the
 * upper half, so the 16-bit extract is dropped and the shift reads %a directly.
 * NOTE(review): presumably the VOP2 encoding cannot take an SGPR in this operand
 * slot, hence the switch to VOP3 - confirm against the ISA documentation. */
|
||||
//! v1: %res0 = v_lshlrev_b32 16, %a
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(
|
||||
0, bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16), ext_ushort(a_vgpr, 0)));
|
||||
|
||||
/* Unsupported on GFX8: the outer p_extract has a sub-dword (v1b) definition and
 * the inner extract's source is not a VGPR, so the two extracts must not be
 * merged and both are expected to remain in the output. */
|
||||
//! v1: %res1_tmp = p_extract %a, 0, 16, 0
|
||||
//! v1b: %res1 = p_extract %res1_tmp, 0, 8, 0
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, bld.pseudo(aco_opcode::p_extract, bld.def(v1b), ext_ushort(a_vgpr, 0),
|
||||
Operand::c32(0), Operand::c32(8), Operand::c32(false)));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue