mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 08:00:12 +01:00
aco: apply extract to p_extract_vector
fossil-db (navi21):
Totals from 46 (0.06% of 79395) affected shaders:
Instrs: 80126 -> 79944 (-0.23%); split: -0.27%, +0.04%
CodeSize: 486860 -> 485668 (-0.24%); split: -0.31%, +0.06%
Latency: 1615395 -> 1614218 (-0.07%); split: -0.07%, +0.00%
InvThroughput: 705479 -> 705013 (-0.07%); split: -0.07%, +0.00%
Copies: 18934 -> 18797 (-0.72%); split: -0.98%, +0.25%
VALU: 52452 -> 52268 (-0.35%); split: -0.41%, +0.06%
SALU: 17253 -> 17255 (+0.01%); split: -0.02%, +0.03%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31762>
This commit is contained in:
parent
6cb9d39bc2
commit
f1a932bc29
2 changed files with 160 additions and 5 deletions
|
|
@ -1084,14 +1084,15 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
} else if (sel.size() == 2 && ((instr->opcode == aco_opcode::s_pack_lh_b32_b16 && idx == 0) ||
|
||||
(instr->opcode == aco_opcode::s_pack_hl_b32_b16 && idx == 1))) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
} else if (instr->opcode == aco_opcode::p_extract ||
|
||||
instr->opcode == aco_opcode::p_extract_vector) {
|
||||
if (ctx.program->gfx_level < GFX9 && !info.instr->operands[0].isOfType(RegType::vgpr) &&
|
||||
instr->definitions[0].regClass().is_subdword())
|
||||
return false;
|
||||
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
return apply_extract_twice(sel, instr->operands[idx].getTemp(), instrSel,
|
||||
instr->definitions[0].getTemp()) != SubdwordSel();
|
||||
return instrSel && apply_extract_twice(sel, instr->operands[idx].getTemp(), instrSel,
|
||||
instr->definitions[0].getTemp());
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -1175,6 +1176,29 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
instr->operands[2] = Operand::c32(new_sel.size() * 8u);
|
||||
instr->operands[3] = Operand::c32(new_sel.sign_extend());
|
||||
return;
|
||||
} else if (instr->opcode == aco_opcode::p_extract_vector) {
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
SubdwordSel new_sel = apply_extract_twice(sel, instr->operands[idx].getTemp(), instrSel,
|
||||
instr->definitions[0].getTemp());
|
||||
assert(new_sel.size() <= 2);
|
||||
|
||||
if (new_sel.size() == instr->definitions[0].bytes()) {
|
||||
instr->operands[1] = Operand::c32(new_sel.offset() / instr->definitions[0].bytes());
|
||||
return;
|
||||
} else {
|
||||
/* parse_extract() only succeeds with p_extract_vector for VGPR definitions because there
|
||||
* are no sub-dword SGPR regclasses. */
|
||||
assert(instr->definitions[0].regClass().type() != RegType::sgpr);
|
||||
|
||||
Instruction* ext = create_instruction(aco_opcode::p_extract, Format::PSEUDO, 4, 1);
|
||||
ext->definitions[0] = instr->definitions[0];
|
||||
ext->operands[0] = instr->operands[0];
|
||||
ext->operands[1] = Operand::c32(new_sel.offset() / new_sel.size());
|
||||
ext->operands[2] = Operand::c32(new_sel.size() * 8u);
|
||||
ext->operands[3] = Operand::c32(new_sel.sign_extend());
|
||||
ext->pass_flags = instr->pass_flags;
|
||||
instr.reset(ext);
|
||||
}
|
||||
}
|
||||
|
||||
/* These are the only labels worth keeping at the moment. */
|
||||
|
|
@ -3785,7 +3809,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->isSDWA() || instr->isDPP())
|
||||
return;
|
||||
|
||||
if (instr->opcode == aco_opcode::p_extract) {
|
||||
if (instr->opcode == aco_opcode::p_extract || instr->opcode == aco_opcode::p_extract_vector) {
|
||||
ssa_info& info = ctx.info[instr->operands[0].tempId()];
|
||||
if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
|
||||
apply_extract(ctx, instr, 0, info);
|
||||
|
|
@ -3794,7 +3818,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->operands[0].setTemp(info.instr->operands[0].getTemp());
|
||||
}
|
||||
|
||||
apply_ds_extract(ctx, instr);
|
||||
if (instr->opcode == aco_opcode::p_extract)
|
||||
apply_ds_extract(ctx, instr);
|
||||
}
|
||||
|
||||
/* TODO: There are still some peephole optimizations that could be done:
|
||||
|
|
|
|||
|
|
@ -699,3 +699,133 @@ BEGIN_TEST(optimize.sdwa.subdword_extract)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
/* Optimizer test for p_extract_vector whose source is itself a sub-dword extract of %a
 * (built with the ext_ubyte()/ext_ushort() helpers, or with an explicit p_extract in the
 * last two cases). The shader is set up with setup_cs("v1", GFX10_3), so %a is a single
 * v1 input. Each pair of `//!` lines is the check pattern for the writeout() that follows
 * it. NOTE(review): ext_ubyte/ext_ushort are defined outside this view; the unfused
 * patterns below (e.g. `p_extract %a, 0, 8, 0`) suggest they emit unsigned 8-/16-bit
 * p_extract of the given element — confirm against the helpers. */
BEGIN_TEST(optimize.sdwa.extract_vector)
|
||||
//>> v1: %a = p_startpgm
|
||||
if (!setup_cs("v1", GFX10_3))
|
||||
return;
|
||||
|
||||
Temp a = inputs[0];
|
||||
|
||||
/* Cases 0-3: v1b element 0 of ext_ubyte(a, N). The expected pattern is a single
 * p_extract_vector reading %a directly with index N. */
//! v1b: %res0 = p_extract_vector %a, 0
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(
|
||||
0, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 0), Operand::c32(0)));
|
||||
|
||||
//! v1b: %res1 = p_extract_vector %a, 1
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(
|
||||
1, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 1), Operand::c32(0)));
|
||||
|
||||
//! v1b: %res2 = p_extract_vector %a, 2
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(
|
||||
2, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 2), Operand::c32(0)));
|
||||
|
||||
//! v1b: %res3 = p_extract_vector %a, 3
|
||||
//! p_unit_test 3, %res3
|
||||
writeout(
|
||||
3, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 3), Operand::c32(0)));
|
||||
|
||||
/* Cases 4-7: v1b element E of ext_ushort(a, N). The expected pattern indexes %a at
 * byte 2 * N + E. */
//! v1b: %res4 = p_extract_vector %a, 0
|
||||
//! p_unit_test 4, %res4
|
||||
writeout(
|
||||
4, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 0), Operand::c32(0)));
|
||||
|
||||
//! v1b: %res5 = p_extract_vector %a, 2
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(
|
||||
5, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 1), Operand::c32(0)));
|
||||
|
||||
//! v1b: %res6 = p_extract_vector %a, 1
|
||||
//! p_unit_test 6, %res6
|
||||
writeout(
|
||||
6, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 0), Operand::c32(1)));
|
||||
|
||||
//! v1b: %res7 = p_extract_vector %a, 3
|
||||
//! p_unit_test 7, %res7
|
||||
writeout(
|
||||
7, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 1), Operand::c32(1)));
|
||||
|
||||
/* Cases 8-11: the requested v1b element index is at or beyond the number of bytes the
 * inner extract selected. The expected pattern keeps both instructions: a p_extract into
 * a v1 temporary followed by the original p_extract_vector. */
//! v1: %res8_tmp = p_extract %a, 0, 8, 0
|
||||
//! v1b: %res8 = p_extract_vector %res8_tmp, 1
|
||||
//! p_unit_test 8, %res8
|
||||
writeout(
|
||||
8, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 0), Operand::c32(1)));
|
||||
|
||||
//! v1: %res9_tmp = p_extract %a, 0, 16, 0
|
||||
//! v1b: %res9 = p_extract_vector %res9_tmp, 2
|
||||
//! p_unit_test 9, %res9
|
||||
writeout(
|
||||
9, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 0), Operand::c32(2)));
|
||||
|
||||
//! v1: %res10_tmp = p_extract %a, 1, 16, 0
|
||||
//! v1b: %res10 = p_extract_vector %res10_tmp, 2
|
||||
//! p_unit_test 10, %res10
|
||||
writeout(10, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ushort(a, 1),
|
||||
Operand::c32(2)));
|
||||
|
||||
//! v1: %res11_tmp = p_extract %a, 1, 8, 0
|
||||
//! v1b: %res11 = p_extract_vector %res11_tmp, 2
|
||||
//! p_unit_test 11, %res11
|
||||
writeout(
|
||||
11, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), ext_ubyte(a, 1), Operand::c32(2)));
|
||||
|
||||
/* Cases 12-15: v2b element 0 of ext_ubyte(a, N). The selected bits are narrower than the
 * v2b definition, and the expected pattern is `p_extract %a, N, 8, 0` written to a v2b
 * definition instead of a p_extract_vector. */
//! v2b: %res12 = p_extract %a, 0, 8, 0
|
||||
//! p_unit_test 12, %res12
|
||||
writeout(
|
||||
12, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ubyte(a, 0), Operand::c32(0)));
|
||||
|
||||
//! v2b: %res13 = p_extract %a, 1, 8, 0
|
||||
//! p_unit_test 13, %res13
|
||||
writeout(
|
||||
13, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ubyte(a, 1), Operand::c32(0)));
|
||||
|
||||
//! v2b: %res14 = p_extract %a, 2, 8, 0
|
||||
//! p_unit_test 14, %res14
|
||||
writeout(
|
||||
14, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ubyte(a, 2), Operand::c32(0)));
|
||||
|
||||
//! v2b: %res15 = p_extract %a, 3, 8, 0
|
||||
//! p_unit_test 15, %res15
|
||||
writeout(
|
||||
15, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ubyte(a, 3), Operand::c32(0)));
|
||||
|
||||
/* Cases 16-17: v2b element 0 of ext_ushort(a, N). The expected pattern is a single
 * p_extract_vector of %a with index N. */
//! v2b: %res16 = p_extract_vector %a, 0
|
||||
//! p_unit_test 16, %res16
|
||||
writeout(16, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ushort(a, 0),
|
||||
Operand::c32(0)));
|
||||
|
||||
//! v2b: %res17 = p_extract_vector %a, 1
|
||||
//! p_unit_test 17, %res17
|
||||
writeout(17, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ushort(a, 1),
|
||||
Operand::c32(0)));
|
||||
|
||||
/* Cases 18-19: v2b element 1 of a byte/short extract. The expected pattern keeps both
 * instructions. */
//! v1: %res18_tmp = p_extract %a, 0, 8, 0
|
||||
//! v2b: %res18 = p_extract_vector %res18_tmp, 1
|
||||
//! p_unit_test 18, %res18
|
||||
writeout(
|
||||
18, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ubyte(a, 0), Operand::c32(1)));
|
||||
|
||||
//! v1: %res19_tmp = p_extract %a, 0, 16, 0
|
||||
//! v2b: %res19 = p_extract_vector %res19_tmp, 1
|
||||
//! p_unit_test 19, %res19
|
||||
writeout(19, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), ext_ushort(a, 1),
|
||||
Operand::c32(1)));
|
||||
|
||||
/* Cases 20-21: the source is an explicit p_extract with a v2b definition (element 1,
 * 16 bits, sign-extend operand false). v1b elements 0 and 1 of it are expected as
 * p_extract_vector of %a with index 2 and 3. */
//! v1b: %res20 = p_extract_vector %a, 2
|
||||
//! p_unit_test 20, %res20
|
||||
writeout(20, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v2b), a, Operand::c32(1),
|
||||
Operand::c32(16), Operand::c32(false)),
|
||||
Operand::c32(0)));
|
||||
|
||||
//! v1b: %res21 = p_extract_vector %a, 3
|
||||
//! p_unit_test 21, %res21
|
||||
writeout(21, bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v2b), a, Operand::c32(1),
|
||||
Operand::c32(16), Operand::c32(false)),
|
||||
Operand::c32(1)));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue