mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
aco: combine extracts into s_pack_ll_b32_b16
fossil-db (navi21):
Totals from 3 (0.00% of 79395) affected shaders:
Instrs: 45941 -> 45924 (-0.04%)
CodeSize: 241768 -> 241756 (-0.00%)
Latency: 176501 -> 176491 (-0.01%)
Copies: 6884 -> 6882 (-0.03%)
SALU: 6101 -> 6088 (-0.21%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29912>
This commit is contained in:
parent
98cb50297b
commit
ca161a96d1
2 changed files with 67 additions and 1 deletions
|
|
@@ -1050,6 +1050,13 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
} else if (instr->isVALU() && sel.size() == 2 && !instr->valu().opsel[idx] &&
|
||||
can_use_opsel(ctx.program->gfx_level, instr->opcode, idx)) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::s_pack_ll_b32_b16 && sel.size() == 2 &&
|
||||
(idx == 1 || ctx.program->gfx_level >= GFX11 || !sel.offset())) {
|
||||
return true;
|
||||
} else if (sel.size() == 2 &&
|
||||
((instr->opcode == aco_opcode::s_pack_lh_b32_b16 && idx == 0) ||
|
||||
(instr->opcode == aco_opcode::s_pack_hl_b32_b16 && idx == 1))) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
|
||||
|
|
@@ -1124,6 +1131,13 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
!info.instr->operands[0].isOfType(RegType::vgpr))
|
||||
instr->format = asVOP3(instr->format);
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::s_pack_ll_b32_b16) {
|
||||
if (sel.offset())
|
||||
instr->opcode = idx ? aco_opcode::s_pack_lh_b32_b16 : aco_opcode::s_pack_hl_b32_b16;
|
||||
} else if (instr->opcode == aco_opcode::s_pack_lh_b32_b16 ||
|
||||
instr->opcode == aco_opcode::s_pack_hl_b32_b16) {
|
||||
if (sel.offset())
|
||||
instr->opcode = aco_opcode::s_pack_hh_b32_b16;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
|
||||
|
|
@@ -3784,7 +3798,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->definitions.empty() || is_dead(ctx.uses, instr.get()))
|
||||
return;
|
||||
|
||||
if (instr->isVALU()) {
|
||||
if (instr->isVALU() || instr->isSALU()) {
|
||||
/* Apply SDWA. Do this after label_instruction() so it can remove
|
||||
* label_extract if not all instructions can take SDWA. */
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
|
|
@@ -3811,7 +3825,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->operands[i].setTemp(info.instr->operands[0].getTemp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->isVALU()) {
|
||||
if (can_apply_sgprs(ctx, instr))
|
||||
apply_sgprs(ctx, instr);
|
||||
combine_mad_mix(ctx, instr);
|
||||
|
|
|
|||
|
|
@@ -1977,3 +1977,53 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimize.s_pack)
/* Optimizer test: 16-bit p_extract results used as operands of
 * s_pack_ll_b32_b16 are expected to be folded into the pack. The expected
 * output reads the extract's source directly and uses the lh/hl/hh opcode
 * variant for every operand that selected the upper half. */
|
||||
//>> s1: %a, s1: %b, s1: %c = p_startpgm
|
||||
/* Three s1 inputs (%a, %b, %c in the expectations); the test is skipped if
 * setup_cs() fails for GFX11. */
if (!setup_cs("s1 s1 s1", GFX11))
|
||||
return;
|
||||
|
||||
/* lo is extracted from inputs[1] (%b) and hi from inputs[2] (%c). The
 * constant operands are presumably (index, bit size, sign-extend), i.e.
 * lo = lower 16 bits, hi = upper 16 bits, both zero-extended -- TODO confirm
 * against the p_extract definition. */
Temp lo = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[1],
|
||||
Operand::c32(0), Operand::c32(16u), Operand::c32(false));
|
||||
Temp hi = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[2],
|
||||
Operand::c32(1), Operand::c32(16u), Operand::c32(false));
|
||||
|
||||
/* Cases 0-3: both pack operands are extracts. */
//! s1: %res0 = s_pack_lh_b32_b16 %b, %c
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(0, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, hi));
|
||||
|
||||
//! s1: %res1 = s_pack_ll_b32_b16 %b, %b
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, lo));
|
||||
|
||||
//! s1: %res2 = s_pack_hl_b32_b16 %c, %b
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(2, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, lo));
|
||||
|
||||
//! s1: %res3 = s_pack_hh_b32_b16 %c, %c
|
||||
//! p_unit_test 3, %res3
|
||||
writeout(3, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, hi));
|
||||
|
||||
/* NOTE(review): lo/hi are rebuilt here with the same operands as above;
 * presumably so the remaining cases use fresh extract temps -- confirm. */
lo = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[1], Operand::c32(0),
|
||||
Operand::c32(16u), Operand::c32(false));
|
||||
hi = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[2], Operand::c32(1),
|
||||
Operand::c32(16u), Operand::c32(false));
|
||||
|
||||
/* Cases 4-7: only one pack operand is an extract, the other is the plain
 * input %a (inputs[0]). */
//! s1: %res4 = s_pack_ll_b32_b16 %a, %b
|
||||
//! p_unit_test 4, %res4
|
||||
writeout(4, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), inputs[0], lo));
|
||||
|
||||
//! s1: %res5 = s_pack_lh_b32_b16 %a, %c
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), inputs[0], hi));
|
||||
|
||||
//! s1: %res6 = s_pack_ll_b32_b16 %b, %a
|
||||
//! p_unit_test 6, %res6
|
||||
writeout(6, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, inputs[0]));
|
||||
|
||||
//! s1: %res7 = s_pack_hl_b32_b16 %c, %a
|
||||
//! p_unit_test 7, %res7
|
||||
writeout(7, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, inputs[0]));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue