aco: Implement integer conversions using p_extract.

Fossil DB stats on Sienna Cichlid:

Totals from 563 (0.44% of 128647) affected shaders:
SpillSGPRs: 1381 -> 1382 (+0.07%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 2474724 -> 2446612 (-1.14%); split: -1.15%, +0.02%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 440973 -> 435091 (-1.33%); split: -1.35%, +0.01%
Latency: 9123609 -> 8517830 (-6.64%); split: -6.66%, +0.02%
InvThroughput: 3685256 -> 3383293 (-8.19%); split: -8.22%, +0.02%
VClause: 8425 -> 8372 (-0.63%)
Copies: 66553 -> 66681 (+0.19%); split: -0.49%, +0.68%
Branches: 13824 -> 13825 (+0.01%); split: -0.01%, +0.01%
PreSGPRs: 21816 -> 21824 (+0.04%)

Fossil DB stats on Sienna Cichlid with NGGC on:

Totals from 58802 (45.71% of 128647) affected shaders:
SpillSGPRs: 6541 -> 6542 (+0.02%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 162976608 -> 162244340 (-0.45%); split: -0.45%, +0.00%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 31163521 -> 31098078 (-0.21%); split: -0.21%, +0.00%
Latency: 146893569 -> 144920070 (-1.34%); split: -1.34%, +0.00%
InvThroughput: 25384324 -> 25035940 (-1.37%); split: -1.38%, +0.00%
VClause: 552310 -> 552257 (-0.01%)
Copies: 3356856 -> 3356984 (+0.00%); split: -0.01%, +0.01%
Branches: 1237314 -> 1237315 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 2185339 -> 2185347 (+0.00%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11560>
This commit is contained in:
Timur Kristóf 2021-06-23 18:28:18 +02:00 committed by Marge Bot
parent f14023666c
commit 9478901824

View file

@ -635,24 +635,10 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
assert(src_bits < 32);
bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(),
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
} else if (ctx->options->chip_class >= GFX8) {
assert(src_bits < 32);
assert(src_bits != 8 || src.regClass() == v1b);
assert(src_bits != 16 || src.regClass() == v2b);
assert(dst_bits >= 16);
aco_ptr<SDWA_instruction> sdwa{
create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(src);
sdwa->definitions[0] = Definition(tmp);
sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else {
assert(src_bits < 32);
assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7);
aco_opcode opcode = sign_extend ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32;
bld.vop3(opcode, Definition(tmp), src, Operand::zero(),
Operand::c32(src_bits == 8 ? 8u : 16u));
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
Operand::c32((unsigned)sign_extend));
}
if (dst_bits == 64) {