mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 22:30:12 +01:00
aco/optimizer: Optimize p_extract + v_mul_u32_u24 to v_mad_u32_u16.
This should perform the same but removes SDWA from the address calculations in NGG culling shaders for example. This is done because SDWA is no longer available on GFX11.

Fossil DB stats on GFX1100:

Totals from 36 (0.03% of 134913) affected shaders:
CodeSize: 300968 -> 300884 (-0.03%); split: -0.04%, +0.01%
Instrs: 60955 -> 60863 (-0.15%); split: -0.15%, +0.00%
Latency: 426809 -> 426819 (+0.00%); split: -0.06%, +0.06%
InvThroughput: 39076 -> 39025 (-0.13%); split: -0.14%, +0.01%
VClause: 1440 -> 1443 (+0.21%)
Copies: 5714 -> 5725 (+0.19%)

Fossil DB stats on GFX1100 with NGG culling enabled:

Totals from 60953 (45.18% of 134913) affected shaders:
VGPRs: 2273172 -> 2273160 (-0.00%)
CodeSize: 186401864 -> 186403036 (+0.00%); split: -0.00%, +0.00%
Instrs: 37038048 -> 36977353 (-0.16%); split: -0.16%, +0.00%
Latency: 146466770 -> 146350172 (-0.08%); split: -0.08%, +0.00%
InvThroughput: 15342790 -> 15228585 (-0.74%); split: -0.74%, +0.00%
VClause: 669662 -> 669665 (+0.00%)
Copies: 2972380 -> 2972482 (+0.00%); split: -0.01%, +0.01%

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17924>
This commit is contained in:
parent
171d76ded1
commit
faba30a8f3
1 changed files with 17 additions and 0 deletions
|
|
@ -1106,6 +1106,11 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
|
||||
(sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
|
||||
!instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
(instr->operands[!idx].is16bit() ||
|
||||
instr->operands[!idx].constantValue() <= UINT16_MAX)) {
|
||||
return true;
|
||||
} else if (idx < 2 && can_use_SDWA(ctx.program->gfx_level, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
|
||||
if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
|
||||
|
|
@ -1162,6 +1167,18 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
(sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
|
||||
/* The undesirable upper bits are already shifted out. */
|
||||
return;
|
||||
} else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
|
||||
!instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
(instr->operands[!idx].is16bit() ||
|
||||
instr->operands[!idx].constantValue() <= UINT16_MAX)) {
|
||||
Instruction* mad =
|
||||
create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
|
||||
mad->definitions[0] = instr->definitions[0];
|
||||
mad->operands[0] = instr->operands[0];
|
||||
mad->operands[1] = instr->operands[1];
|
||||
mad->operands[2] = Operand::zero();
|
||||
mad->vop3().opsel = (sel.offset() / 2) << idx;
|
||||
instr.reset(mad);
|
||||
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
|
||||
to_SDWA(ctx, instr);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue