mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 22:30:12 +01:00
aco: fix combining add/sub to b2i if a new dest needs to be allocated
The uses vector needs to be expanded to avoid out-of-bounds access and to make sure the number of uses is initialized to 0. This fixes combining more v_and(a, v_subbrev_co_u32). fossil-db (Vega10): Totals from 4574 (3.28% of 139517) affected shaders: SGPRs: 291625 -> 292217 (+0.20%); split: -0.01%, +0.21% VGPRs: 276368 -> 276188 (-0.07%); split: -0.07%, +0.01% SpillSGPRs: 455 -> 533 (+17.14%) SpillVGPRs: 76 -> 78 (+2.63%) CodeSize: 23327500 -> 23304152 (-0.10%); split: -0.17%, +0.07% MaxWaves: 22044 -> 22066 (+0.10%) Instrs: 4583064 -> 4576301 (-0.15%); split: -0.15%, +0.01% Cycles: 47925276 -> 47871968 (-0.11%); split: -0.13%, +0.01% VMEM: 1599363 -> 1597473 (-0.12%); split: +0.08%, -0.19% SMEM: 331461 -> 331126 (-0.10%); split: +0.08%, -0.18% VClause: 80639 -> 80696 (+0.07%); split: -0.02%, +0.09% SClause: 155992 -> 155993 (+0.00%); split: -0.02%, +0.02% Copies: 333482 -> 333318 (-0.05%); split: -0.12%, +0.07% Branches: 70967 -> 70968 (+0.00%) PreSGPRs: 187078 -> 187711 (+0.34%); split: -0.01%, +0.35% PreVGPRs: 244918 -> 244785 (-0.05%) Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7513>
This commit is contained in:
parent
7004548bdf
commit
ec347ee9bc
2 changed files with 16 additions and 3 deletions
|
|
@ -2260,9 +2260,16 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
|
|||
}
|
||||
ctx.uses[instr->operands[i].tempId()]--;
|
||||
new_instr->definitions[0] = instr->definitions[0];
|
||||
new_instr->definitions[1] =
|
||||
instr->definitions.size() == 2 ? instr->definitions[1] :
|
||||
Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
|
||||
if (instr->definitions.size() == 2) {
|
||||
new_instr->definitions[1] = instr->definitions[1];
|
||||
} else {
|
||||
new_instr->definitions[1] =
|
||||
Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
|
||||
/* Make sure the uses vector is large enough and the number of
|
||||
* uses properly initialized to 0.
|
||||
*/
|
||||
ctx.uses.push_back(0);
|
||||
}
|
||||
new_instr->definitions[1].setHint(vcc);
|
||||
new_instr->operands[0] = Operand(0u);
|
||||
new_instr->operands[1] = instr->operands[!i];
|
||||
|
|
|
|||
|
|
@ -119,6 +119,12 @@ BEGIN_TEST(optimize.cndmask)
|
|||
Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
|
||||
writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
|
||||
|
||||
//! v1: %res4 = v_cndmask_b32 0, %a, %c
|
||||
//! p_unit_test 4, %res4
|
||||
Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), Operand(inputs[2]));
|
||||
Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask);
|
||||
writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue