aco: fix combining add/sub to b2i if a new dest needs to be allocated

The uses vector needs to be expanded to avoid an out-of-bounds access
and to make sure the use count of the newly allocated temporary is
initialized to 0.

This fixes combining more v_and(a, v_subbrev_co_u32).

fossil-db (Vega10):
Totals from 4574 (3.28% of 139517) affected shaders:
SGPRs: 291625 -> 292217 (+0.20%); split: -0.01%, +0.21%
VGPRs: 276368 -> 276188 (-0.07%); split: -0.07%, +0.01%
SpillSGPRs: 455 -> 533 (+17.14%)
SpillVGPRs: 76 -> 78 (+2.63%)
CodeSize: 23327500 -> 23304152 (-0.10%); split: -0.17%, +0.07%
MaxWaves: 22044 -> 22066 (+0.10%)
Instrs: 4583064 -> 4576301 (-0.15%); split: -0.15%, +0.01%
Cycles: 47925276 -> 47871968 (-0.11%); split: -0.13%, +0.01%
VMEM: 1599363 -> 1597473 (-0.12%); split: +0.08%, -0.19%
SMEM: 331461 -> 331126 (-0.10%); split: +0.08%, -0.18%
VClause: 80639 -> 80696 (+0.07%); split: -0.02%, +0.09%
SClause: 155992 -> 155993 (+0.00%); split: -0.02%, +0.02%
Copies: 333482 -> 333318 (-0.05%); split: -0.12%, +0.07%
Branches: 70967 -> 70968 (+0.00%)
PreSGPRs: 187078 -> 187711 (+0.34%); split: -0.01%, +0.35%
PreVGPRs: 244918 -> 244785 (-0.05%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7513>
(cherry picked from commit ec347ee9bc)
This commit is contained in:
Samuel Pitoiset 2020-11-09 19:42:22 +01:00 committed by Dylan Baker
parent 8464a6e3d7
commit 162a0678ac
3 changed files with 17 additions and 4 deletions

View file

@ -4756,7 +4756,7 @@
"description": "aco: fix combining add/sub to b2i if a new dest needs to be allocated",
"nominated": false,
"nomination_type": null,
"resolution": 4,
"resolution": 1,
"master_sha": null,
"because_sha": null
},

View file

@ -2278,9 +2278,16 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
}
ctx.uses[instr->operands[i].tempId()]--;
new_instr->definitions[0] = instr->definitions[0];
new_instr->definitions[1] =
instr->definitions.size() == 2 ? instr->definitions[1] :
Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
if (instr->definitions.size() == 2) {
new_instr->definitions[1] = instr->definitions[1];
} else {
new_instr->definitions[1] =
Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
/* Make sure the uses vector is large enough and the number of
* uses properly initialized to 0.
*/
ctx.uses.push_back(0);
}
new_instr->definitions[1].setHint(vcc);
new_instr->operands[0] = Operand(0u);
new_instr->operands[1] = instr->operands[!i];

View file

@ -119,6 +119,12 @@ BEGIN_TEST(optimize.cndmask)
Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
//! v1: %res4 = v_cndmask_b32 0, %a, %c
//! p_unit_test 4, %res4
Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), Operand(inputs[2]));
Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask);
writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
finish_opt_test();
}
END_TEST