mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
aco: create v_mad_u32_u24
fossil-db (Navi):
Totals from 849 (0.61% of 138791) affected shaders:
SGPRs: 38528 -> 38544 (+0.04%)
VGPRs: 39860 -> 39856 (-0.01%)
CodeSize: 2701880 -> 2702016 (+0.01%)
MaxWaves: 9148 -> 9150 (+0.02%)
Instrs: 509864 -> 509821 (-0.01%); split: -0.01%, +0.00%
Cycles: 3400124 -> 3399628 (-0.01%); split: -0.02%, +0.00%
VMEM: 262757 -> 262672 (-0.03%)
SMEM: 59710 -> 59704 (-0.01%)
Copies: 44461 -> 44466 (+0.01%)

fossil-db (Polaris):
Totals from 1487 (1.06% of 140385) affected shaders:
SGPRs: 54688 -> 55840 (+2.11%)
CodeSize: 2725608 -> 2725720 (+0.00%); split: -0.01%, +0.01%
Instrs: 521394 -> 517710 (-0.71%)
Cycles: 18474108 -> 18410964 (-0.34%)
VMEM: 436992 -> 431028 (-1.36%); split: +0.06%, -1.43%
SMEM: 124503 -> 122564 (-1.56%); split: +0.45%, -2.00%
VClause: 21972 -> 22015 (+0.20%); split: -0.12%, +0.31%
SClause: 14274 -> 14287 (+0.09%)
Copies: 44407 -> 44411 (+0.01%); split: -0.02%, +0.03%
PreSGPRs: 34318 -> 34321 (+0.01%); split: -0.00%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7639>
This commit is contained in:
parent
1200f6da0b
commit
631e18d427
2 changed files with 28 additions and 7 deletions
|
|
@@ -1316,6 +1316,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
|
||||
}
|
||||
break;
|
||||
case aco_opcode::v_mul_u32_u24:
|
||||
ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
|
||||
break;
|
||||
case aco_opcode::v_and_b32: { /* abs */
|
||||
if (!instr->usesModifiers() && instr->operands[1].isTemp() &&
|
||||
instr->operands[1].getTemp().type() == RegType::vgpr &&
|
||||
|
|
@@ -2324,12 +2327,6 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->usesModifiers())
|
||||
return false;
|
||||
|
||||
/* Do not combine if the carry-out is used. */
|
||||
if ((instr->opcode == aco_opcode::v_add_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_add_co_u32_e64) &&
|
||||
ctx.uses[instr->definitions[1].tempId()])
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
Instruction *op_instr = follow_operand(ctx, instr->operands[i]);
|
||||
if (op_instr &&
|
||||
|
|
@@ -2912,6 +2909,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
} else if (instr->opcode == aco_opcode::v_add_u32) {
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
|
||||
else if (combine_add_bcnt(ctx, instr)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, aco_opcode::v_mad_u32_u24, "120", 1 | 2)) ;
|
||||
else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
|
|
@@ -2924,8 +2922,10 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
}
|
||||
} else if (instr->opcode == aco_opcode::v_add_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_add_co_u32_e64) {
|
||||
bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
|
||||
else combine_add_bcnt(ctx, instr);
|
||||
else if (!carry_out && combine_add_bcnt(ctx, instr)) ;
|
||||
else if (!carry_out) combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, aco_opcode::v_mad_u32_u24, "120", 1 | 2);
|
||||
} else if (instr->opcode == aco_opcode::v_sub_u32 ||
|
||||
instr->opcode == aco_opcode::v_sub_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_sub_co_u32_e64) {
|
||||
|
|
|
|||
|
|
@@ -723,3 +723,24 @@ BEGIN_TEST(optimize.minmax)
|
|||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Optimizer test for combining a v_mul_u32_u24 with a 32-bit add into
 * v_mad_u32_u24. The body runs once per chip class from GFX8 through GFX9,
 * each time with three v1 inputs (named %a, %b, %c in the expected-output
 * patterns below). */
BEGIN_TEST(optimize.mad_32_24)
|
||||
for (unsigned i = GFX8; i <= GFX9; i++) {
|
||||
//>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1", (chip_class)i))
|
||||
/* setup_cs reported failure for this chip class: skip it. */
continue;
|
||||
|
||||
/* Case 0: inputs[0] + (inputs[1] * inputs[2]) where only the 32-bit sum is
 * written out. Expected result: a single v_mad_u32_u24 with the multiply
 * operands first and the addend last. */
//! v1: %res0 = v_mad_u32_u24 %b, %c, %a
|
||||
//! p_unit_test 0, %res0
|
||||
Temp mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
|
||||
writeout(0, bld.vadd32(bld.def(v1), inputs[0], mul));
|
||||
|
||||
/* Case 1: same multiply and add, but the add is built with the extra `true`
 * argument and the value written out is the add's second definition (the s2
 * result in the pattern). Expected result: v_mul_u32_u24 and v_add_co_u32
 * both remain, i.e. no v_mad_u32_u24 is formed.
 * NOTE(review): `true` presumably requests a carry-out definition from
 * vadd32 -- confirm against the Builder declaration. */
//! v1: %res1_tmp = v_mul_u32_u24 %b, %c
|
||||
//! v1: %_, s2: %res1 = v_add_co_u32 %a, %res1_tmp
|
||||
//! p_unit_test 1, %res1
|
||||
mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
|
||||
writeout(1, bld.vadd32(bld.def(v1), inputs[0], mul, true).def(1).getTemp());
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue