mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-26 07:50:20 +01:00
aco: don't combine precise max(min()) to med3
fossil-db (Navi):
Totals from 241 (0.18% of 137413) affected shaders:
CodeSize: 856280 -> 856308 (+0.00%); split: -0.00%, +0.00%
Instrs: 164220 -> 164514 (+0.18%); split: -0.00%, +0.18%
Cycles: 1031916 -> 1033092 (+0.11%); split: -0.00%, +0.11%
VMEM: 77855 -> 78514 (+0.85%); split: +0.85%, -0.01%
SMEM: 20501 -> 20593 (+0.45%); split: +0.46%, -0.01%
Copies: 9791 -> 9790 (-0.01%); split: -0.03%, +0.02%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
(cherry picked from commit d4c821da0e)
This commit is contained in:
parent
cdb5bcc059
commit
afe279ad86
3 changed files with 53 additions and 8 deletions
|
|
@ -238,7 +238,7 @@
|
|||
"description": "aco: don't combine precise max(min()) to med3",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"master_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1958,7 +1958,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
|||
Instruction* op1_instr, bool swap, const char *shuffle_str,
|
||||
Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
|
||||
bool *op1_clamp, uint8_t *op1_omod,
|
||||
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
|
||||
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel,
|
||||
bool *precise)
|
||||
{
|
||||
/* checks */
|
||||
if (op1_instr->opcode != op1)
|
||||
|
|
@ -1999,6 +2000,9 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
|||
else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
|
||||
return false;
|
||||
|
||||
*precise = op1_instr->definitions[0].isPrecise() ||
|
||||
op2_instr->definitions[0].isPrecise();
|
||||
|
||||
int shuffle[3];
|
||||
shuffle[shuffle_str[0] - '0'] = 0;
|
||||
shuffle[shuffle_str[1] - '0'] = 1;
|
||||
|
|
@ -2051,12 +2055,12 @@ bool combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode
|
|||
continue;
|
||||
|
||||
Operand operands[3];
|
||||
bool neg[3], abs[3], clamp;
|
||||
bool neg[3], abs[3], clamp, precise;
|
||||
uint8_t opsel = 0, omod = 0;
|
||||
if (match_op3_for_vop3(ctx, instr->opcode, op2,
|
||||
instr.get(), swap, shuffle,
|
||||
operands, neg, abs, &opsel,
|
||||
&clamp, &omod, NULL, NULL, NULL)) {
|
||||
&clamp, &omod, NULL, NULL, NULL, &precise)) {
|
||||
ctx.uses[instr->operands[swap].tempId()]--;
|
||||
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
|
||||
return true;
|
||||
|
|
@ -2074,13 +2078,13 @@ bool combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposi
|
|||
* max(-min(a, b), c) -> max3(-a, -b, c) */
|
||||
for (unsigned swap = 0; swap < 2; swap++) {
|
||||
Operand operands[3];
|
||||
bool neg[3], abs[3], clamp;
|
||||
bool neg[3], abs[3], clamp, precise;
|
||||
uint8_t opsel = 0, omod = 0;
|
||||
bool inbetween_neg;
|
||||
if (match_op3_for_vop3(ctx, instr->opcode, opposite,
|
||||
instr.get(), swap, "012",
|
||||
operands, neg, abs, &opsel,
|
||||
&clamp, &omod, &inbetween_neg, NULL, NULL) &&
|
||||
&clamp, &omod, &inbetween_neg, NULL, NULL, &precise) &&
|
||||
inbetween_neg) {
|
||||
ctx.uses[instr->operands[swap].tempId()]--;
|
||||
neg[1] = true;
|
||||
|
|
@ -2320,11 +2324,17 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr,
|
|||
|
||||
for (unsigned swap = 0; swap < 2; swap++) {
|
||||
Operand operands[3];
|
||||
bool neg[3], abs[3], clamp;
|
||||
bool neg[3], abs[3], clamp, precise;
|
||||
uint8_t opsel = 0, omod = 0;
|
||||
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
|
||||
"012", operands, neg, abs, &opsel,
|
||||
&clamp, &omod, NULL, NULL, NULL)) {
|
||||
&clamp, &omod, NULL, NULL, NULL, &precise)) {
|
||||
/* max(min(src, upper), lower) returns upper if src is NaN, but
|
||||
* med3(src, lower, upper) returns lower.
|
||||
*/
|
||||
if (precise && instr->opcode != min)
|
||||
continue;
|
||||
|
||||
int const0_idx = -1, const1_idx = -1;
|
||||
uint32_t const0 = 0, const1 = 0;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
|
|
|||
|
|
@ -122,3 +122,38 @@ BEGIN_TEST(optimize.cndmask)
|
|||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Optimizer test for clamp-like v_min_f32/v_max_f32 pairs with the constants
 * 0 and 4.0 (0x40800000 is the bit pattern of 4.0f). Each case builds one
 * nested min/max pair and hands the result to writeout() with a case index.
 * NOTE(review): the //>> and //! comment lines look like the expected
 * optimized output that the test harness matches against -- confirm before
 * editing or reordering them.
 */
BEGIN_TEST(optimize.clamp)
|
||||
//>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1", GFX9))
|
||||
return;
|
||||
|
||||
/* case 0: min(4.0, max(0, a)), no precise flag; a single v_med3_f32 is expected */
//! v1: %res0 = v_med3_f32 4.0, 0, %a
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(0, bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u),
|
||||
bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0])));
|
||||
|
||||
/* case 1: max(0, min(4.0, a)), no precise flag; a single v_med3_f32 is expected */
//! v1: %res1 = v_med3_f32 0, 4.0, %a
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u),
|
||||
bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0])));
|
||||
|
||||
/* correct NaN behaviour with precise */
|
||||
|
||||
/* case 2: the inner v_max_f32 result is marked precise and the outer
 * instruction is v_min_f32; a single v_med3_f32 is still expected.
 * NOTE(review): the second setPrecise(true) below is applied to `max` again,
 * after `min` has been built. It may have been intended for `min` -- confirm.
 * Marking `min` precise would presumably also change how %res2 is printed in
 * the expectation line.
 */
//! v1: %res2 = v_med3_f32 4.0, 0, %a
|
||||
//! p_unit_test 2, %res2
|
||||
Builder::Result max = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0]);
|
||||
max.def(0).setPrecise(true);
|
||||
Builder::Result min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), max);
|
||||
max.def(0).setPrecise(true);
|
||||
writeout(2, min);
|
||||
|
||||
/* case 3: the inner v_min_f32 result is marked precise and the outer
 * instruction is v_max_f32; the two instructions are expected to stay
 * separate instead of being merged into v_med3_f32.
 */
//! v1: (precise)%res3_tmp = v_min_f32 4.0, %a
|
||||
//! v1: %res3 = v_max_f32 0, %res3_tmp
|
||||
//! p_unit_test 3, %res3
|
||||
min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0]);
|
||||
min.def(0).setPrecise(true);
|
||||
writeout(3, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), min));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue