intel/brw: Combine constants for src0 of integer multiply too

The majority of cases that would have been affected by this actually
had both sources as integer constants. The earlier commit "intel/rt:
Don't directly generate umul_32x16" allowed those to be constant
folded.

v2: Move the a*-1 block to be near the existing a*-1 block.

No shader-db changes on any Intel platform.

fossil-db results:

All Intel platforms had similar results. (Ice Lake shown)
Totals:
Instrs: 165510246 -> 165510222 (-0.00%)
Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00%

Totals from 46 (0.01% of 656118) affected shaders:
Instrs: 36010 -> 35986 (-0.07%)
Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07%

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
This commit is contained in:
Ian Romanick 2024-01-16 17:25:39 -08:00 committed by Marge Bot
parent dd3bed1d92
commit e7480f94c1
3 changed files with 33 additions and 5 deletions

View file

@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
case BRW_OPCODE_ASR:
case BRW_OPCODE_BFI1:
case BRW_OPCODE_MUL:
case BRW_OPCODE_ROL:
case BRW_OPCODE_ROR:
case BRW_OPCODE_SHL:

View file

@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
inst->src[arg] = val;
progress = true;
} else if (arg == 0 && inst->src[1].file != IMM) {
/* Don't copy propagate the constant in situations like
/* We used to not copy propagate the constant in situations like
*
* mov(8) g8<1>D 0x7fffffffD
* mul(8) g16<1>D g8<8,8,1>D g15<16,8,2>W
*
* On platforms that only have a 32x16 multiplier, this will
* On platforms that only have a 32x16 multiplier, this would
* result in lowering the multiply to
*
* mul(8) g15<1>D g14<8,8,1>D 0xffffUW
@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
* add(8) g15.1<2>UW g15.1<16,8,2>UW g16<16,8,2>UW
*
* On Gfx8 and Gfx9, which have the full 32x32 multiplier, it
* results in
* would result in
*
* mul(8) g16<1>D g15<16,8,2>W 0x7fffffffD
*
@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
*
* When multiplying a DW and any lower precision integer, the
* DW operand must be on src0.
*
* So it would have been invalid. However, brw_fs_opt_combine_constants
* will now "fix" the constant.
*/
if (inst->opcode == BRW_OPCODE_MUL &&
type_sz(inst->src[1].type) < 4 &&
type_sz(val.type) == 4)
(inst->src[0].type == BRW_REGISTER_TYPE_D ||
inst->src[0].type == BRW_REGISTER_TYPE_UD)) {
inst->src[0] = val;
inst->src[0].type = BRW_REGISTER_TYPE_D;
progress = true;
break;
}
/* Fit this constant in by commuting the operands.
* Exception: we can't do this for 32-bit integer MUL/MACH

View file

@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
break;
case BRW_OPCODE_MUL:
if (inst->src[1].file != IMM)
if (inst->src[0].file != IMM && inst->src[1].file != IMM)
continue;
if (brw_reg_type_is_floating_point(inst->src[1].type))
@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s)
inst->writes_accumulator_implicitly(devinfo)))
break;
if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1;
inst->src[0] = brw_imm_d(0);
inst->src[1] = reg_undef;
progress = true;
break;
}
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s)
}
/* a * -1.0 = -a */
if (inst->src[0].is_negative_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1;
inst->src[0] = inst->src[1];
inst->src[0].negate = !inst->src[0].negate;
inst->src[1] = reg_undef;
progress = true;
break;
}
if (inst->src[1].is_negative_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1;