mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
intel/brw: Combine constants for src0 of integer multiply too
The majority of cases that would have been affected by this actually had both sources as integer constants. The earlier commit "intel/rt: Don't directly generate umul_32x16" allowed those to be constant folded. v2: Move the a*-1 block to be near the existing a*-1 block. No shader-db changes on any Intel platform. fossil-db results: All Intel platforms had similar results. (Ice Lake shown) Totals: Instrs: 165510246 -> 165510222 (-0.00%) Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00% Totals from 46 (0.01% of 656118) affected shaders: Instrs: 36010 -> 35986 (-0.07%) Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07% Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
This commit is contained in:
parent
dd3bed1d92
commit
e7480f94c1
3 changed files with 33 additions and 5 deletions
|
|
@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_MUL:
|
||||
case BRW_OPCODE_ROL:
|
||||
case BRW_OPCODE_ROR:
|
||||
case BRW_OPCODE_SHL:
|
||||
|
|
|
|||
|
|
@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (arg == 0 && inst->src[1].file != IMM) {
|
||||
/* Don't copy propagate the constant in situations like
|
||||
/* We used to not copy propagate the constant in situations like
|
||||
*
|
||||
* mov(8) g8<1>D 0x7fffffffD
|
||||
* mul(8) g16<1>D g8<8,8,1>D g15<16,8,2>W
|
||||
*
|
||||
* On platforms that only have a 32x16 multiplier, this will
|
||||
* On platforms that only have a 32x16 multiplier, this would
|
||||
* result in lowering the multiply to
|
||||
*
|
||||
* mul(8) g15<1>D g14<8,8,1>D 0xffffUW
|
||||
|
|
@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* add(8) g15.1<2>UW g15.1<16,8,2>UW g16<16,8,2>UW
|
||||
*
|
||||
* On Gfx8 and Gfx9, which have the full 32x32 multiplier, it
|
||||
* results in
|
||||
* would result in
|
||||
*
|
||||
* mul(8) g16<1>D g15<16,8,2>W 0x7fffffffD
|
||||
*
|
||||
|
|
@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
*
|
||||
* When multiplying a DW and any lower precision integer, the
|
||||
* DW operand must be on src0.
|
||||
*
|
||||
* So it would have been invalid. However, brw_fs_opt_combine_constants
|
||||
* will now "fix" the constant.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_MUL &&
|
||||
type_sz(inst->src[1].type) < 4 &&
|
||||
type_sz(val.type) == 4)
|
||||
(inst->src[0].type == BRW_REGISTER_TYPE_D ||
|
||||
inst->src[0].type == BRW_REGISTER_TYPE_UD)) {
|
||||
inst->src[0] = val;
|
||||
inst->src[0].type = BRW_REGISTER_TYPE_D;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Fit this constant in by commuting the operands.
|
||||
* Exception: we can't do this for 32-bit integer MUL/MACH
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
if (inst->src[1].file != IMM)
|
||||
if (inst->src[0].file != IMM && inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_reg_type_is_floating_point(inst->src[1].type))
|
||||
|
|
@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
||||
if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_d(0);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * 1.0 = a */
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
|
|
@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
}
|
||||
|
||||
/* a * -1.0 = -a */
|
||||
if (inst->src[0].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[1].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue