2013-12-12 00:30:16 -08:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2013 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "brw_fs.h"
|
|
|
|
|
#include "brw_fs_live_variables.h"
|
|
|
|
|
#include "brw_cfg.h"
|
|
|
|
|
|
2016-03-13 16:25:57 -07:00
|
|
|
using namespace brw;
|
|
|
|
|
|
2024-07-13 00:19:44 -07:00
|
|
|
/** @file
|
2014-12-16 11:30:12 -08:00
|
|
|
*
|
|
|
|
|
* Implements a pass that propagates the SAT modifier from a MOV.SAT into the
|
|
|
|
|
* instruction that produced the source of the MOV.SAT, thereby allowing the
|
|
|
|
|
* MOV's src and dst to be coalesced and the MOV removed.
|
|
|
|
|
*
|
|
|
|
|
* For instance,
|
|
|
|
|
*
|
|
|
|
|
* ADD tmp, src0, src1
|
|
|
|
|
* MOV.SAT dst, tmp
|
|
|
|
|
*
|
|
|
|
|
* would be transformed into
|
|
|
|
|
*
|
|
|
|
|
* ADD.SAT tmp, src0, src1
|
|
|
|
|
* MOV dst, tmp
|
2013-12-12 00:30:16 -08:00
|
|
|
*/
|
|
|
|
|
|
2024-06-25 09:23:35 -07:00
|
|
|
static bool
|
|
|
|
|
propagate_sat(fs_inst *inst, fs_inst *scan_inst)
|
|
|
|
|
{
|
|
|
|
|
if (scan_inst->dst.type != inst->dst.type) {
|
|
|
|
|
scan_inst->dst.type = inst->dst.type;
|
|
|
|
|
for (int i = 0; i < scan_inst->sources; i++) {
|
|
|
|
|
scan_inst->src[i].type = inst->dst.type;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (inst->src[0].negate) {
|
|
|
|
|
if (scan_inst->opcode == BRW_OPCODE_MUL) {
|
|
|
|
|
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else if (scan_inst->opcode == BRW_OPCODE_MAD) {
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
if (scan_inst->src[i].file == IMM) {
|
|
|
|
|
brw_reg_negate_immediate(&scan_inst->src[i]);
|
|
|
|
|
} else {
|
|
|
|
|
scan_inst->src[i].negate = !scan_inst->src[i].negate;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else if (scan_inst->opcode == BRW_OPCODE_ADD) {
|
|
|
|
|
if (scan_inst->src[1].file == IMM) {
|
|
|
|
|
if (!brw_reg_negate_immediate(&scan_inst->src[1])) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
scan_inst->src[1].negate = !scan_inst->src[1].negate;
|
|
|
|
|
}
|
|
|
|
|
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
scan_inst->saturate = true;
|
|
|
|
|
inst->saturate = false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-12 00:30:16 -08:00
|
|
|
static bool
|
2024-06-18 13:38:19 -07:00
|
|
|
opt_saturate_propagation_local(fs_visitor &s, bblock_t *block)
|
2013-12-12 00:30:16 -08:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
2014-09-08 12:05:25 -07:00
|
|
|
int ip = block->end_ip + 1;
|
2013-12-12 00:30:16 -08:00
|
|
|
|
2014-09-08 12:05:25 -07:00
|
|
|
foreach_inst_in_block_reverse(fs_inst, inst, block) {
|
|
|
|
|
ip--;
|
2013-12-12 00:30:16 -08:00
|
|
|
|
|
|
|
|
if (inst->opcode != BRW_OPCODE_MOV ||
|
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to
SEL instructions operating on unsigned types when the data is really
floating-point. Previous to this patch, saturate propagation would
happily transform
(+f0) sel g20:UD, g30:UD, g40:UD
mov.sat g50:F, g20:F
into
(+f0) sel.sat g20:UD, g30:UD, g40:UD
mov g50:F, g20:F
But since the meaning of .sat is dependent on the type of the
destination register, this is not valid.
Instead, allow saturate propagation to change the types of dest/source
on instructions that are simply copying data in order to propagate the
saturate modifier.
Fixes bad code gen in 158 programs.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-10-14 02:23:25 -07:00
|
|
|
!inst->saturate ||
|
2015-10-26 17:09:25 -07:00
|
|
|
inst->dst.file != VGRF ||
|
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to
SEL instructions operating on unsigned types when the data is really
floating-point. Previous to this patch, saturate propagation would
happily transform
(+f0) sel g20:UD, g30:UD, g40:UD
mov.sat g50:F, g20:F
into
(+f0) sel.sat g20:UD, g30:UD, g40:UD
mov g50:F, g20:F
But since the meaning of .sat is dependent on the type of the
destination register, this is not valid.
Instead, allow saturate propagation to change the types of dest/source
on instructions that are simply copying data in order to propagate the
saturate modifier.
Fixes bad code gen in 158 programs.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-10-14 02:23:25 -07:00
|
|
|
inst->dst.type != inst->src[0].type ||
|
2015-10-26 17:09:25 -07:00
|
|
|
inst->src[0].file != VGRF ||
|
2015-01-27 22:46:22 -08:00
|
|
|
inst->src[0].abs)
|
2013-12-12 00:30:16 -08:00
|
|
|
continue;
|
|
|
|
|
|
intel/brw: Use def analysis for simple cases of saturate propagation
I had hoped this would improve compilation performance too. I tried
several different long running fossils, and there was no difference.
Fossil-db results are all over the place from platform to platform.
All of the Tiger Lake shaders hurt for spills and fills are fragment
shaders in rdr2.
shader-db:
All Intel platforms had similar results. (Meteor Lake shown)
total instructions in shared programs: 19734088 -> 19733645 (<.01%)
instructions in affected programs: 71200 -> 70757 (-0.62%)
helped: 186
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 2.38 x̃: 1
helped stats (rel) min: 0.06% max: 2.79% x̄: 0.83% x̃: 0.48%
95% mean confidence interval for instructions value: -2.69 -2.07
95% mean confidence interval for instructions %-change: -0.93% -0.72%
Instructions are helped.
total cycles in shared programs: 916290473 -> 916180971 (-0.01%)
cycles in affected programs: 3403719 -> 3294217 (-3.22%)
helped: 89
HURT: 88
helped stats (abs) min: 1 max: 36685 x̄: 1424.13 x̃: 10
helped stats (rel) min: <.01% max: 26.75% x̄: 1.66% x̃: 0.46%
HURT stats (abs) min: 1 max: 8750 x̄: 195.98 x̃: 7
HURT stats (rel) min: <.01% max: 17.12% x̄: 1.57% x̃: 0.19%
95% mean confidence interval for cycles value: -1199.88 -37.43
95% mean confidence interval for cycles %-change: -0.66% 0.56%
Inconclusive result (%-change mean confidence interval includes 0).
fossil-db:
Meteor Lake and DG2 had similar results. (Meteor Lake shown)
Totals:
Instrs: 151458346 -> 151457413 (-0.00%)
Cycle count: 17202426472 -> 17202406469 (-0.00%); split: -0.00%, +0.00%
Max live registers: 31989626 -> 31989959 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5500560 -> 5500384 (-0.00%)
Totals from 479 (0.08% of 628970) affected shaders:
Instrs: 398836 -> 397903 (-0.23%)
Cycle count: 18064565 -> 18044562 (-0.11%); split: -0.40%, +0.29%
Max live registers: 36663 -> 36996 (+0.91%); split: -0.02%, +0.92%
Max dispatch width: 4392 -> 4216 (-4.01%)
Tiger Lake
Totals:
Instrs: 149913036 -> 149912182 (-0.00%); split: -0.00%, +0.00%
Cycle count: 15560086488 -> 15560135139 (+0.00%); split: -0.00%, +0.00%
Spill count: 61241 -> 61251 (+0.02%)
Fill count: 107304 -> 107314 (+0.01%)
Max live registers: 31964752 -> 31965119 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5517568 -> 5517248 (-0.01%)
Totals from 486 (0.08% of 628673) affected shaders:
Instrs: 396065 -> 395211 (-0.22%); split: -0.23%, +0.01%
Cycle count: 17677691 -> 17726342 (+0.28%); split: -0.23%, +0.51%
Spill count: 1302 -> 1312 (+0.77%)
Fill count: 3746 -> 3756 (+0.27%)
Max live registers: 37538 -> 37905 (+0.98%); split: -0.02%, +0.99%
Max dispatch width: 4576 -> 4256 (-6.99%)
Ice Lake
Totals:
Instrs: 151348422 -> 151347463 (-0.00%)
Cycle count: 15155678386 -> 15155691726 (+0.00%); split: -0.00%, +0.00%
Fill count: 108114 -> 108111 (-0.00%)
Max live registers: 32444479 -> 32444814 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5611288 -> 5611256 (-0.00%)
Totals from 483 (0.08% of 634352) affected shaders:
Instrs: 393333 -> 392374 (-0.24%)
Cycle count: 16706439 -> 16719779 (+0.08%); split: -0.14%, +0.22%
Fill count: 3654 -> 3651 (-0.08%)
Max live registers: 37246 -> 37581 (+0.90%); split: -0.02%, +0.92%
Max dispatch width: 4312 -> 4280 (-0.74%)
Skylake
Totals:
Instrs: 140741190 -> 140734481 (-0.00%); split: -0.00%, +0.00%
Cycle count: 14659096516 -> 14659116346 (+0.00%); split: -0.00%, +0.00%
Max live registers: 31757558 -> 31757725 (+0.00%)
Max dispatch width: 5470040 -> 5469920 (-0.00%)
Totals from 3542 (0.57% of 624449) affected shaders:
Instrs: 3081309 -> 3074600 (-0.22%); split: -0.22%, +0.00%
Cycle count: 228843073 -> 228862903 (+0.01%); split: -0.11%, +0.12%
Max live registers: 304531 -> 304698 (+0.05%)
Max dispatch width: 31016 -> 30896 (-0.39%)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29774>
2024-06-25 09:28:45 -07:00
|
|
|
const brw::def_analysis &defs = s.def_analysis.require();
|
|
|
|
|
fs_inst *def = defs.get(inst->src[0]);
|
|
|
|
|
|
|
|
|
|
if (def != NULL) {
|
|
|
|
|
if (def->exec_size != inst->exec_size)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->dst.type != inst->dst.type && !def->can_change_types())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->flags_written(s.devinfo) != 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->saturate) {
|
|
|
|
|
inst->saturate = false;
|
|
|
|
|
progress = true;
|
|
|
|
|
continue;
|
|
|
|
|
} else if (defs.get_use_count(def->dst) == 1 &&
|
|
|
|
|
def->can_do_saturate() &&
|
|
|
|
|
propagate_sat(inst, def)) {
|
|
|
|
|
progress = true;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If the def is in a different block the liveness based pass will
|
|
|
|
|
* not be able to make progress, so skip it.
|
|
|
|
|
*/
|
|
|
|
|
if (block != defs.get_block(inst->src[0]))
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-18 13:38:19 -07:00
|
|
|
const fs_live_variables &live = s.live_analysis.require();
|
2016-03-13 16:25:57 -07:00
|
|
|
int src_var = live.var_from_reg(inst->src[0]);
|
|
|
|
|
int src_end_ip = live.end[src_var];
|
2013-12-12 00:30:16 -08:00
|
|
|
|
|
|
|
|
bool interfered = false;
|
2015-10-20 11:16:00 +02:00
|
|
|
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
2018-10-19 15:33:50 -07:00
|
|
|
if (scan_inst->exec_size == inst->exec_size &&
|
|
|
|
|
regions_overlap(scan_inst->dst, scan_inst->size_written,
|
2024-06-19 10:50:51 -07:00
|
|
|
inst->src[0], inst->size_read(s.devinfo, 0))) {
|
2019-04-24 12:38:28 +02:00
|
|
|
if (scan_inst->is_partial_write() ||
|
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to
SEL instructions operating on unsigned types when the data is really
floating-point. Previous to this patch, saturate propagation would
happily transform
(+f0) sel g20:UD, g30:UD, g40:UD
mov.sat g50:F, g20:F
into
(+f0) sel.sat g20:UD, g30:UD, g40:UD
mov g50:F, g20:F
But since the meaning of .sat is dependent on the type of the
destination register, this is not valid.
Instead, allow saturate propagation to change the types of dest/source
on instructions that are simply copying data in order to propagate the
saturate modifier.
Fixes bad code gen in 158 programs.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-10-14 02:23:25 -07:00
|
|
|
(scan_inst->dst.type != inst->dst.type &&
|
|
|
|
|
!scan_inst->can_change_types()))
|
2015-01-27 22:43:28 -08:00
|
|
|
break;
|
|
|
|
|
|
intel/brw: Don't propagate saturate to an instruction that writes flags
There are two problems.
1. This is not NaN safe. 'add.le.sat dst F, Inf F, -Inf F' has a
different result than 'add dst F, Inf F, -Inf F; cmp.le null, dst F, 0F'.
2. Ignoring the first problem, this only produces the desired flags
for LE and G. All other cases can produce the wrong result.
For example, batman_arkham_city_goty.foz 6a63c4caacaa0dae has the
following code:
mad.ge.f0.0(8) g51<1>F g50<8,8,1>F g46<8,8,1>F g11<1,1,1>F
mov.sat(8) g52<1>F g51<1,1,0>F
...
(+f0.0) sel(8) g54<1>UD g53<8,8,1>UD 0x3f000000UD
Without this commit, the saturate is incorrectly propagated to the MAD.
A similar case exists in witcher_3_dxvk_g2.foz 5b03243be667a275.
There are even worse cases like total_war_warhammer3.dx12vk-g6.foz
78328466761ef7ab and ee920491573860fc. The former has the following
code (and the latter has very similar code):
mad.l.f0.0(16) g95<1>F g93<8,8,1>F g62<8,8,1>F g68<1,1,1>F
...
mov.sat(16) g109<1>F -g95<1,1,0>F
...
(+f0.0) sel(16) g68<1>UD g111<1,1,0>UD g54<1,1,0>UD
(+f0.0) sel(16) g70<1>UD g113<1,1,0>UD g56<1,1,0>UD
(+f0.0) sel(16) g72<1>UD g115<1,1,0>UD g58<1,1,0>UD
Saturate propagation makes a hash of this code:
mad.sat.l.f0.0(16) g106<1>F -g93<8,8,1>F -g62<8,8,1>F g68<1,1,1>F
...
(+f0.0) sel(16) g70<1>UD g110<1,1,0>UD g56<1,1,0>UD
(+f0.0) sel(16) g72<1>UD g112<1,1,0>UD g58<1,1,0>UD
(+f0.0) sel(16) g68<1>UD g108<1,1,0>UD g54<1,1,0>UD
Not only is the saturate incorrectly applied to the MAD, but the MAD
result is negated without changing the conditional modifier to G!
NOTE: Backports of this commit to stable branches may need to be more
like the following commit to elk.
shader-db:
All Intel platforms had similar results. (Meteor Lake shown)
total instructions in shared programs: 19729375 -> 19729377 (<.01%)
instructions in affected programs: 112 -> 114 (1.79%)
helped: 0
HURT: 2
total cycles in shared programs: 916234266 -> 916234288 (<.01%)
cycles in affected programs: 636 -> 658 (3.46%)
helped: 0
HURT: 2
fossil-db:
All Intel platforms had similar results. (Meteor Lake shown)
Totals:
Instrs: 151531594 -> 151531601 (+0.00%)
Cycle count: 17209107419 -> 17209107474 (+0.00%); split: -0.00%, +0.00%
Totals from 6 (0.00% of 630198) affected shaders:
Instrs: 4550 -> 4557 (+0.15%)
Cycle count: 194629 -> 194684 (+0.03%); split: -0.00%, +0.03%
Fixes: 947c828d5cb ("i965/fs: Add a saturation propagation optimization pass.")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29774>
2024-06-25 13:14:58 -07:00
|
|
|
if (scan_inst->flags_written(s.devinfo) != 0)
|
|
|
|
|
break;
|
|
|
|
|
|
2014-06-28 18:00:27 -07:00
|
|
|
if (scan_inst->saturate) {
|
2013-12-12 00:30:16 -08:00
|
|
|
inst->saturate = false;
|
|
|
|
|
progress = true;
|
2016-03-06 19:03:56 -08:00
|
|
|
} else if (src_end_ip == ip || inst->dst.equals(inst->src[0])) {
|
2024-06-25 09:23:35 -07:00
|
|
|
if (scan_inst->can_do_saturate() &&
|
|
|
|
|
propagate_sat(inst, scan_inst)) {
|
2014-06-28 18:00:27 -07:00
|
|
|
progress = true;
|
|
|
|
|
}
|
2013-12-12 00:30:16 -08:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
2014-03-17 10:39:43 -07:00
|
|
|
for (int i = 0; i < scan_inst->sources; i++) {
|
2015-10-26 17:09:25 -07:00
|
|
|
if (scan_inst->src[i].file == VGRF &&
|
2015-10-26 04:35:14 -07:00
|
|
|
scan_inst->src[i].nr == inst->src[0].nr &&
|
2022-12-07 19:27:45 +02:00
|
|
|
regions_overlap(
|
2024-06-19 10:50:51 -07:00
|
|
|
scan_inst->src[i], scan_inst->size_read(s.devinfo, i),
|
|
|
|
|
inst->src[0], inst->size_read(s.devinfo, 0))) {
|
2015-02-10 16:25:47 -08:00
|
|
|
if (scan_inst->opcode != BRW_OPCODE_MOV ||
|
|
|
|
|
!scan_inst->saturate ||
|
|
|
|
|
scan_inst->src[0].abs ||
|
2015-01-27 22:46:22 -08:00
|
|
|
scan_inst->src[0].negate ||
|
|
|
|
|
scan_inst->src[0].abs != inst->src[0].abs ||
|
|
|
|
|
scan_inst->src[0].negate != inst->src[0].negate) {
|
2015-02-10 16:25:47 -08:00
|
|
|
interfered = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-12-12 00:30:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (interfered)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_opt_saturate_propagation(fs_visitor &s)
|
2013-12-12 00:30:16 -08:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2024-01-03 11:03:51 -08:00
|
|
|
foreach_block (block, s.cfg) {
|
2024-06-18 13:38:19 -07:00
|
|
|
progress = opt_saturate_propagation_local(s, block) || progress;
|
2013-12-12 00:30:16 -08:00
|
|
|
}
|
|
|
|
|
|
2014-09-08 12:09:44 -07:00
|
|
|
/* Live intervals are still valid. */
|
2013-12-12 00:30:16 -08:00
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|