2013-12-12 00:30:16 -08:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2013 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2024-12-06 19:48:54 -08:00
|
|
|
#include "brw_analysis.h"
|
2025-02-05 14:25:15 -08:00
|
|
|
#include "brw_shader.h"
|
2013-12-12 00:30:16 -08:00
|
|
|
#include "brw_cfg.h"
|
|
|
|
|
|
2024-07-13 00:19:44 -07:00
|
|
|
/** @file
|
2014-12-16 11:30:12 -08:00
|
|
|
*
|
|
|
|
|
* Implements a pass that propagates the SAT modifier from a MOV.SAT into the
|
|
|
|
|
* instruction that produced the source of the MOV.SAT, thereby allowing the
|
|
|
|
|
* MOV's src and dst to be coalesced and the MOV removed.
|
|
|
|
|
*
|
|
|
|
|
* For instance,
|
|
|
|
|
*
|
|
|
|
|
* ADD tmp, src0, src1
|
|
|
|
|
* MOV.SAT dst, tmp
|
|
|
|
|
*
|
|
|
|
|
* would be transformed into
|
|
|
|
|
*
|
|
|
|
|
* ADD.SAT tmp, src0, src1
|
|
|
|
|
* MOV dst, tmp
|
2013-12-12 00:30:16 -08:00
|
|
|
*/
|
|
|
|
|
|
2024-06-25 09:23:35 -07:00
|
|
|
static bool
|
2024-12-07 00:23:07 -08:00
|
|
|
propagate_sat(brw_inst *inst, brw_inst *scan_inst)
|
2024-06-25 09:23:35 -07:00
|
|
|
{
|
|
|
|
|
if (scan_inst->dst.type != inst->dst.type) {
|
|
|
|
|
scan_inst->dst.type = inst->dst.type;
|
|
|
|
|
for (int i = 0; i < scan_inst->sources; i++) {
|
|
|
|
|
scan_inst->src[i].type = inst->dst.type;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (inst->src[0].negate) {
|
|
|
|
|
if (scan_inst->opcode == BRW_OPCODE_MUL) {
|
|
|
|
|
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else if (scan_inst->opcode == BRW_OPCODE_MAD) {
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
if (scan_inst->src[i].file == IMM) {
|
|
|
|
|
brw_reg_negate_immediate(&scan_inst->src[i]);
|
|
|
|
|
} else {
|
|
|
|
|
scan_inst->src[i].negate = !scan_inst->src[i].negate;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else if (scan_inst->opcode == BRW_OPCODE_ADD) {
|
|
|
|
|
if (scan_inst->src[1].file == IMM) {
|
|
|
|
|
if (!brw_reg_negate_immediate(&scan_inst->src[1])) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
scan_inst->src[1].negate = !scan_inst->src[1].negate;
|
|
|
|
|
}
|
|
|
|
|
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
|
|
|
|
inst->src[0].negate = false;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
scan_inst->saturate = true;
|
|
|
|
|
inst->saturate = false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-12 00:30:16 -08:00
|
|
|
static bool
|
2025-03-11 13:20:09 -07:00
|
|
|
opt_saturate_propagation_local(brw_shader &s,
|
|
|
|
|
const brw_ip_ranges &ips,
|
|
|
|
|
bblock_t *block)
|
2013-12-12 00:30:16 -08:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2024-12-07 00:23:07 -08:00
|
|
|
foreach_inst_in_block_reverse(brw_inst, inst, block) {
|
2013-12-12 00:30:16 -08:00
|
|
|
if (inst->opcode != BRW_OPCODE_MOV ||
|
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to
SEL instructions operating on unsigned types when the data is really
floating-point. Previous to this patch, saturate propagation would
happily transform
(+f0) sel g20:UD, g30:UD, g40:UD
mov.sat g50:F, g20:F
into
(+f0) sel.sat g20:UD, g30:UD, g40:UD
mov g50:F, g20:F
But since the meaning of .sat is dependent on the type of the
destination register, this is not valid.
Instead, allow saturate propagation to change the types of dest/source
on instructions that are simply copying data in order to propagate the
saturate modifier.
Fixes bad code gen in 158 programs.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-10-14 02:23:25 -07:00
|
|
|
!inst->saturate ||
|
2015-10-26 17:09:25 -07:00
|
|
|
inst->dst.file != VGRF ||
|
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to
SEL instructions operating on unsigned types when the data is really
floating-point. Previous to this patch, saturate propagation would
happily transform
(+f0) sel g20:UD, g30:UD, g40:UD
mov.sat g50:F, g20:F
into
(+f0) sel.sat g20:UD, g30:UD, g40:UD
mov g50:F, g20:F
But since the meaning of .sat is dependent on the type of the
destination register, this is not valid.
Instead, allow saturate propagation to change the types of dest/source
on instructions that are simply copying data in order to propagate the
saturate modifier.
Fixes bad code gen in 158 programs.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-10-14 02:23:25 -07:00
|
|
|
inst->dst.type != inst->src[0].type ||
|
2015-10-26 17:09:25 -07:00
|
|
|
inst->src[0].file != VGRF ||
|
2015-01-27 22:46:22 -08:00
|
|
|
inst->src[0].abs)
|
2013-12-12 00:30:16 -08:00
|
|
|
continue;
|
|
|
|
|
|
2025-03-13 00:26:43 -07:00
|
|
|
const brw_def_analysis &defs = s.def_analysis.require();
|
2024-12-07 00:23:07 -08:00
|
|
|
brw_inst *def = defs.get(inst->src[0]);
|
intel/brw: Use def analysis for simple cases of saturate propagation
I had hoped this would improve compilation performance too. I tried
several different long running fossils, and there was no difference.
Fossil-db results are all over the place from platform to platform.
All of the Tiger Lake shaders hurt for spills and fills are fragment
shaders in rdr2.
shader-db:
All Intel platforms had similar results. (Meteor Lake shown)
total instructions in shared programs: 19734088 -> 19733645 (<.01%)
instructions in affected programs: 71200 -> 70757 (-0.62%)
helped: 186
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 2.38 x̃: 1
helped stats (rel) min: 0.06% max: 2.79% x̄: 0.83% x̃: 0.48%
95% mean confidence interval for instructions value: -2.69 -2.07
95% mean confidence interval for instructions %-change: -0.93% -0.72%
Instructions are helped.
total cycles in shared programs: 916290473 -> 916180971 (-0.01%)
cycles in affected programs: 3403719 -> 3294217 (-3.22%)
helped: 89
HURT: 88
helped stats (abs) min: 1 max: 36685 x̄: 1424.13 x̃: 10
helped stats (rel) min: <.01% max: 26.75% x̄: 1.66% x̃: 0.46%
HURT stats (abs) min: 1 max: 8750 x̄: 195.98 x̃: 7
HURT stats (rel) min: <.01% max: 17.12% x̄: 1.57% x̃: 0.19%
95% mean confidence interval for cycles value: -1199.88 -37.43
95% mean confidence interval for cycles %-change: -0.66% 0.56%
Inconclusive result (%-change mean confidence interval includes 0).
fossil-db:
Meteor Lake and DG2 had similar results. (Meteor Lake shown)
Totals:
Instrs: 151458346 -> 151457413 (-0.00%)
Cycle count: 17202426472 -> 17202406469 (-0.00%); split: -0.00%, +0.00%
Max live registers: 31989626 -> 31989959 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5500560 -> 5500384 (-0.00%)
Totals from 479 (0.08% of 628970) affected shaders:
Instrs: 398836 -> 397903 (-0.23%)
Cycle count: 18064565 -> 18044562 (-0.11%); split: -0.40%, +0.29%
Max live registers: 36663 -> 36996 (+0.91%); split: -0.02%, +0.92%
Max dispatch width: 4392 -> 4216 (-4.01%)
Tiger Lake
Totals:
Instrs: 149913036 -> 149912182 (-0.00%); split: -0.00%, +0.00%
Cycle count: 15560086488 -> 15560135139 (+0.00%); split: -0.00%, +0.00%
Spill count: 61241 -> 61251 (+0.02%)
Fill count: 107304 -> 107314 (+0.01%)
Max live registers: 31964752 -> 31965119 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5517568 -> 5517248 (-0.01%)
Totals from 486 (0.08% of 628673) affected shaders:
Instrs: 396065 -> 395211 (-0.22%); split: -0.23%, +0.01%
Cycle count: 17677691 -> 17726342 (+0.28%); split: -0.23%, +0.51%
Spill count: 1302 -> 1312 (+0.77%)
Fill count: 3746 -> 3756 (+0.27%)
Max live registers: 37538 -> 37905 (+0.98%); split: -0.02%, +0.99%
Max dispatch width: 4576 -> 4256 (-6.99%)
Ice Lake
Totals:
Instrs: 151348422 -> 151347463 (-0.00%)
Cycle count: 15155678386 -> 15155691726 (+0.00%); split: -0.00%, +0.00%
Fill count: 108114 -> 108111 (-0.00%)
Max live registers: 32444479 -> 32444814 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5611288 -> 5611256 (-0.00%)
Totals from 483 (0.08% of 634352) affected shaders:
Instrs: 393333 -> 392374 (-0.24%)
Cycle count: 16706439 -> 16719779 (+0.08%); split: -0.14%, +0.22%
Fill count: 3654 -> 3651 (-0.08%)
Max live registers: 37246 -> 37581 (+0.90%); split: -0.02%, +0.92%
Max dispatch width: 4312 -> 4280 (-0.74%)
Skylake
Totals:
Instrs: 140741190 -> 140734481 (-0.00%); split: -0.00%, +0.00%
Cycle count: 14659096516 -> 14659116346 (+0.00%); split: -0.00%, +0.00%
Max live registers: 31757558 -> 31757725 (+0.00%)
Max dispatch width: 5470040 -> 5469920 (-0.00%)
Totals from 3542 (0.57% of 624449) affected shaders:
Instrs: 3081309 -> 3074600 (-0.22%); split: -0.22%, +0.00%
Cycle count: 228843073 -> 228862903 (+0.01%); split: -0.11%, +0.12%
Max live registers: 304531 -> 304698 (+0.05%)
Max dispatch width: 31016 -> 30896 (-0.39%)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29774>
2024-06-25 09:28:45 -07:00
|
|
|
|
|
|
|
|
if (def != NULL) {
|
|
|
|
|
if (def->exec_size != inst->exec_size)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->dst.type != inst->dst.type && !def->can_change_types())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->flags_written(s.devinfo) != 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (def->saturate) {
|
|
|
|
|
inst->saturate = false;
|
|
|
|
|
progress = true;
|
|
|
|
|
} else if (defs.get_use_count(def->dst) == 1 &&
|
|
|
|
|
def->can_do_saturate() &&
|
|
|
|
|
propagate_sat(inst, def)) {
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
2013-12-12 00:30:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
2024-12-07 10:25:45 -08:00
|
|
|
brw_opt_saturate_propagation(brw_shader &s)
|
2013-12-12 00:30:16 -08:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2025-03-11 13:20:09 -07:00
|
|
|
const brw_ip_ranges &ips = s.ip_ranges_analysis.require();
|
|
|
|
|
|
2024-01-03 11:03:51 -08:00
|
|
|
foreach_block (block, s.cfg) {
|
2025-03-11 13:20:09 -07:00
|
|
|
progress = opt_saturate_propagation_local(s, ips, block) || progress;
|
2013-12-12 00:30:16 -08:00
|
|
|
}
|
|
|
|
|
|
2025-03-10 16:08:31 -07:00
|
|
|
if (progress)
|
|
|
|
|
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTION_DETAIL);
|
2013-12-12 00:30:16 -08:00
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|