mesa/src/intel/compiler/brw_opt_saturate_propagation.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

147 lines
4.4 KiB
C++
Raw Normal View History

/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_analysis.h"
#include "brw_shader.h"
#include "brw_cfg.h"
/** @file
*
* Implements a pass that propagates the SAT modifier from a MOV.SAT into the
* instruction that produced the source of the MOV.SAT, thereby allowing the
* MOV's src and dst to be coalesced and the MOV removed.
*
* For instance,
*
* ADD tmp, src0, src1
* MOV.SAT dst, tmp
*
* would be transformed into
*
* ADD.SAT tmp, src0, src1
* MOV dst, tmp
*/
/**
 * Move the saturate modifier from a saturating MOV onto the instruction that
 * produced the MOV's source.
 *
 * \param inst       The MOV currently carrying the saturate flag.  On
 *                   success its saturate flag and its src[0] negate flag are
 *                   both cleared.
 * \param scan_inst  The instruction that receives the saturate flag.
 *
 * If scan_inst's destination type differs from inst's, scan_inst's
 * destination and every one of its sources are retyped to inst->dst.type.
 *
 * If inst->src[0] is negated, the negation is folded into scan_inst's
 * sources.  This is only done for MUL, MAD and ADD; for any other opcode the
 * function returns false without modifying either instruction.
 *
 * \return true if the saturate flag was moved onto scan_inst, false if the
 *         negation on inst->src[0] could not be absorbed.
 */
static bool
propagate_sat(brw_inst *inst, brw_inst *scan_inst)
{
   const bool fold_negate = inst->src[0].negate;

   /* Bail out before touching scan_inst when the negation cannot be folded
    * into it.  Doing the retyping below first would leave scan_inst modified
    * on a path that reports failure to the caller.
    */
   if (fold_negate &&
       scan_inst->opcode != BRW_OPCODE_MUL &&
       scan_inst->opcode != BRW_OPCODE_MAD &&
       scan_inst->opcode != BRW_OPCODE_ADD)
      return false;

   if (scan_inst->dst.type != inst->dst.type) {
      scan_inst->dst.type = inst->dst.type;
      for (int i = 0; i < scan_inst->sources; i++) {
         scan_inst->src[i].type = inst->dst.type;
      }
   }

   if (fold_negate) {
      if (scan_inst->opcode == BRW_OPCODE_MUL) {
         /* Negating one factor negates the product. */
         scan_inst->src[0].negate = !scan_inst->src[0].negate;
         inst->src[0].negate = false;
      } else if (scan_inst->opcode == BRW_OPCODE_MAD) {
         /* Only src[0] and src[1] are negated; src[2] is left alone.
          * Presumably MAD computes src0 + src1 * src2 -- TODO confirm.
          *
          * NOTE(review): unlike the ADD path, the result of
          * brw_reg_negate_immediate() is not checked here.
          */
         for (int i = 0; i < 2; i++) {
            if (scan_inst->src[i].file == IMM) {
               brw_reg_negate_immediate(&scan_inst->src[i]);
            } else {
               scan_inst->src[i].negate = !scan_inst->src[i].negate;
            }
         }
         inst->src[0].negate = false;
      } else if (scan_inst->opcode == BRW_OPCODE_ADD) {
         /* Both addends must be negated.  An immediate src[1] is negated in
          * place; if that fails, give up.
          *
          * NOTE(review): if the types differed above, scan_inst has already
          * been retyped when this failure path is taken.
          */
         if (scan_inst->src[1].file == IMM) {
            if (!brw_reg_negate_immediate(&scan_inst->src[1])) {
               return false;
            }
         } else {
            scan_inst->src[1].negate = !scan_inst->src[1].negate;
         }
         scan_inst->src[0].negate = !scan_inst->src[0].negate;
         inst->src[0].negate = false;
      }
   }

   scan_inst->saturate = true;
   inst->saturate = false;

   return true;
}
/**
 * Run saturate propagation over the instructions of a single basic block.
 *
 * The block is walked in reverse.  For every saturating MOV whose source and
 * destination are both VGRFs of the same type (and whose source has no abs
 * modifier), the defining instruction of the source is looked up through the
 * shader's def analysis.  If that instruction already saturates, the MOV's
 * saturate flag is dropped; otherwise, if the MOV is the only use of the
 * definition and the definition can saturate, propagate_sat() is used to
 * move the flag onto it.
 *
 * \param s      Shader being optimized.
 * \param ips    Not read by this function.
 * \param block  Block whose instructions are scanned.
 *
 * \return true if any instruction was modified.
 */
static bool
opt_saturate_propagation_local(brw_shader &s,
                               const brw_ip_ranges &ips,
                               bblock_t *block)
{
   bool progress = false;

   foreach_inst_in_block_reverse(brw_inst, inst, block) {
      if (inst->opcode != BRW_OPCODE_MOV ||
          !inst->saturate ||
          inst->dst.file != VGRF ||
          inst->dst.type != inst->src[0].type ||
          inst->src[0].file != VGRF ||
          inst->src[0].abs)
         continue;

      /* Requested only once a candidate MOV has been found, so blocks
       * without one never ask for the analysis.
       */
      const brw_def_analysis &defs = s.def_analysis.require();
      brw_inst *def = defs.get(inst->src[0]);

      if (def == NULL)
         continue;

      if (def->exec_size != inst->exec_size)
         continue;

      if (def->dst.type != inst->dst.type && !def->can_change_types())
         continue;

      if (def->flags_written(s.devinfo) != 0)
         continue;

      if (def->saturate) {
         /* The saturate on the MOV is only redundant when the source is
          * read unmodified.  With a negated source the MOV computes
          * sat(-sat(x)), which is not the same value as -sat(x), so the
          * flag has to stay.
          */
         if (inst->src[0].negate)
            continue;

         inst->saturate = false;
         progress = true;
      } else if (defs.get_use_count(def->dst) == 1 &&
                 def->can_do_saturate() &&
                 propagate_sat(inst, def)) {
         progress = true;
      }
   }

   return progress;
}
/**
 * Entry point of the saturate propagation pass.
 *
 * Applies opt_saturate_propagation_local() to every block of the shader's
 * CFG and, if anything changed, invalidates the analyses that depend on
 * instruction detail.
 *
 * \param s  Shader to optimize.
 *
 * \return true if at least one block was modified.
 */
bool
brw_opt_saturate_propagation(brw_shader &s)
{
   const brw_ip_ranges &ips = s.ip_ranges_analysis.require();
   bool changed = false;

   foreach_block (block, s.cfg) {
      /* Every block is visited regardless of earlier results. */
      if (opt_saturate_propagation_local(s, ips, block))
         changed = true;
   }

   if (!changed)
      return false;

   s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTION_DETAIL);
   return true;
}