mesa/src/intel/compiler/brw_lower_pack.cpp

86 lines
2.9 KiB
C++
Raw Normal View History

/*
* Copyright © 2015 Connor Abbott
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/half_float.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_builder.h"
using namespace brw;
/**
 * Lower FS_OPCODE_PACK and FS_OPCODE_PACK_HALF_2x16_SPLIT instructions into
 * one MOV per source, each writing the matching subscript() of the original
 * destination.
 *
 * \param s  Shader whose CFG is walked and rewritten in place.
 *
 * \return true if at least one instruction was lowered.  In that case the
 *         analyses depending on the instruction list are invalidated.
 */
bool
brw_lower_pack(fs_visitor &s)
{
   bool progress = false;

   /* The "safe" iterator is required because the current instruction is
    * removed from its block at the end of each lowered iteration.
    */
   foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
      if (inst->opcode != FS_OPCODE_PACK &&
          inst->opcode != FS_OPCODE_PACK_HALF_2x16_SPLIT)
         continue;

      assert(inst->dst.file == VGRF);
      assert(inst->saturate == false);

      brw_reg dst = inst->dst;

      /* Builder positioned at the instruction being lowered, so that the
       * replacement instructions are emitted relative to it.
       */
      const brw_builder ibld(&s, block, inst);

      /* The lowering generates 2 instructions for what was previously 1. This
       * can trick the IR to believe we're doing partial writes, but the
       * register is actually fully written. Mark it as undef to help the IR
       * reduce the liveness of the register.
       *
       * Without this, the destination VGRF is considered partially written
       * by each generated instruction, which increases its liveness
       * (especially in loops) and can lead to additional spills/fills during
       * register allocation.
       */
      if (!inst->is_partial_write())
         ibld.emit_undef_for_dst(inst);

      switch (inst->opcode) {
      case FS_OPCODE_PACK:
         /* Source i goes to the i-th subscript of dst, using the source's
          * own type.
          */
         for (unsigned i = 0; i < inst->sources; i++)
            ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
         break;

      case FS_OPCODE_PACK_HALF_2x16_SPLIT:
         assert(dst.type == BRW_TYPE_UD);

         for (unsigned i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == IMM) {
               /* Immediate sources are converted to half-float bits here,
                * at compile time, and written as a raw UW immediate.
                */
               const uint32_t half = _mesa_float_to_half(inst->src[i].f);
               ibld.MOV(subscript(dst, BRW_TYPE_UW, i),
                        brw_imm_uw(half));
            } else {
               /* NOTE(review): presumably the HF-typed destination makes
                * the MOV itself perform the float-to-half conversion --
                * confirm against the MOV type-conversion rules.
                */
               ibld.MOV(subscript(dst, BRW_TYPE_HF, i),
                        inst->src[i]);
            }
         }
         break;

      default:
         unreachable("skipped above");
      }

      inst->remove(block);
      progress = true;
   }

   if (progress)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS);

   return progress;
}