nir: replace undef only used by ALU opcodes with 0 or NaN

If undef is consumed by an FP opcode, replace it with NaN to eliminate
that opcode, else replace it with 0, but there are exceptions, such as
when undef is used by stores or phis, it's not touched.

This also contains workarounds for viewperf shaders.

radeonsi:
TOTALS FROM AFFECTED SHADERS (1987/58918)
  Code Size: 5158692 -> 5143796 (-0.29 %) bytes
  Max Waves: 22456 -> 22513 (0.25 %)
  Outputs: 3726 -> 3726 (0.00 %)
  Patch Outputs: 0 -> 0 (0.00 %)

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24059>
This commit is contained in:
Marek Olšák 2023-07-06 04:52:16 -04:00 committed by Marge Bot
parent 2b128c570b
commit 861d274453

View file

@ -23,12 +23,18 @@
#include "nir.h"
#include "nir_builder.h"
#include "util/mesa-sha1.h"
#include <math.h>
/** @file nir_opt_undef.c
*
* Handles optimization of operations involving ssa_undef.
*/
struct undef_options {
bool disallow_undef_to_nan;
};
/**
* Turn conditional selects between an undef and some other value into a move
* of that other value (on the assumption that the condition's going to be
@ -169,10 +175,126 @@ opt_undef_pack(nir_builder *b, nir_alu_instr *alu)
return true;
}
struct visit_info {
bool replace_undef_with_constant;
bool prefer_nan;
bool must_keep_undef;
};
/**
* Analyze an undef use to see if replacing undef with a constant is
* beneficial.
*/
static void
visit_undef_use(nir_src *src, struct visit_info *info)
{
nir_instr *instr = src->parent_instr;
if (src->is_if) {
/* If the use is "if", keep undef because the branch will be eliminated
* by nir_opt_dead_cf.
*/
info->must_keep_undef = true;
return;
}
if (instr->type == nir_instr_type_alu) {
/* Replacing undef with a constant is only beneficial with ALU
* instructions because it can eliminate them or simplify them.
*/
nir_alu_instr *alu = nir_instr_as_alu(instr);
/* Follow movs and vecs.
*
* Note that all vector component uses are followed and swizzles are
* ignored.
*/
if (alu->op == nir_op_mov || nir_op_is_vec(alu->op)) {
nir_foreach_use_including_if(next_src, &alu->def) {
visit_undef_use(next_src, info);
}
return;
}
unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
for (unsigned i = 0; i < num_srcs; i++) {
if (&alu->src[i].src != src)
continue;
if (nir_op_is_selection(alu->op) && i != 0) {
/* nir_opt_algebraic can eliminate a select opcode only if src0 is
* a constant. If the undef use is src1 or src2, it will be
* handled by opt_undef_csel.
*/
continue;
}
info->replace_undef_with_constant = true;
if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
alu->op != nir_op_fmulz &&
(alu->op != nir_op_ffmaz || i == 2))
info->prefer_nan = true;
}
} else {
/* If the use is not ALU, don't replace undef. We need to preserve
* undef for stores and phis because those are handled differently,
* and replacing undef with a constant would result in worse code.
*/
info->must_keep_undef = true;
return;
}
}
/**
* Replace ssa_undef used by ALU opcodes with 0 or NaN, whichever eliminates
* more code.
*
* Replace it with NaN if an FP opcode uses undef, which will cause the opcode
* to be eliminated by nir_opt_algebraic. 0 would not eliminate the FP opcode.
*/
static bool
replace_ssa_undef(nir_builder *b, nir_instr *instr,
const struct undef_options *options)
{
nir_undef_instr *undef = nir_instr_as_undef(instr);
struct visit_info info = {0};
nir_foreach_use_including_if(src, &undef->def) {
visit_undef_use(src, &info);
}
if (info.must_keep_undef || !info.replace_undef_with_constant)
return false;
b->cursor = nir_before_instr(&undef->instr);
nir_def *replacement;
/* If undef is used as float, replace it with NaN, which will
* eliminate all FP instructions that consume it. Else, replace it
* with 0, which is more likely to eliminate non-FP instructions.
*/
if (info.prefer_nan && !options->disallow_undef_to_nan)
replacement = nir_imm_floatN_t(b, NAN, undef->def.bit_size);
else
replacement = nir_imm_intN_t(b, 0, undef->def.bit_size);
if (undef->def.num_components > 1)
replacement = nir_replicate(b, replacement, undef->def.num_components);
nir_def_rewrite_uses_after(&undef->def, replacement, &undef->instr);
nir_instr_remove(&undef->instr);
return true;
}
static bool
nir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type == nir_instr_type_alu) {
const struct undef_options *options = data;
if (instr->type == nir_instr_type_undef) {
return replace_ssa_undef(b, instr, options);
} else if (instr->type == nir_instr_type_alu) {
nir_alu_instr *alu = nir_instr_as_alu(instr);
return opt_undef_csel(b, alu) ||
opt_undef_vecN(b, alu) ||
@ -188,9 +310,40 @@ nir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data)
bool
nir_opt_undef(nir_shader *shader)
{
struct undef_options options = {0};
/* Disallow the undef->NaN transformation only for those shaders where
* it's known to break rendering. These are shader source SHA1s printed by
* nir_print_shader().
*/
uint32_t shader_sha1s[][SHA1_DIGEST_LENGTH32] = {
/* gputest/gimark */
{0x9a1af9e2, 0x68f185bf, 0x11fc1257, 0x1102e80b, 0x5ca350fa},
/* Viewperf13/CATIA_car_01 */
{0x4746a4a4, 0xe3b27d27, 0xe6d2b0fb, 0xb7e9ceb3, 0x973e6152}, /* Taillights */
{0xc49cc90d, 0xd7208212, 0x726502ea, 0xe1fe62c0, 0xb62fbd1f}, /* Grill */
{0xde23f35b, 0xb6fa45ae, 0x96da7e6b, 0x5a6e4a60, 0xce0b6b31}, /* Headlights */
{0xdf36242c, 0x0705db59, 0xf1ddac9b, 0xcd1c8466, 0x4c73203b}, /* Rims */
/* Viewperf13/CATIA_car_04 */
{0x631da72a, 0xc971e849, 0xd6489a15, 0xf7c8dddb, 0xe8efd982}, /* Headlights */
{0x85984b88, 0xd16b8fee, 0x0d49d97b, 0x5f6cc66e, 0xadcafad9}, /* Rims */
{0xad023488, 0x09930735, 0xb0567e58, 0x336dce36, 0xe3c1e448}, /* Tires */
{0xdcc4a549, 0x587873fa, 0xeed94361, 0x9a47cbff, 0x846d0167}, /* Windows */
{0xfa0074a2, 0xef868430, 0x87935a0c, 0x19bc96be, 0xb5b95c74}, /* Body */
};
for (unsigned i = 0; i < ARRAY_SIZE(shader_sha1s); i++) {
if (_mesa_printed_sha1_equal(shader->info.source_sha1, shader_sha1s[i])) {
options.disallow_undef_to_nan = true;
break;
}
}
return nir_shader_instructions_pass(shader,
nir_opt_undef_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
nir_metadata_dominance,
&options);
}