mesa/src/compiler/nir/nir_opcodes_c.py
Ian Romanick eb76cee9f8 nir: Eliminate nir_op_i2b
There are a lot of optimizations in opt_algebraic that match ('ine', a,
0), but there are almost none that match i2b.  Instead of adding a huge
pile of additional patterns (including variations that include both ine
and i2b), always lower i2b to a != 0.

At this point in the series, it should be impossible for anything to
generate i2b, so there /should not/ be any changes.

The failing test on d3d12 is a pre-existing bug that is triggered by
this change.  I talked to Jesse about it, and, after some analysis, he
suggested just adding it to the list of known failures.

v2: Don't rematerialize i2b instructions in dxil_nir_lower_x2b.

v3: Don't rematerialize i2b instructions in zink_nir_algebraic.py.

v4: Fix zink-on-TGL CI failures by calling nir_opt_algebraic after
nir_lower_doubles makes progress.  The latter can generate b2i
instructions, but nir_lower_int64 can't handle them (anymore).

v5: Add back most of the hunk at line 2125 of nir_opt_algebraic.py. I
had accidentally removed the f2b(bf2(x)) optimization.

v6: Just eliminate the i2b instruction.

v7: Remove missed i2b32 in midgard_compile.c. Remove (now unused)
emit_alu_i2orf2_b1 function from sfn_instr_alu.cpp. Previously this
function was still used. 🤷

No shader-db changes on any Intel platform.

All Intel platforms had similar results. (Ice Lake shown)
Instructions in all programs: 141165875 -> 141165873 (-0.0%)
Instructions helped: 2

Cycles in all programs: 9098956382 -> 9098956350 (-0.0%)
Cycles helped: 2

The two Vulkan shaders are helped because of the "new" (('b2i32',
('ine', ('ubfe', a, b, 1), 0)), ('ubfe', a, b, 1)) algebraic pattern.

Acked-by: Jesse Natalie <jenatali@microsoft.com> [earlier version]
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Daniel Schürmann <daniel@schuermann.dev> [earlier version]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15121>
2022-12-14 06:23:21 +00:00

132 lines
5.1 KiB
Python

#
# Copyright (C) 2014 Connor Abbott
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
# Authors:
# Connor Abbott (cwabbott0@gmail.com)
from nir_opcodes import opcodes, type_sizes
from mako.template import Template
template = Template("""
#include "nir.h"
nir_op
nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd)
{
nir_alu_type src_base = (nir_alu_type) nir_alu_type_get_base_type(src);
nir_alu_type dst_base = (nir_alu_type) nir_alu_type_get_base_type(dst);
unsigned src_bit_size = nir_alu_type_get_type_size(src);
unsigned dst_bit_size = nir_alu_type_get_type_size(dst);
if (src == dst && src_base == nir_type_float) {
return nir_op_mov;
} else if (src == dst && src_base == nir_type_bool) {
return nir_op_mov;
} else if ((src_base == nir_type_int || src_base == nir_type_uint) &&
(dst_base == nir_type_int || dst_base == nir_type_uint) &&
src_bit_size == dst_bit_size) {
/* Integer <-> integer conversions with the same bit-size on both
* ends are just no-op moves.
*/
return nir_op_mov;
}
/* i2b and u2b do not exist. Use ine (via nir_type_conversion) instead */
assert((src_base != nir_type_int && src_base != nir_type_uint) ||
dst_base != nir_type_bool);
switch (src_base) {
% for src_t in ['int', 'uint', 'float', 'bool']:
case nir_type_${src_t}:
switch (dst_base) {
% for dst_t in ['int', 'uint', 'float', 'bool']:
case nir_type_${dst_t}:
% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']:
% if dst_t == 'int':
<% continue %>
% else:
<% dst_t = src_t %>
% endif
% elif src_t == 'bool' and dst_t in ['int', 'uint', 'bool']:
% if dst_t == 'int':
<% continue %>
% else:
<% dst_t = 'int' %>
% endif
% elif src_t in ['int', 'uint'] and dst_t == 'bool':
<% continue %>
% endif
switch (dst_bit_size) {
% for dst_bits in type_sizes(dst_t):
case ${dst_bits}:
% if src_t == 'float' and dst_t == 'float' and dst_bits == 16:
switch(rnd) {
% for rnd_t in [('rtne', '_rtne'), ('rtz', '_rtz'), ('undef', '')]:
case nir_rounding_mode_${rnd_t[0]}:
return ${'nir_op_{0}2{1}{2}{3}'.format(src_t[0], dst_t[0],
dst_bits, rnd_t[1])};
% endfor
default:
unreachable("Invalid 16-bit nir rounding mode");
}
% else:
assert(rnd == nir_rounding_mode_undef);
return ${'nir_op_{0}2{1}{2}'.format(src_t[0], dst_t[0], dst_bits)};
% endif
% endfor
default:
unreachable("Invalid nir alu bit size");
}
% endfor
default:
unreachable("Invalid nir alu base type");
}
% endfor
default:
unreachable("Invalid nir alu base type");
}
}
const nir_op_info nir_op_infos[nir_num_opcodes] = {
% for name, opcode in sorted(opcodes.items()):
{
.name = "${name}",
.num_inputs = ${opcode.num_inputs},
.output_size = ${opcode.output_size},
.output_type = ${"nir_type_" + opcode.output_type},
.input_sizes = {
${ ", ".join(str(size) for size in opcode.input_sizes) }
},
.input_types = {
${ ", ".join("nir_type_" + type for type in opcode.input_types) }
},
.is_conversion = ${"true" if opcode.is_conversion else "false"},
.algebraic_properties =
${ "0" if opcode.algebraic_properties == "" else " | ".join(
"NIR_OP_IS_" + prop.upper() for prop in
opcode.algebraic_properties.strip().split(" ")) }
},
% endfor
};
""")
print(template.render(opcodes=opcodes, type_sizes=type_sizes))