mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-23 21:38:18 +02:00
ir3: port to nir_op_fmad
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
parent
d2f4b2d8e9
commit
acad18d97e
9 changed files with 12 additions and 79 deletions
|
|
@@ -144,6 +144,9 @@ static const nir_shader_compiler_options ir3_base_options = {
|
|||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
|
||||
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
|
||||
.float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
|
||||
.vertex_id_zero_based = false,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
|
|
|
|||
|
|
@@ -726,6 +726,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
|||
case nir_op_fsub:
|
||||
dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG);
|
||||
break;
|
||||
case nir_op_fmad:
|
||||
case nir_op_ffma_old:
|
||||
/* The scalar ALU doesn't support mad, so expand to mul+add so that we
|
||||
* don't unnecessarily fall back to non-earlypreamble. This is safe
|
||||
|
|
|
|||
|
|
@@ -90,7 +90,6 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
/* nir_opt_algebraic() above would have unfused our ffmas, re-fuse them. */
|
||||
if (needs_late_alg) {
|
||||
NIR_PASS(progress, ctx->s, nir_opt_algebraic_late);
|
||||
NIR_PASS(progress, ctx->s, ir3_nir_opt_algebraic_late);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_dce);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -1647,8 +1647,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
|
|||
*/
|
||||
bool more_late_algebraic = true;
|
||||
while (more_late_algebraic) {
|
||||
more_late_algebraic = OPT(s, nir_opt_algebraic_late) ||
|
||||
OPT(s, ir3_nir_opt_algebraic_late);
|
||||
more_late_algebraic = OPT(s, nir_opt_algebraic_late);
|
||||
if (!more_late_algebraic && so->compiler->gen >= 5) {
|
||||
/* Lowers texture operations that have only f2f16 or u2u16 called on
|
||||
* them to have a 16-bit destination. Also, lower 16-bit texture
|
||||
|
|
|
|||
|
|
@@ -63,7 +63,6 @@ nir_mem_access_size_align ir3_mem_access_size_align(
|
|||
|
||||
bool ir3_nir_opt_branch_and_or_not(nir_shader *nir);
|
||||
bool ir3_nir_opt_triops_bitwise(nir_shader *nir);
|
||||
bool ir3_nir_opt_algebraic_late(nir_shader *nir);
|
||||
|
||||
struct ir3_optimize_options {
|
||||
nir_opt_uub_options opt_uub_options;
|
||||
|
|
|
|||
|
|
@@ -33,8 +33,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
|
|||
* times the derivatives of ij in screen space.
|
||||
*/
|
||||
nir_def *new_ij = ij;
|
||||
new_ij = nir_ffma_old(b, chan(off, 0), nir_ddx(b, ij), new_ij);
|
||||
new_ij = nir_ffma_old(b, chan(off, 1), nir_ddy(b, ij), new_ij);
|
||||
new_ij = nir_fmad(b, chan(off, 0), nir_ddx(b, ij), new_ij);
|
||||
new_ij = nir_fmad(b, chan(off, 1), nir_ddy(b, ij), new_ij);
|
||||
|
||||
return new_ij;
|
||||
} else {
|
||||
|
|
@@ -52,8 +52,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
|
|||
|
||||
/* Get the offset value from pixel center for ij, and also for w. */
|
||||
nir_def *pos = sij;
|
||||
pos = nir_ffma_old(b, chan(off, 0), nir_ddx(b, sij), pos);
|
||||
pos = nir_ffma_old(b, chan(off, 1), nir_ddy(b, sij), pos);
|
||||
pos = nir_fmad(b, chan(off, 0), nir_ddx(b, sij), pos);
|
||||
pos = nir_fmad(b, chan(off, 1), nir_ddy(b, sij), pos);
|
||||
|
||||
/* convert back into screen space, dividing by the offset 1/w */
|
||||
return nir_fmul(b, nir_trim_vector(b, pos, 2),
|
||||
|
|
|
|||
|
|
@@ -1,57 +0,0 @@
|
|||
#
|
||||
# Copyright © 2016 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
# fuse fadd+fmul late to get something we can turn into mad.f32/f16. The
|
||||
# common nir_opt_algebraic_late pass only does this for non-exact patterns.
|
||||
# Since for us, mad is not fused, we don't have this restriction.
|
||||
late_optimizations = []
|
||||
|
||||
a = 'a'
|
||||
b = 'b'
|
||||
c = 'c'
|
||||
|
||||
for sz in [16, 32]:
|
||||
# Fuse the correct fmul. Only consider fmuls where the only users are fadd
|
||||
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
|
||||
# avoid fusing in cases where it's harmful.
|
||||
fmul = 'fmul(is_only_used_by_fadd)'
|
||||
ffma = 'ffma_old'
|
||||
|
||||
fadd = 'fadd@{}'.format(sz)
|
||||
|
||||
late_optimizations.extend([
|
||||
((fadd, (fmul, a, b), c), (ffma, a, b, c)),
|
||||
|
||||
((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c),
|
||||
(ffma, ('fneg', a), b, c)),
|
||||
|
||||
((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c),
|
||||
(ffma, ('fabs', a), ('fabs', b), c)),
|
||||
|
||||
((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c),
|
||||
(ffma, ('fneg', ('fabs', a)), ('fabs', b), c)),
|
||||
])
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-p', '--import-path', required=True)
|
||||
args = parser.parse_args()
|
||||
sys.path.insert(0, args.import_path)
|
||||
run()
|
||||
|
||||
|
||||
def run():
|
||||
import nir_algebraic # pylint: disable=import-error
|
||||
|
||||
print('#include "ir3_nir.h"')
|
||||
print(nir_algebraic.AlgebraicPass("ir3_nir_opt_algebraic_late",
|
||||
late_optimizations).render())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@@ -7,8 +7,9 @@ import argparse
|
|||
import sys
|
||||
|
||||
trig_workarounds = [
|
||||
(('fsin', 'x@32'), ('fsin', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
(('fcos', 'x@32'), ('fcos', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
# !fmad as reassociation could make this not precise enough.
|
||||
(('fsin', 'x@32'), ('fsin', ('!fmad', 6.2831853, ('ffract', ('!fmad', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
(('fcos', 'x@32'), ('fcos', ('!fmad', 6.2831853, ('ffract', ('!fmad', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -45,17 +45,6 @@ ir3_nir_triop_bitwise_c = custom_target(
|
|||
depend_files : nir_algebraic_depends,
|
||||
)
|
||||
|
||||
ir3_nir_opt_algebraic_late_c = custom_target(
|
||||
'ir3_nir_opt_algebraic_late.c',
|
||||
input : 'ir3_nir_opt_algebraic_late.py',
|
||||
output : 'ir3_nir_opt_algebraic_late.c',
|
||||
command : [
|
||||
prog_python, '@INPUT@', '-p', dir_compiler_nir,
|
||||
],
|
||||
capture : true,
|
||||
depend_files : nir_algebraic_depends,
|
||||
)
|
||||
|
||||
ir3_parser = custom_target(
|
||||
'ir3_parser.[ch]',
|
||||
input: 'ir3_parser.y',
|
||||
|
|
@@ -150,7 +139,6 @@ libfreedreno_ir3 = static_library(
|
|||
ir3_nir_imul_c,
|
||||
ir3_nir_branch_and_or_not_c,
|
||||
ir3_nir_triop_bitwise_c,
|
||||
ir3_nir_opt_algebraic_late_c,
|
||||
ir3_parser[0], ir3_parser[1],
|
||||
ir3_lexer,
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue