From ed70b256cea526dd233bc21a9749ad2bb14e48d9 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 2 Jun 2021 15:14:41 +0100 Subject: [PATCH] nir: add ffma creation helpers Signed-off-by: Rhys Perry Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir.h | 7 +++++-- src/compiler/nir/nir_builder.h | 29 +++++++++++++++++++++++++++ src/compiler/nir/nir_opt_algebraic.py | 4 ++-- src/intel/compiler/brw_compiler.c | 2 +- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f750035beae..3eea054e5e7 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3685,8 +3685,11 @@ typedef struct nir_shader_compiler_options { */ bool intel_vec4; - /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ - bool lower_bfe_with_two_constants; + /** + * For most Intel GPUs, all ternary operations such as FMA and BFE cannot + * have immediates, so two to three instructions may eventually be needed. + */ + bool avoid_ternary_with_two_constants; /** Whether 8-bit ALU is supported. */ bool support_8bit_alu; diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index e1e248b1514..962fdf0fafd 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -977,6 +977,35 @@ nir_uclamp(nir_builder *b, return nir_umin(b, nir_umax(b, x, min_val), max_val); } +static inline nir_ssa_def * +nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2) +{ + if (build->shader->options->avoid_ternary_with_two_constants) + return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2); + else + return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), + nir_imm_floatN_t(build, src2, src0->bit_size)); +} + +static inline nir_ssa_def * +nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2) +{ + return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); +} + +static inline nir_ssa_def * +nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2) +{ + return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); +} + +static inline nir_ssa_def * +nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, + nir_ssa_def *src2) +{ + return nir_ffma(build, nir_fneg(build, src1), src2, src0); +} + static inline nir_ssa_def * nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size) { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 51f2d18c2ca..376069514c3 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2410,7 +2410,7 @@ late_optimizations = [ # result, it is very easy for 3-source instruction combined with either # loads of immediate values or copies from weird register strides to be # more expensive than the primitive instructions it represents. - (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->lower_bfe_with_two_constants'), + (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->avoid_ternary_with_two_constants'), # b is the lowest order bit to be extracted and c is the number of bits to # extract. The inner shift removes the bits above b + c by shifting left @@ -2418,7 +2418,7 @@ late_optimizations = [ # -(b + c). The outer shift moves the bit that was at b to bit zero. # After the first shift, that bit is now at b + (32 - (b + c)) or 32 - c. # This means that it must be shifted right by 32 - c or -c bits. - (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->lower_bfe_with_two_constants'), + (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->avoid_ternary_with_two_constants'), # Clean up no-op shifts that may result from the bfe lowerings. (('ishl', a, 0), a), diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 4336ff73a08..ec6b591cd12 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -67,7 +67,7 @@ .lower_unpack_unorm_4x8 = true, \ .lower_usub_sat64 = true, \ .lower_hadd64 = true, \ - .lower_bfe_with_two_constants = true, \ + .avoid_ternary_with_two_constants = true, \ .max_unroll_iterations = 32, \ .force_indirect_unrolling = nir_var_function_temp