nir: add ffma creation helpers

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
This commit is contained in:
Rhys Perry 2021-06-02 15:14:41 +01:00 committed by Marge Bot
parent 4ec4d862c2
commit ed70b256ce
4 changed files with 37 additions and 5 deletions

View file

@ -3685,8 +3685,11 @@ typedef struct nir_shader_compiler_options {
*/
bool intel_vec4;
/** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
bool lower_bfe_with_two_constants;
/**
* For most Intel GPUs, all ternary operations such as FMA and BFE cannot
* have immediates, so two to three instructions may eventually be needed.
*/
bool avoid_ternary_with_two_constants;
/** Whether 8-bit ALU is supported. */
bool support_8bit_alu;

View file

@ -977,6 +977,35 @@ nir_uclamp(nir_builder *b,
return nir_umin(b, nir_umax(b, x, min_val), max_val);
}
/**
 * Build src0 * src1 + src2 where both src1 and src2 are immediates.
 *
 * If the shader's compiler options set avoid_ternary_with_two_constants,
 * the result is built from nir_fmul_imm followed by nir_fadd_imm instead of
 * an ffma with two constant sources.  Otherwise a single ffma is emitted
 * whose immediates are created at src0's bit size.
 */
static inline nir_ssa_def *
nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
{
   if (build->shader->options->avoid_ternary_with_two_constants) {
      nir_ssa_def *product = nir_fmul_imm(build, src0, src1);
      return nir_fadd_imm(build, product, src2);
   }

   /* Both immediates must match the bit size of the non-constant source. */
   const unsigned bit_size = src0->bit_size;
   return nir_ffma(build, src0,
                   nir_imm_floatN_t(build, src1, bit_size),
                   nir_imm_floatN_t(build, src2, bit_size));
}
/**
 * Build an ffma of (src0, src1, src2) where the second source is an
 * immediate.  The immediate is created at src0's bit size.
 */
static inline nir_ssa_def *
nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
{
   nir_ssa_def *factor = nir_imm_floatN_t(build, src1, src0->bit_size);

   return nir_ffma(build, src0, factor, src2);
}
/**
 * Build an ffma of (src0, src1, src2) where the third source is an
 * immediate.  The immediate is created at src0's bit size.
 */
static inline nir_ssa_def *
nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
{
   nir_ssa_def *addend = nir_imm_floatN_t(build, src2, src0->bit_size);

   return nir_ffma(build, src0, src1, addend);
}
/**
 * Build src0 - src1 * src2 as a single ffma.
 *
 * The subtraction is expressed by negating src1, i.e. the emitted
 * instruction is ffma(-src1, src2, src0).
 */
static inline nir_ssa_def *
nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
               nir_ssa_def *src2)
{
   nir_ssa_def *negated = nir_fneg(build, src1);

   return nir_ffma(build, negated, src2, src0);
}
static inline nir_ssa_def *
nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{

View file

@ -2410,7 +2410,7 @@ late_optimizations = [
# result, it is very easy for 3-source instruction combined with either
# loads of immediate values or copies from weird register strides to be
# more expensive than the primitive instructions it represents.
(('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->lower_bfe_with_two_constants'),
(('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->avoid_ternary_with_two_constants'),
# b is the lowest order bit to be extracted and c is the number of bits to
# extract. The inner shift removes the bits above b + c by shifting left
@ -2418,7 +2418,7 @@ late_optimizations = [
# -(b + c). The outer shift moves the bit that was at b to bit zero.
# After the first shift, that bit is now at b + (32 - (b + c)) or 32 - c.
# This means that it must be shifted right by 32 - c or -c bits.
(('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->lower_bfe_with_two_constants'),
(('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->avoid_ternary_with_two_constants'),
# Clean up no-op shifts that may result from the bfe lowerings.
(('ishl', a, 0), a),

View file

@ -67,7 +67,7 @@
.lower_unpack_unorm_4x8 = true, \
.lower_usub_sat64 = true, \
.lower_hadd64 = true, \
.lower_bfe_with_two_constants = true, \
.avoid_ternary_with_two_constants = true, \
.max_unroll_iterations = 32, \
.force_indirect_unrolling = nir_var_function_temp