mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-23 22:40:34 +01:00
nir: add ffma creation helpers
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
This commit is contained in:
parent
4ec4d862c2
commit
ed70b256ce
4 changed files with 37 additions and 5 deletions
|
|
@ -3685,8 +3685,11 @@ typedef struct nir_shader_compiler_options {
|
|||
*/
|
||||
bool intel_vec4;
|
||||
|
||||
/** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
|
||||
bool lower_bfe_with_two_constants;
|
||||
/**
|
||||
* For most Intel GPUs, all ternary operations such as FMA and BFE cannot
|
||||
* have immediates, so two to three instructions may eventually be needed.
|
||||
*/
|
||||
bool avoid_ternary_with_two_constants;
|
||||
|
||||
/** Whether 8-bit ALU is supported. */
|
||||
bool support_8bit_alu;
|
||||
|
|
|
|||
|
|
@ -977,6 +977,35 @@ nir_uclamp(nir_builder *b,
|
|||
return nir_umin(b, nir_umax(b, x, min_val), max_val);
|
||||
}
|
||||
|
||||
/**
 * Build a multiply-add of src0 with two immediate operands: src0 is
 * multiplied by the constant src1 and the constant src2 is added.
 *
 * When the shader's compiler options set avoid_ternary_with_two_constants,
 * the result is built as a separate multiply-by-immediate followed by an
 * add-immediate instead of a single nir_ffma with two constant sources.
 * Otherwise a nir_ffma is emitted with both constants created at the bit
 * size of src0.
 *
 * \param build  builder used to emit the instructions; its shader's options
 *               select which form is produced
 * \param src0   the non-constant multiplicand
 * \param src1   immediate multiplier
 * \param src2   immediate addend
 * \return the SSA def produced by the last instruction built
 */
static inline nir_ssa_def *
|
||||
nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
|
||||
{
|
||||
/* Backend asked not to get a three-source op with two constant sources. */
if (build->shader->options->avoid_ternary_with_two_constants)
|
||||
return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
|
||||
else
|
||||
/* Both immediates take src0's bit size so all ffma sources match. */
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
|
||||
nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
/**
 * Build a nir_ffma whose second source is an immediate.
 *
 * The constant src1 is created with the same bit size as src0 and passed as
 * the second operand; src0 and src2 are passed through unchanged. Unlike
 * nir_ffma_imm12, this helper does not consult
 * avoid_ternary_with_two_constants.
 *
 * \param build  builder used to emit the instructions
 * \param src0   first ffma source
 * \param src1   immediate used as the second ffma source
 * \param src2   third ffma source
 * \return the SSA def produced by nir_ffma
 */
static inline nir_ssa_def *
|
||||
nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
|
||||
{
|
||||
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
|
||||
}
|
||||
|
||||
/**
 * Build a nir_ffma whose third source is an immediate.
 *
 * The constant src2 is created with the same bit size as src0 and passed as
 * the third operand; src0 and src1 are passed through unchanged. Unlike
 * nir_ffma_imm12, this helper does not consult
 * avoid_ternary_with_two_constants.
 *
 * \param build  builder used to emit the instructions
 * \param src0   first ffma source
 * \param src1   second ffma source
 * \param src2   immediate used as the third ffma source
 * \return the SSA def produced by nir_ffma
 */
static inline nir_ssa_def *
|
||||
nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
|
||||
{
|
||||
return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
/**
 * Build "a - b * c" as a single nir_ffma, with a = src0, b = src1, c = src2.
 *
 * The subtraction is expressed by negating src1 before the multiply:
 * nir_ffma(-src1, src2, src0). This reading assumes nir_ffma(x, y, z)
 * computes x * y + z, as the helper's name and the add/mul fallback in
 * nir_ffma_imm12 suggest.
 *
 * \param build  builder used to emit the instructions
 * \param src0   the value subtracted from (a)
 * \param src1   first factor of the product (b); negated via nir_fneg
 * \param src2   second factor of the product (c)
 * \return the SSA def produced by nir_ffma
 */
static inline nir_ssa_def *
|
||||
nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
|
||||
nir_ssa_def *src2)
|
||||
{
|
||||
/* Note the operand order: the minuend src0 goes last, as the addend. */
return nir_ffma(build, nir_fneg(build, src1), src2, src0);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2410,7 +2410,7 @@ late_optimizations = [
|
|||
# result, it is very easy for 3-source instruction combined with either
|
||||
# loads of immediate values or copies from weird register strides to be
|
||||
# more expensive than the primitive instructions it represents.
|
||||
(('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->lower_bfe_with_two_constants'),
|
||||
(('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->avoid_ternary_with_two_constants'),
|
||||
|
||||
# b is the lowest order bit to be extracted and c is the number of bits to
|
||||
# extract. The inner shift removes the bits above b + c by shifting left
|
||||
|
|
@ -2418,7 +2418,7 @@ late_optimizations = [
|
|||
# -(b + c). The outer shift moves the bit that was at b to bit zero.
|
||||
# After the first shift, that bit is now at b + (32 - (b + c)) or 32 - c.
|
||||
# This means that it must be shifted right by 32 - c or -c bits.
|
||||
(('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->lower_bfe_with_two_constants'),
|
||||
(('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->avoid_ternary_with_two_constants'),
|
||||
|
||||
# Clean up no-op shifts that may result from the bfe lowerings.
|
||||
(('ishl', a, 0), a),
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@
|
|||
.lower_unpack_unorm_4x8 = true, \
|
||||
.lower_usub_sat64 = true, \
|
||||
.lower_hadd64 = true, \
|
||||
.lower_bfe_with_two_constants = true, \
|
||||
.avoid_ternary_with_two_constants = true, \
|
||||
.max_unroll_iterations = 32, \
|
||||
.force_indirect_unrolling = nir_var_function_temp
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue