mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-02 05:10:17 +01:00
The fmul+fadd -> fma rules in nir_opt_algebraic are marked imprecise, because they are a contraction. However, they respect signed zero/Inf/NaN rules. As such, it is legal to do this fusion with shader float controls as long as the exact bit is not set (mapping to SPIR-V NoContract). Unfortunately, NIR's imprecise rules do not distinguish between contraction issues versus float special case issues, forcing nir_search to skip all imprecise rules when any shader float control modes are used. This notably affects DXVK, which sets shader float controls to get D3D11 float behaviour and hence loses FMA fusing. Therefore, we plumb in the exact bit to express NoContract independent of the float controls, and weaken the requirement for fma fusion to allowable contraction. For fma splitting, it's a similar issue, as inexact GLSL fma in SPIR-V is just a multiply add that we're allowed to contract rather than the real deal. Drivers that use their own FMA fusing passes (notably, Intel and AMD) are unaffected, but DXVK-capable drivers using fuse_ffma should like this. Results on hk shown: Totals from 2194 (4.06% of 54019) affected shaders: MaxWaves: 2174272 -> 2175936 (+0.08%); split: +0.08%, -0.01% Instrs: 1173283 -> 1131494 (-3.56%); split: -3.57%, +0.01% CodeSize: 8568168 -> 8381724 (-2.18%); split: -2.18%, +0.01% Spills: 1094 -> 747 (-31.72%) Fills: 988 -> 681 (-31.07%) Scratch: 4444 -> 3820 (-14.04%) ALU: 953032 -> 913149 (-4.18%); split: -4.19%, +0.01% FSCIB: 953032 -> 913149 (-4.18%); split: -4.19%, +0.01% IC: 215398 -> 215274 (-0.06%) GPRs: 139865 -> 139032 (-0.60%); split: -1.56%, +0.96% Uniforms: 414886 -> 414466 (-0.10%); split: -0.14%, +0.04% Preamble instrs: 646398 -> 644017 (-0.37%); split: -0.43%, +0.07% Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35989>
254 lines
8 KiB
C
254 lines
8 KiB
C
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
|
|
|
|
#ifndef _NIR_SEARCH_
|
|
#define _NIR_SEARCH_
|
|
|
|
#include "util/u_dynarray.h"
|
|
#include "nir.h"
|
|
#include "nir_worklist.h"
|
|
|
|
#define NIR_SEARCH_MAX_VARIABLES 24
|
|
|
|
struct nir_builder;
|
|
|
|
/**
 * Discriminator for a nir_search_value.
 *
 * Every concrete search value (expression, variable, constant) embeds a
 * nir_search_value whose `type` field holds one of these tags, which is how
 * a generic value is identified before being cast to its concrete struct.
 */
typedef enum ENUM_PACKED {
   nir_search_value_expression,
   nir_search_value_variable,
   nir_search_value_constant,
} nir_search_value_type;
|
|
|
|
typedef struct {
|
|
nir_search_value_type type;
|
|
|
|
/**
|
|
* Bit size of the value. It is interpreted as follows:
|
|
*
|
|
* For a search expression:
|
|
* - If bit_size > 0, then the value only matches an SSA value with the
|
|
* given bit size.
|
|
* - If bit_size <= 0, then the value matches any size SSA value.
|
|
*
|
|
* For a replace expression:
|
|
* - If bit_size > 0, then the value is constructed with the given bit size.
|
|
* - If bit_size == 0, then the value is constructed with the same bit size
|
|
* as the search value.
|
|
* - If bit_size < 0, then the value is constructed with the same bit size
|
|
* as variable (-bit_size - 1).
|
|
*/
|
|
int8_t bit_size;
|
|
} nir_search_value;
|
|
|
|
typedef struct {
|
|
nir_search_value value;
|
|
|
|
/** The variable index; Must be less than NIR_SEARCH_MAX_VARIABLES */
|
|
uint8_t variable : 7;
|
|
|
|
/** Indicates that the given variable must be a constant
|
|
*
|
|
* This is only allowed in search expressions and indicates that the
|
|
* given variable is only allowed to match constant values.
|
|
*/
|
|
bool is_constant : 1;
|
|
|
|
/** Indicates that the given variable must have a certain type
|
|
*
|
|
* This is only allowed in search expressions and indicates that the
|
|
* given variable is only allowed to match values that come from an ALU
|
|
* instruction with the given output type. A type of nir_type_void
|
|
* means it can match any type.
|
|
*
|
|
* Note: A variable that is both constant and has a non-void type will
|
|
* never match anything.
|
|
*/
|
|
nir_alu_type type;
|
|
|
|
/** Optional table->variable_cond[] fxn ptr index
|
|
*
|
|
* This is only allowed in search expressions, and allows additional
|
|
* constraints to be placed on the match. Typically used for 'is_constant'
|
|
* variables to require, for example, power-of-two in order for the search
|
|
* to match.
|
|
*/
|
|
int16_t cond_index;
|
|
|
|
/** Swizzle (for replace only) */
|
|
uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
|
|
} nir_search_variable;
|
|
|
|
typedef struct {
|
|
nir_search_value value;
|
|
|
|
nir_alu_type type;
|
|
|
|
union {
|
|
uint64_t u;
|
|
int64_t i;
|
|
double d;
|
|
} data;
|
|
} nir_search_constant;
|
|
|
|
enum nir_search_op {
|
|
nir_search_op_i2f = nir_last_opcode + 1,
|
|
nir_search_op_u2f,
|
|
nir_search_op_f2f,
|
|
nir_search_op_f2u,
|
|
nir_search_op_f2i,
|
|
nir_search_op_u2u,
|
|
nir_search_op_i2i,
|
|
nir_search_op_b2f,
|
|
nir_search_op_b2i,
|
|
nir_num_search_ops,
|
|
};
|
|
|
|
/* Returns a 16-bit opcode value for the given nir_op.
 *
 * NOTE(review): presumably this yields the matching nir_search_op for
 * conversion opcodes and the nir_op itself otherwise, i.e. a value suitable
 * for nir_search_expression::opcode -- the definition is not in this header,
 * so confirm there.
 */
uint16_t nir_search_op_for_nir_op(nir_op op);
|
|
|
|
typedef struct {
|
|
nir_search_value value;
|
|
|
|
/* When set on a search expression, the expression will only match an SSA
|
|
* value that does *not* have the exact bit set. If unset, the exact bit
|
|
* on the SSA value is ignored.
|
|
*/
|
|
bool inexact : 1;
|
|
|
|
/** In a replacement, requests that the instruction be marked exact. */
|
|
bool exact : 1;
|
|
|
|
/** Don't make the replacement exact if the search expression is exact. */
|
|
bool ignore_exact : 1;
|
|
|
|
/** Replacement does not preserve signed of zero. */
|
|
bool nsz : 1;
|
|
|
|
/** Replacement does not preserve NaN. */
|
|
bool nnan : 1;
|
|
|
|
/** Replacement does not preserve infinities. */
|
|
bool ninf : 1;
|
|
|
|
/** Replacement contracts an expression */
|
|
bool contract : 1;
|
|
|
|
/** Whether the use of the instruction should have a swizzle. */
|
|
int16_t swizzle : 5;
|
|
|
|
/* One of nir_op or nir_search_op */
|
|
uint16_t opcode : 13;
|
|
|
|
/* Commutative expression index. This is assigned by opt_algebraic.py when
|
|
* search structures are constructed and is a unique (to this structure)
|
|
* index within the commutative operation bitfield used for searching for
|
|
* all combinations of expressions containing commutative operations.
|
|
*/
|
|
int8_t comm_expr_idx;
|
|
|
|
/* Number of commutative expressions in this expression including this one
|
|
* (if it is commutative).
|
|
*/
|
|
uint8_t comm_exprs;
|
|
|
|
/* Index in table->values[] for the expression operands */
|
|
uint16_t srcs[4];
|
|
|
|
/** Optional table->expression_cond[] fxn ptr index
|
|
*
|
|
* This allows additional constraints on expression matching, it is
|
|
* typically used to match an expressions uses such as the number of times
|
|
* the expression is used, and whether its used by an if.
|
|
*/
|
|
int16_t cond_index;
|
|
} nir_search_expression;
|
|
|
|
/* Per-opcode lookup data referenced from nir_algebraic_table::pass_op_table.
 *
 * NOTE(review): the exact meaning of each array is defined by the code that
 * generates the tables (see the reference to TreeAutomaton._build_table()
 * further down this header) -- confirm there before relying on it.
 */
struct per_op_table {
   const uint16_t *filter;
   unsigned num_filtered_states;
   const uint16_t *table;
};
|
|
|
|
/* One search -> replace rewrite rule of an algebraic pass. */
struct transform {
   uint16_t search;  /* Index in table->values[] for the search expression. */
   uint16_t replace; /* Index in table->values[] for the replace value. */
   /* NOTE(review): presumably an index into the condition_flags array passed
    * to nir_algebraic_impl() -- confirm against the implementation.
    */
   unsigned condition_offset;
};
|
|
|
|
typedef union {
|
|
nir_search_value value; /* base type of the union, first element of each variant struct */
|
|
|
|
nir_search_constant constant;
|
|
nir_search_variable variable;
|
|
nir_search_expression expression;
|
|
} nir_search_value_union;
|
|
|
|
/* Extra predicate attached to a search expression: it receives the ALU
 * instruction under consideration and returns a bool.  Entries of this type
 * populate nir_algebraic_table::expression_cond.
 */
typedef bool (*nir_search_expression_cond)(const nir_alu_instr *instr);
|
|
typedef bool (*nir_search_variable_cond)(struct hash_table *range_ht,
|
|
const nir_alu_instr *instr,
|
|
unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle);
|
|
|
|
/* Generated data table for an algebraic optimization pass. */
|
|
typedef struct {
|
|
/** Array of all transforms in the pass. */
|
|
const struct transform *transforms;
|
|
/** Mapping from automaton state index to location in *transforms. */
|
|
const uint16_t *transform_offsets;
|
|
const struct per_op_table *pass_op_table;
|
|
const nir_search_value_union *values;
|
|
|
|
/**
|
|
* Array of condition functions for expressions, referenced by
|
|
* nir_search_expression->cond.
|
|
*/
|
|
const nir_search_expression_cond *expression_cond;
|
|
|
|
/**
|
|
* Array of condition functions for variables, referenced by
|
|
* nir_search_variable->cond.
|
|
*/
|
|
const nir_search_variable_cond *variable_cond;
|
|
} nir_algebraic_table;
|
|
|
|
/* Note: these must match the start states created in
 * TreeAutomaton._build_table()
 */

/* WILDCARD_STATE = 0 is set by zeroing the state array */
static const uint16_t CONST_STATE = 1;
|
|
|
|
NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
|
|
nir_search_variable, value,
|
|
type, nir_search_value_variable)
|
|
NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
|
|
nir_search_constant, value,
|
|
type, nir_search_value_constant)
|
|
NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
|
|
nir_search_expression, value,
|
|
type, nir_search_value_expression)
|
|
|
|
bool
|
|
nir_algebraic_impl(nir_function_impl *impl,
|
|
const bool *condition_flags,
|
|
const nir_algebraic_table *table);
|
|
|
|
#endif /* _NIR_SEARCH_ */
|