Mirror of https://gitlab.freedesktop.org/mesa/mesa.git, synced 2025-12-20 05:10:11 +01:00

Merge branch 'nir-per-instr-only-nnan-ninf-nsz' into 'main'
nir,spirv,gallivm: fully replace float_controls2 remains with per-ALU floating point math controls

See merge request mesa/mesa!39026
commit 117dfe8b63
26 changed files with 138 additions and 258 deletions
@@ -1322,38 +1322,6 @@ nir_op_is_vec_or_mov(nir_op op)
    return op == nir_op_mov || nir_op_is_vec(op);
 }
 
-static inline bool
-nir_is_float_control_signed_zero_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_inf_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_nan_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
-}
-
 static inline bool
 nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
 {

@@ -1522,6 +1490,24 @@ nir_op_is_selection(nir_op op)
 {
    return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
 }
+
+/**
+ * Floating point fast math control.
+ *
+ * All new bits must restrict optimizations when they are set, not when they
+ * are missing. This means a bitwise OR always produces a no-less-restrictive set.
+ *
+ * See also nir_alu_instr::exact, which should (and hopefully will be) moved
+ * to this enum in the future.
+ */
+typedef enum {
+   nir_fp_preserve_signed_zero = BITFIELD_BIT(0),
+   nir_fp_preserve_inf = BITFIELD_BIT(1),
+   nir_fp_preserve_nan = BITFIELD_BIT(2),
+
+   nir_fp_preserve_sz_inf_nan = BITFIELD_MASK(3),
+   nir_fp_fast_math = 0,
+   nir_fp_no_fast_math = BITFIELD_MASK(3),
+} nir_fp_math_control;
 
 /***/
 typedef struct nir_alu_instr {
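Note on the new enum: because every nir_fp_* bit forbids an optimization rather than enabling one, nir_fp_fast_math is the empty set (0), nir_fp_no_fast_math is all three bits, and merging the controls of two instructions is a plain bitwise OR. A minimal standalone sketch of that invariant (the scenario is invented; only the bit layout mirrors the enum above):

    #include <assert.h>

    enum {
       fp_preserve_signed_zero = 1 << 0,  /* mirrors nir_fp_preserve_signed_zero */
       fp_preserve_inf         = 1 << 1,  /* mirrors nir_fp_preserve_inf */
       fp_preserve_nan         = 1 << 2,  /* mirrors nir_fp_preserve_nan */
    };

    int main(void)
    {
       unsigned a = fp_preserve_nan;         /* instruction A: NaNs must survive */
       unsigned b = fp_preserve_signed_zero; /* instruction B: -0.0 must survive */
       unsigned merged = a | b;              /* no less restrictive than either */

       assert((merged & a) == a);            /* A's guarantees still hold */
       assert((merged & b) == b);            /* B's guarantees still hold */
       return 0;
    }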
@@ -1562,7 +1548,7 @@ typedef struct nir_alu_instr {
    * still handled through the exact bit, and the other float controls bits
    * (rounding mode and denorm handling) remain in the execution mode only.
    */
-   uint32_t fp_fast_math : 9;
+   uint32_t fp_math_ctrl : 3;
 
   /** Sources
    *

@@ -1574,25 +1560,25 @@
 static inline bool
 nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_signed_zero;
 }
 
 static inline bool
 nir_alu_instr_is_inf_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_inf;
 }
 
 static inline bool
 nir_alu_instr_is_nan_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_nan;
 }
 
 static inline bool
 nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_sz_inf_nan;
 }
 
 void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
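These four predicates replace the shader-wide nir_is_float_control_* queries deleted from nir.h: a pass now asks the instruction itself instead of combining the execution mode with a bit size. A hypothetical use in an optimization pass (a sketch, not code from this MR, assuming nir.h is included):

    /* Hypothetical pass helper: "x + 0.0 -> x" is only legal when the
     * instruction does not have to preserve signed zeros, because for
     * x == -0.0 the true result -0.0 + 0.0 == +0.0 would become -0.0. */
    static bool
    can_fold_fadd_zero(nir_alu_instr *alu)
    {
       return !alu->exact && !nir_alu_instr_is_signed_zero_preserve(alu);
    }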
@@ -72,7 +72,7 @@ nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr
    const nir_op_info *op_info = &nir_op_infos[instr->op];
 
    instr->exact = build->exact;
-   instr->fp_fast_math = build->fp_fast_math;
+   instr->fp_math_ctrl = build->fp_math_ctrl;
 
    /* Guess the number of components the destination temporary should have
     * based on our input sizes, if it's not fixed for the op.

@@ -388,7 +388,7 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components)
       instr->src[i].swizzle[0] = comp[i].comp;
    }
    instr->exact = build->exact;
-   instr->fp_fast_math = build->fp_fast_math;
+   instr->fp_math_ctrl = build->fp_math_ctrl;
 
    /* Note: not reusing nir_builder_alu_instr_finish_and_insert() because it
    * can't re-guess the num_components when num_components == 1 (nir_op_mov).

@@ -44,7 +44,7 @@ typedef struct nir_builder {
    bool constant_fold_alu;
 
    /* Float_controls2 bits. See nir_alu_instr for details. */
-   uint32_t fp_fast_math;
+   uint32_t fp_math_ctrl;
 
    nir_shader *shader;
    nir_function_impl *impl;

@@ -725,7 +725,7 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
    nir_def_init(&mov->instr, &mov->def, num_components,
                 nir_src_bit_size(src.src));
    mov->exact = build->exact;
-   mov->fp_fast_math = build->fp_fast_math;
+   mov->fp_math_ctrl = build->fp_math_ctrl;
    mov->src[0] = src;
    nir_builder_instr_insert(build, &mov->instr);
@@ -268,7 +268,7 @@ clone_alu(clone_state *state, const nir_alu_instr *alu)
    nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
    clone_debug_info(state, &nalu->instr, &alu->instr);
    nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
    nalu->no_signed_wrap = alu->no_signed_wrap;
    nalu->no_unsigned_wrap = alu->no_unsigned_wrap;

@@ -807,7 +807,7 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr,
    */
   if (instr->type == nir_instr_type_alu) {
      nir_instr_as_alu(match)->exact |= nir_instr_as_alu(instr)->exact;
-     nir_instr_as_alu(match)->fp_fast_math |= nir_instr_as_alu(instr)->fp_fast_math;
+     nir_instr_as_alu(match)->fp_math_ctrl |= nir_instr_as_alu(instr)->fp_math_ctrl;
   }
 
   assert(!def == !new_def);
@@ -44,7 +44,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
 
    b->cursor = nir_before_instr(&instr->instr);
    b->exact = instr->exact;
-   b->fp_fast_math = instr->fp_fast_math;
+   b->fp_math_ctrl = instr->fp_math_ctrl;
 
    switch (instr->op) {
    case nir_op_bitfield_reverse:

@@ -176,9 +176,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
    * nir_lower_alu is idempotent, and allows the backend to implement
    * soundly the no_signed_zero subset of fmin/fmax.
    */
-   b->fp_fast_math &= ~FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE;
+   b->fp_math_ctrl &= ~nir_fp_preserve_signed_zero;
    nir_def *fminmax = max ? nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
-   b->fp_fast_math = instr->fp_fast_math;
+   b->fp_math_ctrl = instr->fp_math_ctrl;
 
    /* If we have a constant source, we can usually optimize */
    if (s0->num_components == 1 && s0->bit_size == 32) {
@@ -111,7 +111,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
       chan->src[1].swizzle[0] = chan->src[1].swizzle[channel];
    }
    chan->exact = alu->exact;
-   chan->fp_fast_math = alu->fp_fast_math;
+   chan->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_builder_instr_insert(builder, &chan->instr);

@@ -164,7 +164,7 @@ lower_bfdot_to_bfdot2_bfadd(nir_builder *b, nir_alu_instr *alu)
    }
    instr->src[2].src = nir_src_for_ssa(acc);
    instr->exact = b->exact;
-   instr->fp_fast_math = b->fp_fast_math;
+   instr->fp_math_ctrl = b->fp_math_ctrl;
 
    nir_builder_instr_insert(b, &instr->instr);
    acc = &instr->def;

@@ -206,7 +206,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
    if (i != 0)
       instr->src[2].src = nir_src_for_ssa(prev);
    instr->exact = builder->exact;
-   instr->fp_fast_math = builder->fp_fast_math;
+   instr->fp_math_ctrl = builder->fp_math_ctrl;
 
    nir_builder_instr_insert(builder, &instr->instr);

@@ -225,7 +225,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
    unsigned i, chan;
 
    b->exact = alu->exact;
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    unsigned num_components = alu->def.num_components;
    unsigned target_width = 1;

@@ -449,7 +449,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
 
    nir_alu_ssa_dest_init(lower, components, alu->def.bit_size);
    lower->exact = alu->exact;
-   lower->fp_fast_math = alu->fp_fast_math;
+   lower->fp_math_ctrl = alu->fp_math_ctrl;
 
    for (i = 0; i < components; i++) {
       vec->src[chan + i].src = nir_src_for_ssa(&lower->def);

@@ -83,7 +83,7 @@ build_atomic(nir_builder *b, nir_intrinsic_instr *intr)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), before, data);
    nir_alu_instr *op = nir_def_as_alu(expected);
    op->exact = true;
-   op->fp_fast_math = 0;
+   op->fp_math_ctrl = nir_fp_no_fast_math;
    switch (intr->intrinsic) {
    case nir_intrinsic_ssbo_atomic:
       xchg = nir_ssbo_atomic_swap(b, intr->def.bit_size,
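The atomic lowering above pins the expanded ALU op to exact plus nir_fp_no_fast_math because the compare-and-swap loop it feeds only behaves like a native atomic when the value is recomputed bit-exactly on every retry. A host-side analogue of that constraint (illustrative only, not MR code):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <string.h>

    /* CAS loop emulating an atomic float add: if "f + x" could be rewritten
     * by fast-math (flushed, reassociated, NaN-folded), retries could store
     * a different bit pattern than the one the comparison expects. */
    static float
    emulated_atomic_fadd(_Atomic uint32_t *word, float x)
    {
       uint32_t old = atomic_load(word);
       uint32_t repl;
       do {
          float f;
          memcpy(&f, &old, sizeof(f));
          f += x;                          /* must stay bit-exact */
          memcpy(&repl, &f, sizeof(repl));
       } while (!atomic_compare_exchange_weak(word, &old, repl));

       float prev;
       memcpy(&prev, &old, sizeof(prev));  /* value before the exchange */
       return prev;
    }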
@@ -88,10 +88,8 @@ get_signed_inf(nir_builder *b, nir_def *zero)
 static nir_def *
 get_signed_zero(nir_builder *b, nir_def *src)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
    nir_def *zero;
-   if (nir_is_float_control_signed_zero_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
       nir_def *hi = nir_unpack_64_2x32_split_y(b, src);
       nir_def *sign = nir_iand_imm(b, hi, 0x80000000);
       zero = nir_pack_64_2x32_split(b, nir_imm_int(b, 0), sign);

@@ -105,9 +103,7 @@ get_signed_zero(nir_builder *b, nir_def *src)
 static nir_def *
 preserve_nan(nir_builder *b, nir_def *src, nir_def *res)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
-   if (nir_is_float_control_nan_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
       nir_def *is_nan = nir_fneu(b, src, src);
       return nir_bcsel(b, is_nan, src, res);
    }

@@ -317,7 +313,6 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = nir_ffma(b, y_1, r_1, y_1);
    }
 
-   uint32_t exec_mode = b->fp_fast_math;
    if (sqrt) {
       /* Here, the special cases we need to handle are
        * 0 -> 0 (sign preserving)

@@ -343,7 +338,7 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = fix_inv_result(b, res, src, new_exp);
    }
 
-   if (nir_is_float_control_nan_preserve(exec_mode, 64))
+   if (b->fp_math_ctrl & nir_fp_preserve_nan)
       res = nir_bcsel(b, nir_feq_imm(b, src, -INFINITY),
                       nir_imm_double(b, NAN), res);

@@ -504,7 +499,7 @@ lower_minmax(nir_builder *b, nir_op cmp, nir_def *src0, nir_def *src1)
    /* IEEE-754-2019 requires that fmin/fmax compare -0 < 0, but -0 and 0 are
    * indistinguishable for flt/fge. So, we fix up signed zeroes.
    */
-   if (nir_is_float_control_signed_zero_preserve(b->fp_fast_math, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
      nir_def *src0_is_negzero = nir_ieq_imm(b, src0, 1ull << 63);
      nir_def *src1_is_poszero = nir_ieq_imm(b, src1, 0x0);
      nir_def *neg_pos_zero = nir_iand(b, src0_is_negzero, src1_is_poszero);

@@ -772,7 +767,7 @@ lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
    /* Easier to set it here than pass it around all over the place. */
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def *soft_def =
       lower_doubles_instr_to_soft(b, alu, data->softfp64, options);
@@ -345,7 +345,7 @@ convert_flrp_instruction(nir_builder *bld,
 
    bld->cursor = nir_before_instr(&alu->instr);
    bld->exact = alu->exact;
-   bld->fp_fast_math = alu->fp_fast_math;
+   bld->fp_math_ctrl = alu->fp_math_ctrl;
 
    /* There are two methods to implement flrp(x, y, t). The strictly correct
    * implementation according to the GLSL spec is:

@@ -866,7 +866,7 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
 {
    nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
    nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def_init(&nalu->instr, &nalu->def,
                 alu->def.num_components,

@@ -881,7 +881,6 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
    nir_builder_instr_insert(b, &nalu->instr);
 
    return &nalu->def;
-   ;
 }
 
 /*
@@ -174,7 +174,7 @@ struct chain {
    unsigned length;
    nir_scalar srcs[MAX_CHAIN_LENGTH];
    bool do_global_cse, exact;
-   unsigned fp_fast_math;
+   unsigned fp_math_ctrl;
 };
 
 UNUSED static void

@@ -222,7 +222,7 @@ build_chain(struct chain *c, nir_scalar def, unsigned reserved_count)
    * It is safe to add `exact` or float control bits, but not the reverse.
    */
    c->exact |= alu->exact;
-   c->fp_fast_math |= alu->fp_fast_math;
+   c->fp_math_ctrl |= alu->fp_math_ctrl;
 
    for (unsigned i = 0; i < 2; ++i) {
       nir_scalar src = nir_scalar_chase_alu_src(def, i);

@@ -451,7 +451,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
 {
    nir_builder b = nir_builder_at(nir_before_instr(&c->root->instr));
    b.exact = c->exact;
-   b.fp_fast_math = c->fp_fast_math;
+   b.fp_math_ctrl = c->fp_math_ctrl;
 
    /* Pick a new order using sort-by-rank and possibly the CSE heuristics */
    unsigned pinned = 0;

@@ -503,7 +503,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
    /* Set flags conservatively, matching the rest of the chain */
    c->root->no_signed_wrap = c->root->no_unsigned_wrap = false;
    c->root->exact = c->exact;
-   c->root->fp_fast_math = c->fp_fast_math;
+   c->root->fp_math_ctrl = c->fp_math_ctrl;
    return true;
 }
@@ -47,11 +47,11 @@ phi_srcs_equal(nir_def *a, nir_def *b)
    if (!nir_instrs_equal(a_instr, b_instr))
       return false;
 
-   /* nir_instrs_equal ignores exact/fast_math */
+   /* nir_instrs_equal ignores exact/fp_math_ctrl */
    if (a_instr->type == nir_instr_type_alu) {
       nir_alu_instr *a_alu = nir_def_as_alu(a);
       nir_alu_instr *b_alu = nir_def_as_alu(b);
-      if (a_alu->exact != b_alu->exact || a_alu->fp_fast_math != b_alu->fp_fast_math)
+      if (a_alu->exact != b_alu->exact || a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
         return false;
    }
@@ -1017,20 +1017,20 @@ get_interp_vec4_type(struct linkage_info *linkage, unsigned slot,
 }
 
 static bool
-preserve_infs_nans(nir_shader *nir, unsigned bit_size)
+uses_preserve_nans(nir_def *def)
 {
-   unsigned mode = nir->info.float_controls_execution_mode;
+   nir_foreach_use_including_if(use, def) {
+      if (nir_src_is_if(use))
+         return true;
+      if (!nir_src_is_alu(*use))
+         return true;
 
-   return nir_is_float_control_inf_preserve(mode, bit_size) ||
-          nir_is_float_control_nan_preserve(mode, bit_size);
-}
-
-static bool
-preserve_nans(nir_shader *nir, unsigned bit_size)
-{
-   unsigned mode = nir->info.float_controls_execution_mode;
-
-   return nir_is_float_control_nan_preserve(mode, bit_size);
+      nir_alu_instr *alu = nir_src_as_alu(*use);
+      if (nir_alu_instr_is_nan_preserve(alu))
+         return true;
+   }
+
+   return false;
 }
 
 static nir_def *
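The rewritten query flips from "does the shader preserve NaNs at this bit size" to "does any consumer of this value care": an `if` condition or a non-ALU use is conservatively assumed to care, since turning an interpolated Inf into a NaN can flip an ordered comparison feeding control flow. A standalone illustration of that hazard:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
       float inf = INFINITY;
       float nan = NAN;

       /* Ordered '<' is true for -Inf but false for NaN, so an Inf-to-NaN
        * conversion upstream of a branch condition can change which side
        * of the branch runs. */
       printf("-Inf < 1.0f: %d\n", -inf < 1.0f);  /* prints 1 */
       printf(" NaN < 1.0f: %d\n", nan < 1.0f);   /* prints 0 */
       return 0;
    }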
@@ -2521,7 +2521,7 @@ propagate_uniform_expressions(struct linkage_info *linkage,
    * convert Infs to NaNs manually.
    */
   if (loadi->intrinsic == nir_intrinsic_load_interpolated_input &&
-      preserve_nans(b->shader, clone->bit_size))
+      uses_preserve_nans(&loadi->def))
      clone = build_convert_inf_to_nan(b, clone);
 
   /* Replace the original load. */

@@ -3123,7 +3123,7 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
    * that instruction, while removing the Infs to NaNs conversion for sourced
    * interpolated values. We can't do that if Infs and NaNs must be preserved.
    */
-   if (preserve_infs_nans(linkage->consumer_builder.shader, alu->def.bit_size))
+   if (nir_alu_instr_is_inf_preserve(alu) || nir_alu_instr_is_nan_preserve(alu))
      return false;
 
   switch (alu->op) {

@@ -4304,8 +4304,7 @@ relocate_slot(struct linkage_info *linkage, struct scalar_slot *slot,
    * we need to convert Infs to NaNs manually in the producer to
    * preserve that.
    */
-   if (preserve_nans(linkage->consumer_builder.shader,
-                     load->bit_size)) {
+   if (uses_preserve_nans(load)) {
      list_for_each_entry(struct list_node, iter,
                          &slot->producer.stores, head) {
         nir_intrinsic_instr *store = iter->instr;
@@ -458,10 +458,9 @@ instr_try_combine_alu(struct set *instr_set, nir_alu_instr *alu1, nir_alu_instr
    */
    new_alu->exact = alu1->exact || alu2->exact;
 
-   /* fp_fast_math is a set of FLOAT_CONTROLS_*_PRESERVE_*. Preserve anything
-    * preserved by either instruction.
+   /* fp_math_ctrl is a set of restrictions, take the union of both.
    */
-   new_alu->fp_fast_math = alu1->fp_fast_math | alu2->fp_fast_math;
+   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
 
    /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
@@ -480,7 +480,7 @@ construct_value(nir_builder *build,
    * replacement should be exact.
    */
   alu->exact = state->has_exact_alu || expr->exact;
-  alu->fp_fast_math = nir_instr_as_alu(instr)->fp_fast_math;
+  alu->fp_math_ctrl = nir_instr_as_alu(instr)->fp_math_ctrl;
 
   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      /* If the source is an explicitly sized source, then we need to reset
@@ -733,7 +733,7 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
    }
 
    write_def(ctx, &alu->def, header, alu->instr.type);
-   blob_write_uint32(ctx->blob, alu->fp_fast_math);
+   blob_write_uint32(ctx->blob, alu->fp_math_ctrl);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {

@@ -788,7 +788,7 @@ read_alu(read_ctx *ctx, union packed_instr header)
    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
 
    read_def(ctx, &alu->def, &alu->instr, header);
-   alu->fp_fast_math = blob_read_uint32(ctx->blob);
+   alu->fp_math_ctrl = blob_read_uint32(ctx->blob);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {
@@ -1550,62 +1550,19 @@ enum gl_derivative_group {
 
 enum float_controls
 {
-   /* The order of these matters. For float_controls2, only the first 9 bits
-    * are used and stored per-instruction in nir_alu_instr::fp_fast_math.
-    * Any changes in this enum need to be synchronized with that.
-    */
    FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE = 0,
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 = BITFIELD_BIT(0),
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 = BITFIELD_BIT(1),
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64 = BITFIELD_BIT(2),
-   FLOAT_CONTROLS_INF_PRESERVE_FP16 = BITFIELD_BIT(3),
-   FLOAT_CONTROLS_INF_PRESERVE_FP32 = BITFIELD_BIT(4),
-   FLOAT_CONTROLS_INF_PRESERVE_FP64 = BITFIELD_BIT(5),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP16 = BITFIELD_BIT(6),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP32 = BITFIELD_BIT(7),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP64 = BITFIELD_BIT(8),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP16 = BITFIELD_BIT(9),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP32 = BITFIELD_BIT(10),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP64 = BITFIELD_BIT(11),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 = BITFIELD_BIT(12),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 = BITFIELD_BIT(13),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64 = BITFIELD_BIT(14),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 = BITFIELD_BIT(15),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 = BITFIELD_BIT(16),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 = BITFIELD_BIT(17),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 = BITFIELD_BIT(18),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 = BITFIELD_BIT(19),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64 = BITFIELD_BIT(20),
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP16,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP32,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP64 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_INF_PRESERVE =
-      FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_NAN_PRESERVE =
-      FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP64,
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP16 = BITFIELD_BIT(0),
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP32 = BITFIELD_BIT(1),
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP64 = BITFIELD_BIT(2),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 = BITFIELD_BIT(3),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 = BITFIELD_BIT(4),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64 = BITFIELD_BIT(5),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 = BITFIELD_BIT(6),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 = BITFIELD_BIT(7),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 = BITFIELD_BIT(8),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 = BITFIELD_BIT(9),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 = BITFIELD_BIT(10),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64 = BITFIELD_BIT(11),
 };
 
 /**
@@ -5689,9 +5689,9 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
       break;
    case SpvExecutionModeSignedZeroInfNanPreserve:
       switch (mode->operands[0]) {
-      case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
-      case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
-      case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
+      case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_sz_inf_nan; break;
+      case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_sz_inf_nan; break;
+      case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_sz_inf_nan; break;
      default: vtn_fail("Floating point type not supported");
      }
      break;

@@ -5863,29 +5863,27 @@ vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
    if ((flags & can_fast_math) != can_fast_math)
       b->exact = true;
 
-   unsigned execution_mode = 0;
    if (!(flags & SpvFPFastMathModeNotNaNMask)) {
       switch (glsl_get_bit_size(type->type)) {
-      case 16: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP16; break;
-      case 32: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP32; break;
-      case 64: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP64; break;
+      case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_nan; break;
+      case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_nan; break;
+      case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_nan; break;
      }
    }
    if (!(flags & SpvFPFastMathModeNotInfMask)) {
      switch (glsl_get_bit_size(type->type)) {
-      case 16: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP16; break;
-      case 32: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP32; break;
-      case 64: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP64; break;
+      case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_inf; break;
+      case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_inf; break;
+      case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_inf; break;
      }
    }
    if (!(flags & SpvFPFastMathModeNSZMask)) {
      switch (glsl_get_bit_size(type->type)) {
-      case 16: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16; break;
-      case 32: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32; break;
-      case 64: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64; break;
+      case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_signed_zero; break;
+      case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_signed_zero; break;
+      case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_signed_zero; break;
      }
    }
-   b->shader->info.float_controls_execution_mode |= execution_mode;
    break;
 }
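SPIR-V's FPFastMathMode flags grant freedoms while nir_fp_math_control records restrictions, so each absent Spv mask bit turns into a set NIR bit, accumulated per bit width in the new vtn_builder fields. Distilled into one hedged helper (not an actual Mesa function, just a condensation of the switch statements above):

    /* Hypothetical condensation of the FPFastMathMode decoding above. */
    static unsigned
    fp_fast_math_flags_to_math_ctrl(SpvFPFastMathModeMask flags)
    {
       unsigned ctrl = nir_fp_fast_math;   /* == 0: everything allowed */
       if (!(flags & SpvFPFastMathModeNotNaNMask))
          ctrl |= nir_fp_preserve_nan;
       if (!(flags & SpvFPFastMathModeNotInfMask))
          ctrl |= nir_fp_preserve_inf;
       if (!(flags & SpvFPFastMathModeNSZMask))
          ctrl |= nir_fp_preserve_signed_zero;
       return ctrl;
    }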
@@ -417,22 +417,13 @@ handle_fp_fast_math(struct vtn_builder *b, UNUSED struct vtn_value *val,
    b->nb.exact = true;
 
    /* Decoration overrides defaults */
-   b->nb.fp_fast_math = 0;
+   b->nb.fp_math_ctrl = 0;
    if (!(dec->operands[0] & SpvFPFastMathModeNSZMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
    if (!(dec->operands[0] & SpvFPFastMathModeNotNaNMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
    if (!(dec->operands[0] & SpvFPFastMathModeNotInfMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
 }
 
 void

@@ -441,18 +432,26 @@ vtn_handle_fp_fast_math(struct vtn_builder *b, struct vtn_value *val)
    /* Take the NaN/Inf/SZ preserve bits from the execution mode and set them
    * on the builder, so the generated instructions can take it from it.
-    * We only care about some of them, check nir_alu_instr for details.
-    * We also copy all bit widths, because we can't easily get the correct one
-    * here.
    */
-#define FLOAT_CONTROLS2_BITS (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64)
-   static_assert(FLOAT_CONTROLS2_BITS == BITSET_MASK(9),
-                 "enum float_controls and fp_fast_math out of sync!");
-   b->nb.fp_fast_math = b->shader->info.float_controls_execution_mode &
-                        FLOAT_CONTROLS2_BITS;
+   unsigned bit_size;
+
+   /* Some ALU like modf and frexp return a struct of two values. */
+   if (!val->type)
+      bit_size = 0;
+   else if (glsl_type_is_struct(val->type->type))
+      bit_size = glsl_get_bit_size(val->type->type->fields.structure[0].type);
+   else
+      bit_size = glsl_get_bit_size(val->type->type);
+
+   switch (bit_size) {
+   case 16: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp16; break;
+   case 32: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp32; break;
+   case 64: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp64; break;
+   default: b->nb.fp_math_ctrl = 0; break;
+   }
 
    vtn_foreach_decoration(b, val, handle_fp_fast_math, NULL);
-#undef FLOAT_CONTROLS2_BITS
 }
 
 nir_rounding_mode

@@ -870,15 +869,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 
    case SpvOpIsInf: {
       const bool save_exact = b->nb.exact;
-      const unsigned save_fast_math = b->nb.fp_fast_math;
+      const unsigned save_math_ctrl = b->nb.fp_math_ctrl;
 
       b->nb.exact = true;
-      b->nb.fp_fast_math = 0;
+      b->nb.fp_math_ctrl = nir_fp_no_fast_math;
       nir_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
       dest->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), inf);
 
       b->nb.exact = save_exact;
-      b->nb.fp_fast_math = save_fast_math;
+      b->nb.fp_math_ctrl = save_math_ctrl;
       break;
    }
@@ -38,21 +38,6 @@
 #define M_PI_4f ((float) M_PI_4)
 #endif
 
-/**
- * Some fp16 instructions (i.e., asin and acos) are lowered as fp32. In these cases the
- * generated fp32 instructions need the same fp_fast_math settings as fp16.
- */
-static void
-propagate_fp16_fast_math_to_fp32(struct nir_builder *b)
-{
-   static_assert(FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 ==
-                 (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1),
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 is not "
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1.");
-
-   b->fp_fast_math |= (b->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) << 1;
-}
-
 static nir_def *build_det(nir_builder *b, nir_def **col, unsigned cols);
 
 /* Computes the determinant of the submatrix given by taking src and

@@ -178,13 +163,9 @@ build_asin(nir_builder *b, nir_def *x, float p0, float p1, bool piecewise)
       * approximation in 32-bit math and then we convert the result back to
       * 16-bit.
       */
-      const uint32_t save = b->fp_fast_math;
-      propagate_fp16_fast_math_to_fp32(b);
-
      nir_def *result =
         nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
-
-      b->fp_fast_math = save;
      return result;
   }
   nir_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
@@ -709,6 +709,10 @@ struct vtn_builder {
    /* false by default, set to true by the ContractionOff execution mode */
    bool exact;
 
+   unsigned fp_math_ctrl_fp16;
+   unsigned fp_math_ctrl_fp32;
+   unsigned fp_math_ctrl_fp64;
+
    /* when a physical memory model is chosen */
    bool physical_ptrs;

@@ -3683,32 +3683,20 @@ visit_alu(struct lp_build_nir_soa_context *bld,
    struct lp_type scalar_double_type = bld->scalar_dbl_bld.type;
 
    /* Set the per-instruction float controls. */
-   bld->half_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-   bld->scalar_half_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-   bld->half_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
-   bld->scalar_half_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
+   bld->half_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_half_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->half_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_half_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
-   bld->base.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-   bld->scalar_base.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-   bld->base.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
-   bld->scalar_base.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
+   bld->base.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_base.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->base.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_base.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
-   bld->dbl_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-   bld->scalar_dbl_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-   bld->dbl_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
-   bld->scalar_dbl_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
+   bld->dbl_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_dbl_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->dbl_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_dbl_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      /**
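gallivm consumes the per-instruction bits by OR-ing them into its lp_type build contexts for the duration of one ALU instruction; the types saved at the top of visit_alu (scalar_double_type and friends) are what gets restored afterwards. The shape of that pattern, reduced to a standalone model with invented types:

    #include <assert.h>
    #include <stdbool.h>

    /* Minimal model of the save/OR/restore pattern above; fp_type and the
     * scenario are invented, only the shape mirrors the gallivm change. */
    struct fp_type {
       bool signed_zero_preserve;
       bool nan_preserve;
    };

    int main(void)
    {
       struct fp_type ctx = {0};

       struct fp_type saved = ctx;   /* save before the instruction     */
       ctx.nan_preserve |= true;     /* this instruction preserves NaNs */
       /* ... code for the instruction would be emitted under `ctx` ... */
       ctx = saved;                  /* restore so the next instruction
                                      * starts from the shader defaults */
       assert(!ctx.nan_preserve);
       return 0;
    }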
@@ -5877,30 +5865,18 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
    {
       struct lp_type float_type = type;
-      float_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-      float_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
       lp_build_context_init(&bld.base, gallivm, float_type);
    }
    {
       struct lp_type dbl_type;
       dbl_type = type;
       dbl_type.width *= 2;
-      dbl_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-      dbl_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
       lp_build_context_init(&bld.dbl_bld, gallivm, dbl_type);
    }
    {
       struct lp_type half_type;
       half_type = type;
       half_type.width /= 2;
-      half_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-      half_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
       lp_build_context_init(&bld.half_bld, gallivm, half_type);
    }
    {

@@ -5952,30 +5928,18 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.scalar_int_bld, gallivm, lp_int_type(elem_type));
    {
       struct lp_type float_type = elem_type;
-      float_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-      float_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
       lp_build_context_init(&bld.scalar_base, gallivm, float_type);
    }
    {
       struct lp_type dbl_type;
       dbl_type = elem_type;
       dbl_type.width *= 2;
-      dbl_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-      dbl_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
       lp_build_context_init(&bld.scalar_dbl_bld, gallivm, dbl_type);
    }
    {
       struct lp_type half_type;
       half_type = elem_type;
       half_type.width /= 2;
-      half_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-      half_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
       lp_build_context_init(&bld.scalar_half_bld, gallivm, half_type);
    }
    {
@@ -301,8 +301,7 @@ impl SPIRVBin {
             private_data: ptr::from_mut(log).cast(),
         });
 
-        let float_controls = float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 as u32
-            | float_controls::FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE as u32;
+        let float_controls = float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 as u32;
         spirv_to_nir_options {
             create_library: library,
             environment: nir_spirv_execution_environment::NIR_SPIRV_OPENCL,
@@ -219,7 +219,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b,
       mul_src[0] = nir_fneg(b, mul_src[0]);
 
    nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
-   ffma->fp_fast_math = mul->fp_fast_math | add->fp_fast_math;
+   ffma->fp_math_ctrl = mul->fp_math_ctrl | add->fp_math_ctrl;
 
    for (unsigned i = 0; i < 2; i++) {
       ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
@@ -51,7 +51,7 @@ lower_atomic_in_lock(nir_builder *b, nir_intrinsic_instr *intr, nir_def *loaded)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), loaded, data);
    nir_alu_instr *alu = nir_def_as_alu(to_store);
    alu->exact = true;
-   alu->fp_fast_math = 0;
+   alu->fp_math_ctrl = nir_fp_no_fast_math;
    break;
 }
 case nir_atomic_op_xchg: {