diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7209d28e853..2abf965802b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1322,38 +1322,6 @@ nir_op_is_vec_or_mov(nir_op op)
    return op == nir_op_mov || nir_op_is_vec(op);
 }
 
-static inline bool
-nir_is_float_control_signed_zero_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_inf_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_nan_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
-}
-
-static inline bool
-nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
-{
-   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
-          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
-          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
-}
-
 static inline bool
 nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
 {
@@ -1522,6 +1490,24 @@ nir_op_is_selection(nir_op op)
 {
    return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
 }
 
+/**
+ * Floating point fast math control.
+ *
+ * All new bits must restrict optimizations when they are set, not when they
+ * are missing. This means a bitwise OR never produces a set that is less
+ * restrictive than either of its operands.
+ *
+ * See also nir_alu_instr::exact, which should (and hopefully will) be moved
+ * to this enum in the future.
+ */
+typedef enum {
+   nir_fp_preserve_signed_zero = BITFIELD_BIT(0),
+   nir_fp_preserve_inf = BITFIELD_BIT(1),
+   nir_fp_preserve_nan = BITFIELD_BIT(2),
+
+   nir_fp_preserve_sz_inf_nan = BITFIELD_MASK(3),
+   nir_fp_fast_math = 0,
+   nir_fp_no_fast_math = BITFIELD_MASK(3),
+} nir_fp_math_control;
 
 /***/
 typedef struct nir_alu_instr {
@@ -1562,7 +1548,7 @@ typedef struct nir_alu_instr {
     * still handled through the exact bit, and the other float controls bits
     * (rounding mode and denorm handling) remain in the execution mode only.
    */
-   uint32_t fp_fast_math : 9;
+   uint32_t fp_math_ctrl : 3;
 
    /** Sources
     *
@@ -1574,25 +1560,25 @@ typedef struct nir_alu_instr {
 static inline bool
 nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_signed_zero;
 }
 
 static inline bool
 nir_alu_instr_is_inf_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_inf;
 }
 
 static inline bool
 nir_alu_instr_is_nan_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_nan;
 }
 
 static inline bool
 nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu)
 {
-   return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
+   return alu->fp_math_ctrl & nir_fp_preserve_sz_inf_nan;
 }
 
 void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c
index 493aac67cdf..d9719b3fb9a 100644
--- a/src/compiler/nir/nir_builder.c
+++ b/src/compiler/nir/nir_builder.c
@@ -72,7 +72,7 @@ nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr
    const nir_op_info *op_info = &nir_op_infos[instr->op];
 
    instr->exact = build->exact;
-   instr->fp_fast_math = build->fp_fast_math;
+   instr->fp_math_ctrl = build->fp_math_ctrl;
 
    /* Guess the number of components the destination temporary should have
    * based on our input sizes, if it's not fixed for the op.
@@ -388,7 +388,7 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components)
       instr->src[i].swizzle[0] = comp[i].comp;
    }
    instr->exact = build->exact;
-   instr->fp_fast_math = build->fp_fast_math;
+   instr->fp_math_ctrl = build->fp_math_ctrl;
 
    /* Note: not reusing nir_builder_alu_instr_finish_and_insert() because it
    * can't re-guess the num_components when num_components == 1 (nir_op_mov).
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 8d3a2e1b6a4..6f9e5262865 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -44,7 +44,7 @@ typedef struct nir_builder {
    bool constant_fold_alu;
 
    /* Float_controls2 bits. See nir_alu_instr for details.
    */
-   uint32_t fp_fast_math;
+   uint32_t fp_math_ctrl;
 
    nir_shader *shader;
    nir_function_impl *impl;
@@ -725,7 +725,7 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
    nir_def_init(&mov->instr, &mov->def, num_components, nir_src_bit_size(src.src));
    mov->exact = build->exact;
-   mov->fp_fast_math = build->fp_fast_math;
+   mov->fp_math_ctrl = build->fp_math_ctrl;
    mov->src[0] = src;
 
    nir_builder_instr_insert(build, &mov->instr);
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index aa9159fe73f..4cbeba34256 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -268,7 +268,7 @@ clone_alu(clone_state *state, const nir_alu_instr *alu)
    nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
    clone_debug_info(state, &nalu->instr, &alu->instr);
    nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
    nalu->no_signed_wrap = alu->no_signed_wrap;
    nalu->no_unsigned_wrap = alu->no_unsigned_wrap;
 
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index ed6cb6e7ca9..e158e4b8607 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -807,7 +807,7 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr,
     */
    if (instr->type == nir_instr_type_alu) {
       nir_instr_as_alu(match)->exact |= nir_instr_as_alu(instr)->exact;
-      nir_instr_as_alu(match)->fp_fast_math |= nir_instr_as_alu(instr)->fp_fast_math;
+      nir_instr_as_alu(match)->fp_math_ctrl |= nir_instr_as_alu(instr)->fp_math_ctrl;
    }
 
    assert(!def == !new_def);
diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c
index 431d0d34edf..a7f447df33b 100644
--- a/src/compiler/nir/nir_lower_alu.c
+++ b/src/compiler/nir/nir_lower_alu.c
@@ -44,7 +44,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
    b->cursor = nir_before_instr(&instr->instr);
 
    b->exact = instr->exact;
-   b->fp_fast_math = instr->fp_fast_math;
+   b->fp_math_ctrl = instr->fp_math_ctrl;
 
    switch (instr->op) {
    case nir_op_bitfield_reverse:
@@ -176,9 +176,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
       * nir_lower_alu is idempotent, and allows the backend to soundly
       * implement the no_signed_zero subset of fmin/fmax.
       */
-      b->fp_fast_math &= ~FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE;
+      b->fp_math_ctrl &= ~nir_fp_preserve_signed_zero;
       nir_def *fminmax = max ?
                              nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
-      b->fp_fast_math = instr->fp_fast_math;
+      b->fp_math_ctrl = instr->fp_math_ctrl;
 
      /* If we have a constant source, we can usually optimize */
      if (s0->num_components == 1 && s0->bit_size == 32) {
diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c
index e23325fd5fb..9d12cd9ac7c 100644
--- a/src/compiler/nir/nir_lower_alu_width.c
+++ b/src/compiler/nir/nir_lower_alu_width.c
@@ -111,7 +111,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
         chan->src[1].swizzle[0] = chan->src[1].swizzle[channel];
      }
      chan->exact = alu->exact;
-      chan->fp_fast_math = alu->fp_fast_math;
+      chan->fp_math_ctrl = alu->fp_math_ctrl;
 
      nir_builder_instr_insert(builder, &chan->instr);
 
@@ -164,7 +164,7 @@ lower_bfdot_to_bfdot2_bfadd(nir_builder *b, nir_alu_instr *alu)
      }
      instr->src[2].src = nir_src_for_ssa(acc);
      instr->exact = b->exact;
-      instr->fp_fast_math = b->fp_fast_math;
+      instr->fp_math_ctrl = b->fp_math_ctrl;
      nir_builder_instr_insert(b, &instr->instr);
 
      acc = &instr->def;
@@ -206,7 +206,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
      if (i != 0)
         instr->src[2].src = nir_src_for_ssa(prev);
      instr->exact = builder->exact;
-      instr->fp_fast_math = builder->fp_fast_math;
+      instr->fp_math_ctrl = builder->fp_math_ctrl;
 
      nir_builder_instr_insert(builder, &instr->instr);
 
@@ -225,7 +225,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
    unsigned i, chan;
 
    b->exact = alu->exact;
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    unsigned num_components = alu->def.num_components;
    unsigned target_width = 1;
@@ -449,7 +449,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
 
      nir_alu_ssa_dest_init(lower, components, alu->def.bit_size);
      lower->exact = alu->exact;
-      lower->fp_fast_math = alu->fp_fast_math;
+      lower->fp_math_ctrl = alu->fp_math_ctrl;
 
      for (i = 0; i < components; i++) {
         vec->src[chan + i].src = nir_src_for_ssa(&lower->def);
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index bdad280fdcd..bf5f332f2df 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -83,7 +83,7 @@ build_atomic(nir_builder *b, nir_intrinsic_instr *intr)
         b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), before, data);
      nir_alu_instr *op = nir_def_as_alu(expected);
      op->exact = true;
-      op->fp_fast_math = 0;
+      op->fp_math_ctrl = nir_fp_no_fast_math;
      switch (intr->intrinsic) {
      case nir_intrinsic_ssbo_atomic:
         xchg = nir_ssbo_atomic_swap(b, intr->def.bit_size,
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 12df030de9c..1f320a7ee19 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -88,10 +88,8 @@ get_signed_inf(nir_builder *b, nir_def *zero)
 static nir_def *
 get_signed_zero(nir_builder *b, nir_def *src)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
    nir_def *zero;
-   if (nir_is_float_control_signed_zero_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
      nir_def *hi = nir_unpack_64_2x32_split_y(b, src);
      nir_def *sign = nir_iand_imm(b, hi, 0x80000000);
      zero = nir_pack_64_2x32_split(b, nir_imm_int(b, 0), sign);
@@ -105,9 +103,7 @@ get_signed_zero(nir_builder *b, nir_def *src)
 static nir_def *
 preserve_nan(nir_builder *b, nir_def *src, nir_def *res)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
-   if (nir_is_float_control_nan_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
      nir_def *is_nan = nir_fneu(b, src, src);
      return nir_bcsel(b, is_nan, src, res);
   }
@@ -317,7 +313,6 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
      res = nir_ffma(b, y_1, r_1, y_1);
   }
 
-   uint32_t exec_mode = b->fp_fast_math;
   if (sqrt) {
      /* Here, the special cases we need to handle are
       * 0 -> 0 (sign preserving)
@@ -343,7 +338,7 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
      res = fix_inv_result(b, res, src, new_exp);
   }
 
-   if (nir_is_float_control_nan_preserve(exec_mode, 64))
+   if (b->fp_math_ctrl & nir_fp_preserve_nan)
      res = nir_bcsel(b, nir_feq_imm(b, src, -INFINITY), nir_imm_double(b, NAN), res);
 
@@ -504,7 +499,7 @@ lower_minmax(nir_builder *b, nir_op cmp, nir_def *src0, nir_def *src1)
   /* IEEE-754-2019 requires that fmin/fmax compare -0 < 0, but -0 and 0 are
    * indistinguishable for flt/fge. So, we fix up signed zeroes.
    */
-   if (nir_is_float_control_signed_zero_preserve(b->fp_fast_math, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
      nir_def *src0_is_negzero = nir_ieq_imm(b, src0, 1ull << 63);
      nir_def *src1_is_poszero = nir_ieq_imm(b, src1, 0x0);
      nir_def *neg_pos_zero = nir_iand(b, src0_is_negzero, src1_is_poszero);
@@ -772,7 +767,7 @@ lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
   nir_alu_instr *alu = nir_instr_as_alu(instr);
 
   /* Easier to set it here than pass it around all over the place. */
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
   nir_def *soft_def = lower_doubles_instr_to_soft(b, alu, data->softfp64, options);
diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index c2f7892a371..e723d271141 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -345,7 +345,7 @@ convert_flrp_instruction(nir_builder *bld,
   bld->cursor = nir_before_instr(&alu->instr);
 
   bld->exact = alu->exact;
-   bld->fp_fast_math = alu->fp_fast_math;
+   bld->fp_math_ctrl = alu->fp_math_ctrl;
 
   /* There are two methods to implement flrp(x, y, t). The strictly correct
    * implementation according to the GLSL spec is:
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index cd2cce1f8b7..0be68225964 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -866,7 +866,7 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
 {
   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
   nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
 
   nir_def_init(&nalu->instr, &nalu->def, alu->def.num_components,
@@ -881,7 +881,6 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
   nir_builder_instr_insert(b, &nalu->instr);
 
   return &nalu->def;
-   ;
 }
 
 /*
diff --git a/src/compiler/nir/nir_opt_reassociate.c b/src/compiler/nir/nir_opt_reassociate.c
index 6367147f53d..2f878234b50 100644
--- a/src/compiler/nir/nir_opt_reassociate.c
+++ b/src/compiler/nir/nir_opt_reassociate.c
@@ -174,7 +174,7 @@ struct chain {
   unsigned length;
   nir_scalar srcs[MAX_CHAIN_LENGTH];
   bool do_global_cse, exact;
-   unsigned fp_fast_math;
+   unsigned fp_math_ctrl;
 };
 
 UNUSED static void
@@ -222,7 +222,7 @@ build_chain(struct chain *c, nir_scalar def, unsigned reserved_count)
    * It is safe to add `exact` or float control bits, but not the reverse.
    */
   c->exact |= alu->exact;
-   c->fp_fast_math |= alu->fp_fast_math;
+   c->fp_math_ctrl |= alu->fp_math_ctrl;
 
   for (unsigned i = 0; i < 2; ++i) {
      nir_scalar src = nir_scalar_chase_alu_src(def, i);
@@ -451,7 +451,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
 {
   nir_builder b = nir_builder_at(nir_before_instr(&c->root->instr));
   b.exact = c->exact;
-   b.fp_fast_math = c->fp_fast_math;
+   b.fp_math_ctrl = c->fp_math_ctrl;
 
   /* Pick a new order using sort-by-rank and possibly the CSE heuristics */
   unsigned pinned = 0;
@@ -503,7 +503,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
   /* Set flags conservatively, matching the rest of the chain */
   c->root->no_signed_wrap = c->root->no_unsigned_wrap = false;
   c->root->exact = c->exact;
-   c->root->fp_fast_math = c->fp_fast_math;
+   c->root->fp_math_ctrl = c->fp_math_ctrl;
 
   return true;
 }
diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c
index a6624bc9300..8e7136fa1ee 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -47,11 +47,11 @@ phi_srcs_equal(nir_def *a, nir_def *b)
   if (!nir_instrs_equal(a_instr, b_instr))
      return false;
 
-   /* nir_instrs_equal ignores exact/fast_math */
+   /* nir_instrs_equal ignores exact/fp_math_ctrl */
   if (a_instr->type == nir_instr_type_alu) {
      nir_alu_instr *a_alu = nir_def_as_alu(a);
      nir_alu_instr *b_alu = nir_def_as_alu(b);
-      if (a_alu->exact != b_alu->exact || a_alu->fp_fast_math != b_alu->fp_fast_math)
+      if (a_alu->exact != b_alu->exact || a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
         return false;
   }
 
diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c
index 55797507488..0c3931040ef 100644
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -1017,20 +1017,20 @@ get_interp_vec4_type(struct linkage_info *linkage, unsigned slot,
 }
 
 static bool
-preserve_infs_nans(nir_shader *nir, unsigned bit_size)
+uses_preserve_nans(nir_def *def)
 {
-   unsigned mode = nir->info.float_controls_execution_mode;
+   nir_foreach_use_including_if(use, def) {
+      if (nir_src_is_if(use))
+         return true;
+      if (!nir_src_is_alu(*use))
+         return true;
 
-   return nir_is_float_control_inf_preserve(mode, bit_size) ||
-          nir_is_float_control_nan_preserve(mode, bit_size);
-}
+      nir_alu_instr *alu = nir_src_as_alu(*use);
+      if (nir_alu_instr_is_nan_preserve(alu))
+         return true;
+   }
 
-static bool
-preserve_nans(nir_shader *nir, unsigned bit_size)
-{
-   unsigned mode = nir->info.float_controls_execution_mode;
-
-   return nir_is_float_control_nan_preserve(mode, bit_size);
+   return false;
 }
 
 static nir_def *
@@ -2521,7 +2521,7 @@ propagate_uniform_expressions(struct linkage_info *linkage,
       * convert Infs to NaNs manually.
       */
      if (loadi->intrinsic == nir_intrinsic_load_interpolated_input &&
-         preserve_nans(b->shader, clone->bit_size))
+         uses_preserve_nans(&loadi->def))
        clone = build_convert_inf_to_nan(b, clone);
 
      /* Replace the original load. */
@@ -3123,7 +3123,7 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
    * that instruction, while removing the Infs to NaNs conversion for sourced
    * interpolated values. We can't do that if Infs and NaNs must be preserved.
    */
-   if (preserve_infs_nans(linkage->consumer_builder.shader, alu->def.bit_size))
+   if (nir_alu_instr_is_inf_preserve(alu) || nir_alu_instr_is_nan_preserve(alu))
      return false;
 
   switch (alu->op) {
@@ -4304,8 +4304,7 @@ relocate_slot(struct linkage_info *linkage, struct scalar_slot *slot,
       * we need to convert Infs to NaNs manually in the producer to
       * preserve that.
       */
-      if (preserve_nans(linkage->consumer_builder.shader,
-                        load->bit_size)) {
+      if (uses_preserve_nans(load)) {
        list_for_each_entry(struct list_node, iter, &slot->producer.stores, head) {
           nir_intrinsic_instr *store = iter->instr;
diff --git a/src/compiler/nir/nir_opt_vectorize.c b/src/compiler/nir/nir_opt_vectorize.c
index 987dee341a1..2fbe82b356b 100644
--- a/src/compiler/nir/nir_opt_vectorize.c
+++ b/src/compiler/nir/nir_opt_vectorize.c
@@ -458,10 +458,9 @@ instr_try_combine_alu(struct set *instr_set, nir_alu_instr *alu1, nir_alu_instr
    */
   new_alu->exact = alu1->exact || alu2->exact;
 
-   /* fp_fast_math is a set of FLOAT_CONTROLS_*_PRESERVE_*. Preserve anything
-    * preserved by either instruction.
+   /* fp_math_ctrl is a set of restrictions; take the union of both.
    */
-   new_alu->fp_fast_math = alu1->fp_fast_math | alu2->fp_fast_math;
+   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
 
   /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 0a7933e006f..10f0d26c85c 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -480,7 +480,7 @@ construct_value(nir_builder *build,
    * replacement should be exact.
    */
   alu->exact = state->has_exact_alu || expr->exact;
-   alu->fp_fast_math = nir_instr_as_alu(instr)->fp_fast_math;
+   alu->fp_math_ctrl = nir_instr_as_alu(instr)->fp_math_ctrl;
 
   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      /* If the source is an explicitly sized source, then we need to reset
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 44c57b921b2..8fc0bbfb6e9 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -733,7 +733,7 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
   }
 
   write_def(ctx, &alu->def, header, alu->instr.type);
-   blob_write_uint32(ctx->blob, alu->fp_fast_math);
+   blob_write_uint32(ctx->blob, alu->fp_math_ctrl);
 
   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
@@ -788,7 +788,7 @@ read_alu(read_ctx *ctx, union packed_instr header)
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
 
   read_def(ctx, &alu->def, &alu->instr, header);
-   alu->fp_fast_math = blob_read_uint32(ctx->blob);
+   alu->fp_math_ctrl = blob_read_uint32(ctx->blob);
 
   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index e13c18a0089..f0842b61f18 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -1550,62 +1550,19 @@ enum gl_derivative_group {
 
 enum float_controls
 {
-   /* The order of these matters. For float_controls2, only the first 9 bits
-    * are used and stored per-instruction in nir_alu_instr::fp_fast_math.
-    * Any changes in this enum need to be synchronized with that.
-    */
   FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE = 0,
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 = BITFIELD_BIT(0),
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 = BITFIELD_BIT(1),
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64 = BITFIELD_BIT(2),
-   FLOAT_CONTROLS_INF_PRESERVE_FP16 = BITFIELD_BIT(3),
-   FLOAT_CONTROLS_INF_PRESERVE_FP32 = BITFIELD_BIT(4),
-   FLOAT_CONTROLS_INF_PRESERVE_FP64 = BITFIELD_BIT(5),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP16 = BITFIELD_BIT(6),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP32 = BITFIELD_BIT(7),
-   FLOAT_CONTROLS_NAN_PRESERVE_FP64 = BITFIELD_BIT(8),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP16 = BITFIELD_BIT(9),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP32 = BITFIELD_BIT(10),
-   FLOAT_CONTROLS_DENORM_PRESERVE_FP64 = BITFIELD_BIT(11),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 = BITFIELD_BIT(12),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 = BITFIELD_BIT(13),
-   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64 = BITFIELD_BIT(14),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 = BITFIELD_BIT(15),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 = BITFIELD_BIT(16),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 = BITFIELD_BIT(17),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 = BITFIELD_BIT(18),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 = BITFIELD_BIT(19),
-   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64 = BITFIELD_BIT(20),
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP16,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP32,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64 =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP64 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE =
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-      FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_INF_PRESERVE =
-      FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-      FLOAT_CONTROLS_INF_PRESERVE_FP64,
-
-   FLOAT_CONTROLS_NAN_PRESERVE =
-      FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-      FLOAT_CONTROLS_NAN_PRESERVE_FP64,
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP16 = BITFIELD_BIT(0),
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP32 = BITFIELD_BIT(1),
+   FLOAT_CONTROLS_DENORM_PRESERVE_FP64 = BITFIELD_BIT(2),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 = BITFIELD_BIT(3),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 = BITFIELD_BIT(4),
+   FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64 = BITFIELD_BIT(5),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 = BITFIELD_BIT(6),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 = BITFIELD_BIT(7),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 = BITFIELD_BIT(8),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 = BITFIELD_BIT(9),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 = BITFIELD_BIT(10),
+   FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64 = BITFIELD_BIT(11),
 };
 
 /**
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 7d4b9a7d21f..7bab5e0490b 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -5689,9 +5689,9 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
      break;
 
   case SpvExecutionModeSignedZeroInfNanPreserve:
      switch (mode->operands[0]) {
-      case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
-      case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
-      case 64:
-         execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
+      case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_sz_inf_nan; break;
+      case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_sz_inf_nan; break;
+      case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_sz_inf_nan; break;
      default: vtn_fail("Floating point type not supported");
      }
      break;
@@ -5863,29 +5863,27 @@ vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_poin
      if ((flags & can_fast_math) != can_fast_math)
         b->exact = true;
 
-      unsigned execution_mode = 0;
      if (!(flags & SpvFPFastMathModeNotNaNMask)) {
        switch (glsl_get_bit_size(type->type)) {
-         case 16: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP16; break;
-         case 32: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP32; break;
-         case 64: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP64; break;
+         case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_nan; break;
+         case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_nan; break;
+         case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_nan; break;
        }
      }
      if (!(flags & SpvFPFastMathModeNotInfMask)) {
        switch (glsl_get_bit_size(type->type)) {
-         case 16: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP16; break;
-         case 32: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP32; break;
-         case 64: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP64; break;
+         case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_inf; break;
+         case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_inf; break;
+         case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_inf; break;
        }
      }
      if (!(flags & SpvFPFastMathModeNSZMask)) {
        switch (glsl_get_bit_size(type->type)) {
-         case 16: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16; break;
-         case 32: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32; break;
-         case 64: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64; break;
+         case 16: b->fp_math_ctrl_fp16 |= nir_fp_preserve_signed_zero; break;
+         case 32: b->fp_math_ctrl_fp32 |= nir_fp_preserve_signed_zero; break;
+         case 64: b->fp_math_ctrl_fp64 |= nir_fp_preserve_signed_zero; break;
        }
      }
-      b->shader->info.float_controls_execution_mode |= execution_mode;
      break;
   }
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 28d87a020e7..b3da3851b76 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -417,22 +417,13 @@ handle_fp_fast_math(struct vtn_builder *b, UNUSED struct vtn_value *val,
      b->nb.exact = true;
 
   /* Decoration overrides defaults */
-   b->nb.fp_fast_math = 0;
+   b->nb.fp_math_ctrl = 0;
   if (!(dec->operands[0] & SpvFPFastMathModeNSZMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
   if (!(dec->operands[0] & SpvFPFastMathModeNotNaNMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
   if (!(dec->operands[0] & SpvFPFastMathModeNotInfMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
 }
 
 void
@@ -441,18 +432,26 @@ vtn_handle_fp_fast_math(struct vtn_builder *b, struct vtn_value *val)
   /* Take the NaN/Inf/SZ preserve bits from the execution mode and set them
    * on the builder, so the generated instructions can pick them up from there.
    * We only care about some of them, check nir_alu_instr for details.
-    * We also copy all bit widths, because we can't easily get the correct one
-    * here.
    */
-#define FLOAT_CONTROLS2_BITS (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64)
-   static_assert(FLOAT_CONTROLS2_BITS == BITSET_MASK(9),
-                 "enum float_controls and fp_fast_math out of sync!");
-   b->nb.fp_fast_math = b->shader->info.float_controls_execution_mode &
-                        FLOAT_CONTROLS2_BITS;
+   unsigned bit_size;
+
+   /* Some ALU ops like modf and frexp return a struct of two values. */
+   if (!val->type)
+      bit_size = 0;
+   else if (glsl_type_is_struct(val->type->type))
+      bit_size = glsl_get_bit_size(val->type->type->fields.structure[0].type);
+   else
+      bit_size = glsl_get_bit_size(val->type->type);
+
+   switch (bit_size) {
+   case 16: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp16; break;
+   case 32: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp32; break;
+   case 64: b->nb.fp_math_ctrl = b->fp_math_ctrl_fp64; break;
+   default: b->nb.fp_math_ctrl = 0; break;
+   }
+
   vtn_foreach_decoration(b, val, handle_fp_fast_math, NULL);
-#undef FLOAT_CONTROLS2_BITS
 }
 
 nir_rounding_mode
@@ -870,15 +869,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 
   case SpvOpIsInf: {
      const bool save_exact = b->nb.exact;
-      const unsigned save_fast_math = b->nb.fp_fast_math;
+      const unsigned save_math_ctrl = b->nb.fp_math_ctrl;
 
      b->nb.exact = true;
-      b->nb.fp_fast_math = 0;
+      b->nb.fp_math_ctrl = nir_fp_no_fast_math;
      nir_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
      dest->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), inf);
      b->nb.exact = save_exact;
-      b->nb.fp_fast_math = save_fast_math;
+      b->nb.fp_math_ctrl = save_math_ctrl;
      break;
   }
 
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index d5218f71b0a..282d738da7b 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -38,21 +38,6 @@
 #define M_PI_4f ((float) M_PI_4)
 #endif
 
-/**
- * Some fp16 instructions (i.e., asin and acos) are lowered as fp32. In these cases the
- * generated fp32 instructions need the same fp_fast_math settings as fp16.
- */
-static void
-propagate_fp16_fast_math_to_fp32(struct nir_builder *b)
-{
-   static_assert(FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 ==
-                 (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1),
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 is not "
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1.");
-
-   b->fp_fast_math |= (b->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) << 1;
-}
-
 static nir_def *build_det(nir_builder *b, nir_def **col, unsigned cols);
 
 /* Computes the determinant of the submatrix given by taking src and
@@ -178,13 +163,9 @@ build_asin(nir_builder *b, nir_def *x, float p0, float p1, bool piecewise)
       * approximation in 32-bit math and then we convert the result back to
       * 16-bit.
      */
-      const uint32_t save = b->fp_fast_math;
-      propagate_fp16_fast_math_to_fp32(b);
-
      nir_def *result = nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
-      b->fp_fast_math = save;
      return result;
   }
 
   nir_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 5d601f95c86..4b028b57290 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -709,6 +709,10 @@ struct vtn_builder {
   /* false by default, set to true by the ContractionOff execution mode */
   bool exact;
 
+   unsigned fp_math_ctrl_fp16;
+   unsigned fp_math_ctrl_fp32;
+   unsigned fp_math_ctrl_fp64;
+
   /* when a physical memory model is chosen */
   bool physical_ptrs;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index b57325c5c55..2b56a527cb0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -3683,32 +3683,20 @@ visit_alu(struct lp_build_nir_soa_context *bld,
   struct lp_type scalar_double_type = bld->scalar_dbl_bld.type;
 
   /* Set the per-instruction float controls. */
-   bld->half_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-   bld->scalar_half_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-   bld->half_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
-   bld->scalar_half_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
+   bld->half_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_half_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->half_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_half_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
-   bld->base.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-   bld->scalar_base.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-   bld->base.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
-   bld->scalar_base.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
+   bld->base.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_base.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->base.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_base.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
-   bld->dbl_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-   bld->scalar_dbl_bld.type.signed_zero_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-   bld->dbl_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
-   bld->scalar_dbl_bld.type.nan_preserve |=
-      !!(instr->fp_fast_math & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
+   bld->dbl_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->scalar_dbl_bld.type.signed_zero_preserve |= nir_alu_instr_is_signed_zero_preserve(instr);
+   bld->dbl_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
+   bld->scalar_dbl_bld.type.nan_preserve |= nir_alu_instr_is_nan_preserve(instr);
 
   for (unsigned i = 0; i <
                    nir_op_infos[instr->op].num_inputs; i++) {
 /**
@@ -5877,30 +5865,18 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
   {
      struct lp_type float_type = type;
-      float_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-      float_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
      lp_build_context_init(&bld.base, gallivm, float_type);
   }
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
-      dbl_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-      dbl_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
      lp_build_context_init(&bld.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type half_type;
      half_type = type;
      half_type.width /= 2;
-      half_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-      half_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
      lp_build_context_init(&bld.half_bld, gallivm, half_type);
   }
   {
@@ -5952,30 +5928,18 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
   lp_build_context_init(&bld.scalar_int_bld, gallivm, lp_int_type(elem_type));
   {
      struct lp_type float_type = elem_type;
-      float_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
-      float_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
      lp_build_context_init(&bld.scalar_base, gallivm, float_type);
   }
   {
      struct lp_type dbl_type;
      dbl_type = elem_type;
      dbl_type.width *= 2;
-      dbl_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
-      dbl_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
      lp_build_context_init(&bld.scalar_dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type half_type;
      half_type = elem_type;
      half_type.width /= 2;
-      half_type.signed_zero_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
-      half_type.nan_preserve =
-         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
      lp_build_context_init(&bld.scalar_half_bld, gallivm, half_type);
   }
   {
diff --git a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
index c8ac6f67614..3c12bd7d3cc 100644
--- a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
+++ b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
@@ -301,8 +301,7 @@ impl SPIRVBin {
            private_data: ptr::from_mut(log).cast(),
        });
 
-        let float_controls = float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 as u32
-            | float_controls::FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE as u32;
+        let float_controls = float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 as u32;
        spirv_to_nir_options {
            create_library: library,
            environment: nir_spirv_execution_environment::NIR_SPIRV_OPENCL,
diff --git a/src/intel/compiler/intel_nir_opt_peephole_ffma.c b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
index 5781c694eb1..e6cb5df4f6f 100644
--- a/src/intel/compiler/intel_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
@@ -219,7 +219,7 @@
 intel_nir_opt_peephole_ffma_instr(nir_builder *b,
      mul_src[0] = nir_fneg(b, mul_src[0]);
 
   nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
-   ffma->fp_fast_math = mul->fp_fast_math | add->fp_fast_math;
+   ffma->fp_math_ctrl = mul->fp_math_ctrl | add->fp_math_ctrl;
 
   for (unsigned i = 0; i < 2; i++) {
      ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
diff --git a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
index 9383c695848..d3bbbe183d8 100644
--- a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
+++ b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
@@ -51,7 +51,7 @@ lower_atomic_in_lock(nir_builder *b, nir_intrinsic_instr *intr, nir_def *loaded)
        b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), loaded, data);
      nir_alu_instr *alu = nir_def_as_alu(to_store);
      alu->exact = true;
-      alu->fp_fast_math = 0;
+      alu->fp_math_ctrl = nir_fp_no_fast_math;
      break;
   }
   case nir_atomic_op_xchg: {
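
For review context, a minimal sketch of how the new per-instruction controls are meant to be consumed. The helper below is hypothetical and not part of the patch; it only uses the `fp_math_ctrl` field, the `nir_fp_*` enum values, and the merge rule that the patch itself introduces:

```c
/* Folding fmul(x, 0.0) to 0.0 is only sound when the instruction makes no
 * promise about NaN (NaN * 0 == NaN), Inf (Inf * 0 == NaN) or signed zeroes
 * ((-1) * 0 == -0). With float_controls2 this becomes a single
 * per-instruction bit test instead of a per-bit-size execution-mode check.
 */
static bool
can_fold_fmul_by_zero(const nir_alu_instr *alu)
{
   return !alu->exact && !(alu->fp_math_ctrl & nir_fp_preserve_sz_inf_nan);
}

/* Because every bit restricts optimizations when set, passes that merge two
 * instructions (CSE in nir_instr_set.c, nir_opt_vectorize.c above) can
 * simply OR the controls and stay sound:
 *
 *    new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
 */
```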