From f3290219ab32fda01b1663a8407eff26548e2fbd Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 18 Dec 2025 17:36:51 +0100
Subject: [PATCH] nir: use a separate enum for per alu floating point math
 control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't need one bit per bit size per instruction if only one of them
actually matters in the end. First step towards moving NIR to full
float_controls2 only.

Also rename the field from fp_fast_math, because that name implied that 0
is the no-fast-math mode, while the opposite was the case.

Reviewed-by: Marek Olšák
Part-of: 
---
 src/compiler/nir/nir.h                        | 28 ++++++++--
 src/compiler/nir/nir_builder.c                |  4 +-
 src/compiler/nir/nir_builder.h                |  4 +-
 src/compiler/nir/nir_clone.c                  |  2 +-
 src/compiler/nir/nir_instr_set.c              |  2 +-
 src/compiler/nir/nir_lower_alu.c              |  6 +-
 src/compiler/nir/nir_lower_alu_width.c        | 10 ++--
 src/compiler/nir/nir_lower_atomics.c          |  2 +-
 src/compiler/nir/nir_lower_double_ops.c       | 15 ++---
 src/compiler/nir/nir_lower_flrp.c             |  2 +-
 src/compiler/nir/nir_opt_if.c                 |  3 +-
 src/compiler/nir/nir_opt_reassociate.c        |  8 +--
 src/compiler/nir/nir_opt_remove_phis.c        |  4 +-
 src/compiler/nir/nir_opt_vectorize.c          |  5 +-
 src/compiler/nir/nir_search.c                 |  2 +-
 src/compiler/nir/nir_serialize.c              |  4 +-
 src/compiler/shader_enums.h                   |  4 --
 src/compiler/spirv/vtn_alu.c                  | 55 ++++++++++---------
 src/compiler/spirv/vtn_glsl450.c              | 19 -------
 .../compiler/intel_nir_opt_peephole_ffma.c    |  2 +-
 .../nak_nir_lower_kepler_shared_atomics.c     |  2 +-
 21 files changed, 87 insertions(+), 96 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7209d28e853..af421851543 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1522,6 +1522,24 @@ nir_op_is_selection(nir_op op)
 {
    return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
 }
+/**
+ * Floating point fast math control.
+ *
+ * All new bits must restrict optimizations when they are set, not when they
+ * are missing; a bitwise OR of two sets is thus never less restrictive.
+ *
+ * See also nir_alu_instr::exact, which should (and hopefully will) be moved
+ * into this enum in the future.
+ */
+typedef enum {
+   nir_fp_preserve_signed_zero = BITFIELD_BIT(0),
+   nir_fp_preserve_inf = BITFIELD_BIT(1),
+   nir_fp_preserve_nan = BITFIELD_BIT(2),
+
+   nir_fp_preserve_sz_inf_nan = BITFIELD_MASK(3),
+   nir_fp_fast_math = 0,
+   nir_fp_no_fast_math = BITFIELD_MASK(3),
+} nir_fp_math_control;
 
 /***/
 typedef struct nir_alu_instr {
@@ -1562,7 +1580,7 @@ typedef struct nir_alu_instr {
    * still handled through the exact bit, and the other float controls bits
    * (rounding mode and denorm handling) remain in the execution mode only.
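    *
    * For illustration (a sketch, not part of this patch): any pass that folds
    * two equivalent ALU instructions into one, as the CSE and vectorize hunks
    * later in this patch do, merges the controls with a bitwise OR. By the
    * rule on nir_fp_math_control above, that can only make the surviving
    * instruction more restrictive, never less:
    *
    *    kept->exact |= removed->exact;
    *    kept->fp_math_ctrl |= removed->fp_math_ctrl;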
*/ - uint32_t fp_fast_math : 9; + uint32_t fp_math_ctrl : 3; /** Sources * @@ -1574,25 +1592,25 @@ typedef struct nir_alu_instr { static inline bool nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu) { - return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_signed_zero; } static inline bool nir_alu_instr_is_inf_preserve(nir_alu_instr *alu) { - return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_inf; } static inline bool nir_alu_instr_is_nan_preserve(nir_alu_instr *alu) { - return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_nan; } static inline bool nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu) { - return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_sz_inf_nan; } void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src); diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c index 493aac67cdf..d9719b3fb9a 100644 --- a/src/compiler/nir/nir_builder.c +++ b/src/compiler/nir/nir_builder.c @@ -72,7 +72,7 @@ nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr const nir_op_info *op_info = &nir_op_infos[instr->op]; instr->exact = build->exact; - instr->fp_fast_math = build->fp_fast_math; + instr->fp_math_ctrl = build->fp_math_ctrl; /* Guess the number of components the destination temporary should have * based on our input sizes, if it's not fixed for the op. @@ -388,7 +388,7 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components) instr->src[i].swizzle[0] = comp[i].comp; } instr->exact = build->exact; - instr->fp_fast_math = build->fp_fast_math; + instr->fp_math_ctrl = build->fp_math_ctrl; /* Note: not reusing nir_builder_alu_instr_finish_and_insert() because it * can't re-guess the num_components when num_components == 1 (nir_op_mov). diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 8d3a2e1b6a4..6f9e5262865 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -44,7 +44,7 @@ typedef struct nir_builder { bool constant_fold_alu; /* Float_controls2 bits. See nir_alu_instr for details. 
*/ - uint32_t fp_fast_math; + uint32_t fp_math_ctrl; nir_shader *shader; nir_function_impl *impl; @@ -725,7 +725,7 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) nir_def_init(&mov->instr, &mov->def, num_components, nir_src_bit_size(src.src)); mov->exact = build->exact; - mov->fp_fast_math = build->fp_fast_math; + mov->fp_math_ctrl = build->fp_math_ctrl; mov->src[0] = src; nir_builder_instr_insert(build, &mov->instr); diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index aa9159fe73f..4cbeba34256 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -268,7 +268,7 @@ clone_alu(clone_state *state, const nir_alu_instr *alu) nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); clone_debug_info(state, &nalu->instr, &alu->instr); nalu->exact = alu->exact; - nalu->fp_fast_math = alu->fp_fast_math; + nalu->fp_math_ctrl = alu->fp_math_ctrl; nalu->no_signed_wrap = alu->no_signed_wrap; nalu->no_unsigned_wrap = alu->no_unsigned_wrap; diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index ed6cb6e7ca9..e158e4b8607 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -807,7 +807,7 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr, */ if (instr->type == nir_instr_type_alu) { nir_instr_as_alu(match)->exact |= nir_instr_as_alu(instr)->exact; - nir_instr_as_alu(match)->fp_fast_math |= nir_instr_as_alu(instr)->fp_fast_math; + nir_instr_as_alu(match)->fp_math_ctrl |= nir_instr_as_alu(instr)->fp_math_ctrl; } assert(!def == !new_def); diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c index 431d0d34edf..a7f447df33b 100644 --- a/src/compiler/nir/nir_lower_alu.c +++ b/src/compiler/nir/nir_lower_alu.c @@ -44,7 +44,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data) b->cursor = nir_before_instr(&instr->instr); b->exact = instr->exact; - b->fp_fast_math = instr->fp_fast_math; + b->fp_math_ctrl = instr->fp_math_ctrl; switch (instr->op) { case nir_op_bitfield_reverse: @@ -176,9 +176,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data) * nir_lower_alu is idempotent, and allows the backend to implement * soundly the no_signed_zero subset of fmin/fmax. */ - b->fp_fast_math &= ~FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE; + b->fp_math_ctrl &= ~nir_fp_preserve_signed_zero; nir_def *fminmax = max ? 
nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
-      b->fp_fast_math = instr->fp_fast_math;
+      b->fp_math_ctrl = instr->fp_math_ctrl;
 
       /* If we have a constant source, we can usually optimize */
       if (s0->num_components == 1 && s0->bit_size == 32) {
diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c
index e23325fd5fb..9d12cd9ac7c 100644
--- a/src/compiler/nir/nir_lower_alu_width.c
+++ b/src/compiler/nir/nir_lower_alu_width.c
@@ -111,7 +111,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
          chan->src[1].swizzle[0] = chan->src[1].swizzle[channel];
       }
       chan->exact = alu->exact;
-      chan->fp_fast_math = alu->fp_fast_math;
+      chan->fp_math_ctrl = alu->fp_math_ctrl;
 
       nir_builder_instr_insert(builder, &chan->instr);
 
@@ -164,7 +164,7 @@ lower_bfdot_to_bfdot2_bfadd(nir_builder *b, nir_alu_instr *alu)
       }
       instr->src[2].src = nir_src_for_ssa(acc);
       instr->exact = b->exact;
-      instr->fp_fast_math = b->fp_fast_math;
+      instr->fp_math_ctrl = b->fp_math_ctrl;
       nir_builder_instr_insert(b, &instr->instr);
 
       acc = &instr->def;
@@ -206,7 +206,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
       if (i != 0)
          instr->src[2].src = nir_src_for_ssa(prev);
       instr->exact = builder->exact;
-      instr->fp_fast_math = builder->fp_fast_math;
+      instr->fp_math_ctrl = builder->fp_math_ctrl;
 
       nir_builder_instr_insert(builder, &instr->instr);
 
@@ -225,7 +225,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
    unsigned i, chan;
 
    b->exact = alu->exact;
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    unsigned num_components = alu->def.num_components;
    unsigned target_width = 1;
@@ -449,7 +449,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
 
       nir_alu_ssa_dest_init(lower, components, alu->def.bit_size);
       lower->exact = alu->exact;
-      lower->fp_fast_math = alu->fp_fast_math;
+      lower->fp_math_ctrl = alu->fp_math_ctrl;
 
       for (i = 0; i < components; i++) {
          vec->src[chan + i].src = nir_src_for_ssa(&lower->def);
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index bdad280fdcd..bf5f332f2df 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -83,7 +83,7 @@ build_atomic(nir_builder *b, nir_intrinsic_instr *intr)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), before, data);
    nir_alu_instr *op = nir_def_as_alu(expected);
    op->exact = true;
-   op->fp_fast_math = 0;
+   op->fp_math_ctrl = nir_fp_no_fast_math;
    switch (intr->intrinsic) {
    case nir_intrinsic_ssbo_atomic:
       xchg = nir_ssbo_atomic_swap(b, intr->def.bit_size,
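
Illustration (a sketch, not part of the patch): the nir_lower_atomics.c hunk
above is one of the callers that now spells out the strictest setting by name
instead of by a magic 0. The lowered float atomic is emulated with a
compare-and-swap, so the ALU op that computes the new value must return the
exact IEEE result, with -0.0, Inf and NaN intact:

   /* The emulated atomic compares raw bits against `before`, so the ALU op
    * feeding it must not be value-changed by any fast-math transform.
    */
   nir_alu_instr *op = nir_def_as_alu(expected);
   op->exact = true;                        /* no algebraic rewrites */
   op->fp_math_ctrl = nir_fp_no_fast_math;  /* keep -0.0, Inf and NaN */
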
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 12df030de9c..1f320a7ee19 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -88,10 +88,8 @@ get_signed_inf(nir_builder *b, nir_def *zero)
 static nir_def *
 get_signed_zero(nir_builder *b, nir_def *src)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
    nir_def *zero;
-   if (nir_is_float_control_signed_zero_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
       nir_def *hi = nir_unpack_64_2x32_split_y(b, src);
       nir_def *sign = nir_iand_imm(b, hi, 0x80000000);
       zero = nir_pack_64_2x32_split(b, nir_imm_int(b, 0), sign);
@@ -105,9 +103,7 @@ get_signed_zero(nir_builder *b, nir_def *src)
 static nir_def *
 preserve_nan(nir_builder *b, nir_def *src, nir_def *res)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
-   if (nir_is_float_control_nan_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
       nir_def *is_nan = nir_fneu(b, src, src);
       return nir_bcsel(b, is_nan, src, res);
    }
@@ -317,7 +313,6 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = nir_ffma(b, y_1, r_1, y_1);
    }
 
-   uint32_t exec_mode = b->fp_fast_math;
    if (sqrt) {
       /* Here, the special cases we need to handle are
        * 0 -> 0 (sign preserving)
@@ -343,7 +338,7 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = fix_inv_result(b, res, src, new_exp);
    }
 
-   if (nir_is_float_control_nan_preserve(exec_mode, 64))
+   if (b->fp_math_ctrl & nir_fp_preserve_nan)
       res = nir_bcsel(b, nir_feq_imm(b, src, -INFINITY),
                       nir_imm_double(b, NAN), res);
 
@@ -504,7 +499,7 @@ lower_minmax(nir_builder *b, nir_op cmp, nir_def *src0, nir_def *src1)
    /* IEEE-754-2019 requires that fmin/fmax compare -0 < 0, but -0 and 0 are
    * indistinguishable for flt/fge. So, we fix up signed zeroes.
    */
-   if (nir_is_float_control_signed_zero_preserve(b->fp_fast_math, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
      nir_def *src0_is_negzero = nir_ieq_imm(b, src0, 1ull << 63);
      nir_def *src1_is_poszero = nir_ieq_imm(b, src1, 0x0);
      nir_def *neg_pos_zero = nir_iand(b, src0_is_negzero, src1_is_poszero);
@@ -772,7 +767,7 @@ lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
    /* Easier to set it here than pass it around all over the place. */
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def *soft_def =
       lower_doubles_instr_to_soft(b, alu, data->softfp64, options);
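
Illustration (a sketch, not part of the patch): nir_lower_double_ops.c shows
the intended pattern for lowering passes after this change. The controls are
copied onto the builder once, at the top of the per-instruction callback, and
every helper then gates its fixups on single bits instead of calling the
per-bit-size nir_is_float_control_* helpers:

   /* once, at the top of the per-instruction callback */
   b->fp_math_ctrl = alu->fp_math_ctrl;

   /* later, in any helper that emits the lowered sequence */
   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
      nir_def *is_nan = nir_fneu(b, src, src);
      res = nir_bcsel(b, is_nan, src, res);
   }
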
diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index c2f7892a371..e723d271141 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -345,7 +345,7 @@ convert_flrp_instruction(nir_builder *bld,
    bld->cursor = nir_before_instr(&alu->instr);
 
    bld->exact = alu->exact;
-   bld->fp_fast_math = alu->fp_fast_math;
+   bld->fp_math_ctrl = alu->fp_math_ctrl;
 
    /* There are two methods to implement flrp(x, y, t). The strictly correct
    * implementation according to the GLSL spec is:
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index cd2cce1f8b7..0be68225964 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -866,7 +866,7 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
 {
    nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
    nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def_init(&nalu->instr, &nalu->def,
                 alu->def.num_components,
@@ -881,7 +881,6 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
    nir_builder_instr_insert(b, &nalu->instr);
 
    return &nalu->def;
-   ;
 }
 
 /*
diff --git a/src/compiler/nir/nir_opt_reassociate.c b/src/compiler/nir/nir_opt_reassociate.c
index 6367147f53d..2f878234b50 100644
--- a/src/compiler/nir/nir_opt_reassociate.c
+++ b/src/compiler/nir/nir_opt_reassociate.c
@@ -174,7 +174,7 @@ struct chain {
    unsigned length;
    nir_scalar srcs[MAX_CHAIN_LENGTH];
    bool do_global_cse, exact;
-   unsigned fp_fast_math;
+   unsigned fp_math_ctrl;
 };
 
 UNUSED static void
@@ -222,7 +222,7 @@ build_chain(struct chain *c, nir_scalar def, unsigned reserved_count)
       * It is safe to add `exact` or float control bits, but not the reverse.
       */
      c->exact |= alu->exact;
-     c->fp_fast_math |= alu->fp_fast_math;
+     c->fp_math_ctrl |= alu->fp_math_ctrl;
 
      for (unsigned i = 0; i < 2; ++i) {
         nir_scalar src = nir_scalar_chase_alu_src(def, i);
@@ -451,7 +451,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
 {
    nir_builder b = nir_builder_at(nir_before_instr(&c->root->instr));
    b.exact = c->exact;
-   b.fp_fast_math = c->fp_fast_math;
+   b.fp_math_ctrl = c->fp_math_ctrl;
 
    /* Pick a new order using sort-by-rank and possibly the CSE heuristics */
    unsigned pinned = 0;
@@ -503,7 +503,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
    /* Set flags conservatively, matching the rest of the chain */
    c->root->no_signed_wrap = c->root->no_unsigned_wrap = false;
    c->root->exact = c->exact;
-   c->root->fp_fast_math = c->fp_fast_math;
+   c->root->fp_math_ctrl = c->fp_math_ctrl;
 
    return true;
 }
diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c
index a6624bc9300..8e7136fa1ee 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -47,11 +47,11 @@ phi_srcs_equal(nir_def *a, nir_def *b)
    if (!nir_instrs_equal(a_instr, b_instr))
       return false;
 
-   /* nir_instrs_equal ignores exact/fast_math */
+   /* nir_instrs_equal ignores exact/fp_math_ctrl */
    if (a_instr->type == nir_instr_type_alu) {
       nir_alu_instr *a_alu = nir_def_as_alu(a);
       nir_alu_instr *b_alu = nir_def_as_alu(b);
-      if (a_alu->exact != b_alu->exact || a_alu->fp_fast_math != b_alu->fp_fast_math)
+      if (a_alu->exact != b_alu->exact || a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
         return false;
    }
 
diff --git a/src/compiler/nir/nir_opt_vectorize.c b/src/compiler/nir/nir_opt_vectorize.c
index 987dee341a1..2fbe82b356b 100644
--- a/src/compiler/nir/nir_opt_vectorize.c
+++ b/src/compiler/nir/nir_opt_vectorize.c
@@ -458,10 +458,9 @@ instr_try_combine_alu(struct set *instr_set, nir_alu_instr *alu1, nir_alu_instr
    */
    new_alu->exact = alu1->exact || alu2->exact;
 
-   /* fp_fast_math is a set of FLOAT_CONTROLS_*_PRESERVE_*. Preserve anything
-    * preserved by either instruction.
+   /* fp_math_ctrl is a set of restrictions; take the union of both.
    */
-   new_alu->fp_fast_math = alu1->fp_fast_math | alu2->fp_fast_math;
+   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
 
    /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
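
Illustration (a sketch, not part of the patch): the two hunks above treat the
flags in two different ways, and both are correct for what they do.
nir_opt_remove_phis only proves that two already-existing instructions compute
the same value and keeps them as-is, so it must require identical flags;
nir_opt_vectorize builds a new instruction that replaces both, so it can
simply honor the union of the restrictions:

   /* dedup (nir_opt_remove_phis): existing instructions stay unchanged,
    * so the flags must match exactly */
   if (a_alu->exact != b_alu->exact ||
       a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
      return false;

   /* combine (nir_opt_vectorize, CSE): the merged instruction obeys the
    * union of both restriction sets */
   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
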
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 0a7933e006f..10f0d26c85c 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -480,7 +480,7 @@ construct_value(nir_builder *build,
       * replacement should be exact.
       */
      alu->exact = state->has_exact_alu || expr->exact;
-     alu->fp_fast_math = nir_instr_as_alu(instr)->fp_fast_math;
+     alu->fp_math_ctrl = nir_instr_as_alu(instr)->fp_math_ctrl;
 
      for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
         /* If the source is an explicitly sized source, then we need to reset
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 44c57b921b2..8fc0bbfb6e9 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -733,7 +733,7 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
    }
 
    write_def(ctx, &alu->def, header, alu->instr.type);
-   blob_write_uint32(ctx->blob, alu->fp_fast_math);
+   blob_write_uint32(ctx->blob, alu->fp_math_ctrl);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {
@@ -788,7 +788,7 @@ read_alu(read_ctx *ctx, union packed_instr header)
    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
 
    read_def(ctx, &alu->def, &alu->instr, header);
-   alu->fp_fast_math = blob_read_uint32(ctx->blob);
+   alu->fp_math_ctrl = blob_read_uint32(ctx->blob);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index e13c18a0089..d9358a0f72c 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -1550,10 +1550,6 @@ enum gl_derivative_group {
 
 enum float_controls
 {
-   /* The order of these matters. For float_controls2, only the first 9 bits
-    * are used and stored per-instruction in nir_alu_instr::fp_fast_math.
-    * Any changes in this enum need to be synchronized with that.
-    */
    FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE = 0,
    FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 = BITFIELD_BIT(0),
    FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 = BITFIELD_BIT(1),
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 28d87a020e7..9c12afde8cb 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -417,22 +417,13 @@ handle_fp_fast_math(struct vtn_builder *b, UNUSED struct vtn_value *val,
    b->nb.exact = true;
 
    /* Decoration overrides defaults */
-   b->nb.fp_fast_math = 0;
+   b->nb.fp_math_ctrl = 0;
    if (!(dec->operands[0] & SpvFPFastMathModeNSZMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
    if (!(dec->operands[0] & SpvFPFastMathModeNotNaNMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
    if (!(dec->operands[0] & SpvFPFastMathModeNotInfMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
 }
 
 void
@@ -441,18 +432,30 @@ vtn_handle_fp_fast_math(struct vtn_builder *b, struct vtn_value *val)
 {
    /* Take the NaN/Inf/SZ preserve bits from the execution mode and set them
    * on the builder, so the generated instructions can take them from there.
    * We only care about some of them, check nir_alu_instr for details.
-   * We also copy all bit widths, because we can't easily get the correct one
-   * here.
    */
-#define FLOAT_CONTROLS2_BITS (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64)
-   static_assert(FLOAT_CONTROLS2_BITS == BITSET_MASK(9),
-                 "enum float_controls and fp_fast_math out of sync!");
-   b->nb.fp_fast_math = b->shader->info.float_controls_execution_mode &
-                        FLOAT_CONTROLS2_BITS;
+
+   b->nb.fp_math_ctrl = 0;
+   unsigned exec_mode = b->shader->info.float_controls_execution_mode;
+   if (val->type) {
+      unsigned bit_size;
+
+      /* Some ALU ops, like modf and frexp, return a struct of two values. */
+      if (glsl_type_is_struct(val->type->type))
+         bit_size = glsl_get_bit_size(val->type->type->fields.structure[0].type);
+      else
+         bit_size = glsl_get_bit_size(val->type->type);
+
+      if (bit_size >= 16 && bit_size <= 64) {
+         if (nir_is_float_control_signed_zero_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
+         if (nir_is_float_control_inf_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
+         if (nir_is_float_control_nan_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
+      }
+   }
+
    vtn_foreach_decoration(b, val, handle_fp_fast_math, NULL);
-#undef FLOAT_CONTROLS2_BITS
 }
 
 nir_rounding_mode
@@ -870,15 +873,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 
    case SpvOpIsInf: {
       const bool save_exact = b->nb.exact;
-      const unsigned save_fast_math = b->nb.fp_fast_math;
+      const unsigned save_math_ctrl = b->nb.fp_math_ctrl;
 
       b->nb.exact = true;
-      b->nb.fp_fast_math = 0;
+      b->nb.fp_math_ctrl = nir_fp_no_fast_math;
       nir_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
      dest->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), inf);
 
      b->nb.exact = save_exact;
-     b->nb.fp_fast_math = save_fast_math;
+     b->nb.fp_math_ctrl = save_math_ctrl;
      break;
    }
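
Illustration (a sketch, not part of the patch): SPIR-V's FPFastMathMode flags
and NIR's new bits point in opposite directions. SpvFPFastMathModeNSZMask and
friends grant permission to break a guarantee, while the nir_fp_preserve_*
bits revoke that permission, hence the inverted tests in handle_fp_fast_math
above. Folding the per-bit-size execution mode bits down to the
bit-size-agnostic enum, as vtn_handle_fp_fast_math now does inline, amounts to
this hypothetical helper:

   static nir_fp_math_control
   fp_math_ctrl_from_exec_mode(unsigned exec_mode, unsigned bit_size)
   {
      unsigned ctrl = nir_fp_fast_math; /* == 0, no restrictions */
      if (nir_is_float_control_signed_zero_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_signed_zero;
      if (nir_is_float_control_inf_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_inf;
      if (nir_is_float_control_nan_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_nan;
      return (nir_fp_math_control)ctrl;
   }
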
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index d5218f71b0a..282d738da7b 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -38,21 +38,6 @@
 #define M_PI_4f ((float) M_PI_4)
 #endif
 
-/**
- * Some fp16 instructions (i.e., asin and acos) are lowered as fp32. In these cases the
- * generated fp32 instructions need the same fp_fast_math settings as fp16.
- */
-static void
-propagate_fp16_fast_math_to_fp32(struct nir_builder *b)
-{
-   static_assert(FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 ==
-                 (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1),
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 is not "
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1.");
-
-   b->fp_fast_math |= (b->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) << 1;
-}
-
 static nir_def *build_det(nir_builder *b, nir_def **col, unsigned cols);
 
 /* Computes the determinant of the submatrix given by taking src and
@@ -178,13 +163,9 @@ build_asin(nir_builder *b, nir_def *x, float p0, float p1, bool piecewise)
       * approximation in 32-bit math and then we convert the result back to
       * 16-bit.
       */
-      const uint32_t save = b->fp_fast_math;
-      propagate_fp16_fast_math_to_fp32(b);
-
       nir_def *result = nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
-      b->fp_fast_math = save;
 
       return result;
    }
 
    nir_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
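
Illustration (a sketch, not part of the patch): the deleted
propagate_fp16_fast_math_to_fp32() helper existed only because the old
per-bit-size encoding meant that fp32 instructions generated while lowering an
fp16 op carried different bits than the fp16 original. With the
bit-size-agnostic enum, whatever is on the builder restricts the fp32 sequence
the same way it restricted the fp16 op, so the save/propagate/restore dance
collapses to a plain call:

   /* b->fp_math_ctrl already applies to the fp32 ops; nothing to shift */
   return nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
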
diff --git a/src/intel/compiler/intel_nir_opt_peephole_ffma.c b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
index 5781c694eb1..e6cb5df4f6f 100644
--- a/src/intel/compiler/intel_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
@@ -219,7 +219,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b,
       mul_src[0] = nir_fneg(b, mul_src[0]);
 
    nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
-   ffma->fp_fast_math = mul->fp_fast_math | add->fp_fast_math;
+   ffma->fp_math_ctrl = mul->fp_math_ctrl | add->fp_math_ctrl;
 
    for (unsigned i = 0; i < 2; i++) {
       ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
diff --git a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
index 9383c695848..d3bbbe183d8 100644
--- a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
+++ b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
@@ -51,7 +51,7 @@ lower_atomic_in_lock(nir_builder *b, nir_intrinsic_instr *intr, nir_def *loaded)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), loaded, data);
    nir_alu_instr *alu = nir_def_as_alu(to_store);
    alu->exact = true;
-   alu->fp_fast_math = 0;
+   alu->fp_math_ctrl = nir_fp_no_fast_math;
    break;
 }
 case nir_atomic_op_xchg: {
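
Illustration (a sketch, not part of the patch): consumers don't usually test
fp_math_ctrl bits directly; the accessors added in nir.h keep call sites
readable. A backend deciding whether a flushing, non-IEEE hardware min/max is
legal for a given instruction might do (hypothetical function name):

   static bool
   can_use_fast_fmin(nir_alu_instr *alu)
   {
      /* A NaN- or signed-zero-preserving fmin must use the IEEE variant. */
      return !nir_alu_instr_is_nan_preserve(alu) &&
             !nir_alu_instr_is_signed_zero_preserve(alu);
   }
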