From f3290219ab32fda01b1663a8407eff26548e2fbd Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 18 Dec 2025 17:36:51 +0100
Subject: [PATCH] nir: use a separate enum for per alu floating point math
 control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't need one bit per bit size per instruction if only one of them
actually matters in the end. First step towards moving NIR to full
float_controls2 only.

Also rename the field from fp_fast_math, because that name implied that 0
is the no-fast-math mode, while the opposite was the case.

Reviewed-by: Marek Olšák
Part-of: 
---
 src/compiler/nir/nir.h                        | 28 ++++++++--
 src/compiler/nir/nir_builder.c                |  4 +-
 src/compiler/nir/nir_builder.h                |  4 +-
 src/compiler/nir/nir_clone.c                  |  2 +-
 src/compiler/nir/nir_instr_set.c              |  2 +-
 src/compiler/nir/nir_lower_alu.c              |  6 +-
 src/compiler/nir/nir_lower_alu_width.c        | 10 ++--
 src/compiler/nir/nir_lower_atomics.c          |  2 +-
 src/compiler/nir/nir_lower_double_ops.c       | 15 ++---
 src/compiler/nir/nir_lower_flrp.c             |  2 +-
 src/compiler/nir/nir_opt_if.c                 |  3 +-
 src/compiler/nir/nir_opt_reassociate.c        |  8 +--
 src/compiler/nir/nir_opt_remove_phis.c        |  4 +-
 src/compiler/nir/nir_opt_vectorize.c          |  5 +-
 src/compiler/nir/nir_search.c                 |  2 +-
 src/compiler/nir/nir_serialize.c              |  4 +-
 src/compiler/shader_enums.h                   |  4 --
 src/compiler/spirv/vtn_alu.c                  | 55 ++++++++++---------
 src/compiler/spirv/vtn_glsl450.c              | 19 -------
 .../compiler/intel_nir_opt_peephole_ffma.c    |  2 +-
 .../nak_nir_lower_kepler_shared_atomics.c     |  2 +-
 21 files changed, 87 insertions(+), 96 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7209d28e853..af421851543 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1522,6 +1522,24 @@ nir_op_is_selection(nir_op op)
 {
    return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
 }
+/**
+ * Floating point fast math control.
+ *
+ * All new bits must restrict optimizations when they are set, not when they
+ * are missing; a bitwise OR of two sets is thus never less restrictive.
+ *
+ * See also nir_alu_instr::exact, which should (and hopefully will) be moved
+ * into this enum in the future.
+ */
+typedef enum {
+   nir_fp_preserve_signed_zero = BITFIELD_BIT(0),
+   nir_fp_preserve_inf = BITFIELD_BIT(1),
+   nir_fp_preserve_nan = BITFIELD_BIT(2),
+
+   nir_fp_preserve_sz_inf_nan = BITFIELD_MASK(3),
+   nir_fp_fast_math = 0,
+   nir_fp_no_fast_math = BITFIELD_MASK(3),
+} nir_fp_math_control;
 
 /***/
 typedef struct nir_alu_instr {
@@ -1562,7 +1580,7 @@ typedef struct nir_alu_instr {
    * still handled through the exact bit, and the other float controls bits
    * (rounding mode and denorm handling) remain in the execution mode only.
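    *
    * For illustration (a sketch, not part of this patch): any pass that folds
    * two equivalent ALU instructions into one, as the CSE and vectorize hunks
    * later in this patch do, merges the controls with a bitwise OR. By the
    * rule on nir_fp_math_control above, that can only make the surviving
    * instruction more restrictive, never less:
    *
    *    kept->exact |= removed->exact;
    *    kept->fp_math_ctrl |= removed->fp_math_ctrl;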
*/ - uint32_t fp_fast_math : 9; + uint32_t fp_math_ctrl : 3; /** Sources * @@ -1574,25 +1592,25 @@ typedef struct nir_alu_instr { static inline bool nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu) { - return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_signed_zero; } static inline bool nir_alu_instr_is_inf_preserve(nir_alu_instr *alu) { - return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_inf; } static inline bool nir_alu_instr_is_nan_preserve(nir_alu_instr *alu) { - return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_nan; } static inline bool nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu) { - return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size); + return alu->fp_math_ctrl & nir_fp_preserve_sz_inf_nan; } void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src); diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c index 493aac67cdf..d9719b3fb9a 100644 --- a/src/compiler/nir/nir_builder.c +++ b/src/compiler/nir/nir_builder.c @@ -72,7 +72,7 @@ nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr const nir_op_info *op_info = &nir_op_infos[instr->op]; instr->exact = build->exact; - instr->fp_fast_math = build->fp_fast_math; + instr->fp_math_ctrl = build->fp_math_ctrl; /* Guess the number of components the destination temporary should have * based on our input sizes, if it's not fixed for the op. @@ -388,7 +388,7 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components) instr->src[i].swizzle[0] = comp[i].comp; } instr->exact = build->exact; - instr->fp_fast_math = build->fp_fast_math; + instr->fp_math_ctrl = build->fp_math_ctrl; /* Note: not reusing nir_builder_alu_instr_finish_and_insert() because it * can't re-guess the num_components when num_components == 1 (nir_op_mov). diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 8d3a2e1b6a4..6f9e5262865 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -44,7 +44,7 @@ typedef struct nir_builder { bool constant_fold_alu; /* Float_controls2 bits. See nir_alu_instr for details. 
*/ - uint32_t fp_fast_math; + uint32_t fp_math_ctrl; nir_shader *shader; nir_function_impl *impl; @@ -725,7 +725,7 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) nir_def_init(&mov->instr, &mov->def, num_components, nir_src_bit_size(src.src)); mov->exact = build->exact; - mov->fp_fast_math = build->fp_fast_math; + mov->fp_math_ctrl = build->fp_math_ctrl; mov->src[0] = src; nir_builder_instr_insert(build, &mov->instr); diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index aa9159fe73f..4cbeba34256 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -268,7 +268,7 @@ clone_alu(clone_state *state, const nir_alu_instr *alu) nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); clone_debug_info(state, &nalu->instr, &alu->instr); nalu->exact = alu->exact; - nalu->fp_fast_math = alu->fp_fast_math; + nalu->fp_math_ctrl = alu->fp_math_ctrl; nalu->no_signed_wrap = alu->no_signed_wrap; nalu->no_unsigned_wrap = alu->no_unsigned_wrap; diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index ed6cb6e7ca9..e158e4b8607 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -807,7 +807,7 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr, */ if (instr->type == nir_instr_type_alu) { nir_instr_as_alu(match)->exact |= nir_instr_as_alu(instr)->exact; - nir_instr_as_alu(match)->fp_fast_math |= nir_instr_as_alu(instr)->fp_fast_math; + nir_instr_as_alu(match)->fp_math_ctrl |= nir_instr_as_alu(instr)->fp_math_ctrl; } assert(!def == !new_def); diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c index 431d0d34edf..a7f447df33b 100644 --- a/src/compiler/nir/nir_lower_alu.c +++ b/src/compiler/nir/nir_lower_alu.c @@ -44,7 +44,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data) b->cursor = nir_before_instr(&instr->instr); b->exact = instr->exact; - b->fp_fast_math = instr->fp_fast_math; + b->fp_math_ctrl = instr->fp_math_ctrl; switch (instr->op) { case nir_op_bitfield_reverse: @@ -176,9 +176,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data) * nir_lower_alu is idempotent, and allows the backend to implement * soundly the no_signed_zero subset of fmin/fmax. */ - b->fp_fast_math &= ~FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE; + b->fp_math_ctrl &= ~nir_fp_preserve_signed_zero; nir_def *fminmax = max ? 
nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
-      b->fp_fast_math = instr->fp_fast_math;
+      b->fp_math_ctrl = instr->fp_math_ctrl;
 
       /* If we have a constant source, we can usually optimize */
       if (s0->num_components == 1 && s0->bit_size == 32) {
diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c
index e23325fd5fb..9d12cd9ac7c 100644
--- a/src/compiler/nir/nir_lower_alu_width.c
+++ b/src/compiler/nir/nir_lower_alu_width.c
@@ -111,7 +111,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
          chan->src[1].swizzle[0] = chan->src[1].swizzle[channel];
       }
       chan->exact = alu->exact;
-      chan->fp_fast_math = alu->fp_fast_math;
+      chan->fp_math_ctrl = alu->fp_math_ctrl;
 
       nir_builder_instr_insert(builder, &chan->instr);
 
@@ -164,7 +164,7 @@ lower_bfdot_to_bfdot2_bfadd(nir_builder *b, nir_alu_instr *alu)
       }
       instr->src[2].src = nir_src_for_ssa(acc);
       instr->exact = b->exact;
-      instr->fp_fast_math = b->fp_fast_math;
+      instr->fp_math_ctrl = b->fp_math_ctrl;
       nir_builder_instr_insert(b, &instr->instr);
 
       acc = &instr->def;
@@ -206,7 +206,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
       if (i != 0)
          instr->src[2].src = nir_src_for_ssa(prev);
       instr->exact = builder->exact;
-      instr->fp_fast_math = builder->fp_fast_math;
+      instr->fp_math_ctrl = builder->fp_math_ctrl;
 
       nir_builder_instr_insert(builder, &instr->instr);
 
@@ -225,7 +225,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
    unsigned i, chan;
 
    b->exact = alu->exact;
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    unsigned num_components = alu->def.num_components;
    unsigned target_width = 1;
@@ -449,7 +449,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
 
       nir_alu_ssa_dest_init(lower, components, alu->def.bit_size);
       lower->exact = alu->exact;
-      lower->fp_fast_math = alu->fp_fast_math;
+      lower->fp_math_ctrl = alu->fp_math_ctrl;
 
       for (i = 0; i < components; i++) {
          vec->src[chan + i].src = nir_src_for_ssa(&lower->def);
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index bdad280fdcd..bf5f332f2df 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -83,7 +83,7 @@ build_atomic(nir_builder *b, nir_intrinsic_instr *intr)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), before, data);
    nir_alu_instr *op = nir_def_as_alu(expected);
    op->exact = true;
-   op->fp_fast_math = 0;
+   op->fp_math_ctrl = nir_fp_no_fast_math;
    switch (intr->intrinsic) {
    case nir_intrinsic_ssbo_atomic:
       xchg = nir_ssbo_atomic_swap(b, intr->def.bit_size,
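
Illustration (a sketch, not part of the patch): the nir_lower_atomics.c hunk
above is one of the callers that now spells out the strictest setting by name
instead of by a magic 0. The lowered float atomic is emulated with a
compare-and-swap, so the ALU op that computes the new value must return the
exact IEEE result, with -0.0, Inf and NaN intact:

   /* The emulated atomic compares raw bits against `before`, so the ALU op
    * feeding it must not be value-changed by any fast-math transform.
    */
   nir_alu_instr *op = nir_def_as_alu(expected);
   op->exact = true;                        /* no algebraic rewrites */
   op->fp_math_ctrl = nir_fp_no_fast_math;  /* keep -0.0, Inf and NaN */
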
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 12df030de9c..1f320a7ee19 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -88,10 +88,8 @@ get_signed_inf(nir_builder *b, nir_def *zero)
 static nir_def *
 get_signed_zero(nir_builder *b, nir_def *src)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
    nir_def *zero;
-   if (nir_is_float_control_signed_zero_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
       nir_def *hi = nir_unpack_64_2x32_split_y(b, src);
       nir_def *sign = nir_iand_imm(b, hi, 0x80000000);
       zero = nir_pack_64_2x32_split(b, nir_imm_int(b, 0), sign);
@@ -105,9 +103,7 @@ get_signed_zero(nir_builder *b, nir_def *src)
 static nir_def *
 preserve_nan(nir_builder *b, nir_def *src, nir_def *res)
 {
-   uint32_t exec_mode = b->fp_fast_math;
-
-   if (nir_is_float_control_nan_preserve(exec_mode, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
       nir_def *is_nan = nir_fneu(b, src, src);
       return nir_bcsel(b, is_nan, src, res);
    }
@@ -317,7 +313,6 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = nir_ffma(b, y_1, r_1, y_1);
    }
 
-   uint32_t exec_mode = b->fp_fast_math;
    if (sqrt) {
       /* Here, the special cases we need to handle are
        * 0 -> 0 (sign preserving)
@@ -343,7 +338,7 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
       res = fix_inv_result(b, res, src, new_exp);
    }
 
-   if (nir_is_float_control_nan_preserve(exec_mode, 64))
+   if (b->fp_math_ctrl & nir_fp_preserve_nan)
       res = nir_bcsel(b, nir_feq_imm(b, src, -INFINITY),
                       nir_imm_double(b, NAN), res);
 
@@ -504,7 +499,7 @@ lower_minmax(nir_builder *b, nir_op cmp, nir_def *src0, nir_def *src1)
    /* IEEE-754-2019 requires that fmin/fmax compare -0 < 0, but -0 and 0 are
    * indistinguishable for flt/fge. So, we fix up signed zeroes.
    */
-   if (nir_is_float_control_signed_zero_preserve(b->fp_fast_math, 64)) {
+   if (b->fp_math_ctrl & nir_fp_preserve_signed_zero) {
      nir_def *src0_is_negzero = nir_ieq_imm(b, src0, 1ull << 63);
      nir_def *src1_is_poszero = nir_ieq_imm(b, src1, 0x0);
      nir_def *neg_pos_zero = nir_iand(b, src0_is_negzero, src1_is_poszero);
@@ -772,7 +767,7 @@ lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
    /* Easier to set it here than pass it around all over the place. */
-   b->fp_fast_math = alu->fp_fast_math;
+   b->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def *soft_def =
       lower_doubles_instr_to_soft(b, alu, data->softfp64, options);
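
Illustration (a sketch, not part of the patch): nir_lower_double_ops.c shows
the intended pattern for lowering passes after this change. The controls are
copied onto the builder once, at the top of the per-instruction callback, and
every helper then gates its fixups on single bits instead of calling the
per-bit-size nir_is_float_control_* helpers:

   /* once, at the top of the per-instruction callback */
   b->fp_math_ctrl = alu->fp_math_ctrl;

   /* later, in any helper that emits the lowered sequence */
   if (b->fp_math_ctrl & nir_fp_preserve_nan) {
      nir_def *is_nan = nir_fneu(b, src, src);
      res = nir_bcsel(b, is_nan, src, res);
   }
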
diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index c2f7892a371..e723d271141 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -345,7 +345,7 @@ convert_flrp_instruction(nir_builder *bld,
    bld->cursor = nir_before_instr(&alu->instr);
 
    bld->exact = alu->exact;
-   bld->fp_fast_math = alu->fp_fast_math;
+   bld->fp_math_ctrl = alu->fp_math_ctrl;
 
    /* There are two methods to implement flrp(x, y, t). The strictly correct
    * implementation according to the GLSL spec is:
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index cd2cce1f8b7..0be68225964 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -866,7 +866,7 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
 {
    nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
    nalu->exact = alu->exact;
-   nalu->fp_fast_math = alu->fp_fast_math;
+   nalu->fp_math_ctrl = alu->fp_math_ctrl;
 
    nir_def_init(&nalu->instr, &nalu->def,
                 alu->def.num_components,
@@ -881,7 +881,6 @@ clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
    nir_builder_instr_insert(b, &nalu->instr);
 
    return &nalu->def;
-   ;
 }
 
 /*
diff --git a/src/compiler/nir/nir_opt_reassociate.c b/src/compiler/nir/nir_opt_reassociate.c
index 6367147f53d..2f878234b50 100644
--- a/src/compiler/nir/nir_opt_reassociate.c
+++ b/src/compiler/nir/nir_opt_reassociate.c
@@ -174,7 +174,7 @@ struct chain {
    unsigned length;
    nir_scalar srcs[MAX_CHAIN_LENGTH];
    bool do_global_cse, exact;
-   unsigned fp_fast_math;
+   unsigned fp_math_ctrl;
 };
 
 UNUSED static void
@@ -222,7 +222,7 @@ build_chain(struct chain *c, nir_scalar def, unsigned reserved_count)
       * It is safe to add `exact` or float control bits, but not the reverse.
       */
      c->exact |= alu->exact;
-     c->fp_fast_math |= alu->fp_fast_math;
+     c->fp_math_ctrl |= alu->fp_math_ctrl;
 
      for (unsigned i = 0; i < 2; ++i) {
         nir_scalar src = nir_scalar_chase_alu_src(def, i);
@@ -451,7 +451,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
 {
    nir_builder b = nir_builder_at(nir_before_instr(&c->root->instr));
    b.exact = c->exact;
-   b.fp_fast_math = c->fp_fast_math;
+   b.fp_math_ctrl = c->fp_math_ctrl;
 
    /* Pick a new order using sort-by-rank and possibly the CSE heuristics */
    unsigned pinned = 0;
@@ -503,7 +503,7 @@ reassociate_chain(struct chain *c, void *pair_freq)
    /* Set flags conservatively, matching the rest of the chain */
    c->root->no_signed_wrap = c->root->no_unsigned_wrap = false;
    c->root->exact = c->exact;
-   c->root->fp_fast_math = c->fp_fast_math;
+   c->root->fp_math_ctrl = c->fp_math_ctrl;
 
    return true;
 }
diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c
index a6624bc9300..8e7136fa1ee 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -47,11 +47,11 @@ phi_srcs_equal(nir_def *a, nir_def *b)
    if (!nir_instrs_equal(a_instr, b_instr))
       return false;
 
-   /* nir_instrs_equal ignores exact/fast_math */
+   /* nir_instrs_equal ignores exact/fp_math_ctrl */
    if (a_instr->type == nir_instr_type_alu) {
       nir_alu_instr *a_alu = nir_def_as_alu(a);
       nir_alu_instr *b_alu = nir_def_as_alu(b);
-      if (a_alu->exact != b_alu->exact || a_alu->fp_fast_math != b_alu->fp_fast_math)
+      if (a_alu->exact != b_alu->exact || a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
         return false;
    }
 
diff --git a/src/compiler/nir/nir_opt_vectorize.c b/src/compiler/nir/nir_opt_vectorize.c
index 987dee341a1..2fbe82b356b 100644
--- a/src/compiler/nir/nir_opt_vectorize.c
+++ b/src/compiler/nir/nir_opt_vectorize.c
@@ -458,10 +458,9 @@ instr_try_combine_alu(struct set *instr_set, nir_alu_instr *alu1, nir_alu_instr
    */
    new_alu->exact = alu1->exact || alu2->exact;
 
-   /* fp_fast_math is a set of FLOAT_CONTROLS_*_PRESERVE_*. Preserve anything
-    * preserved by either instruction.
+   /* fp_math_ctrl is a set of restrictions; take the union of both.
    */
-   new_alu->fp_fast_math = alu1->fp_fast_math | alu2->fp_fast_math;
+   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
 
    /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
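
Illustration (a sketch, not part of the patch): the two hunks above treat the
flags in two different ways, and both are correct for what they do.
nir_opt_remove_phis only proves that two already-existing instructions compute
the same value and keeps them as-is, so it must require identical flags;
nir_opt_vectorize builds a new instruction that replaces both, so it can
simply honor the union of the restrictions:

   /* dedup (nir_opt_remove_phis): existing instructions stay unchanged,
    * so the flags must match exactly */
   if (a_alu->exact != b_alu->exact ||
       a_alu->fp_math_ctrl != b_alu->fp_math_ctrl)
      return false;

   /* combine (nir_opt_vectorize, CSE): the merged instruction obeys the
    * union of both restriction sets */
   new_alu->fp_math_ctrl = alu1->fp_math_ctrl | alu2->fp_math_ctrl;
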
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 0a7933e006f..10f0d26c85c 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -480,7 +480,7 @@ construct_value(nir_builder *build,
       * replacement should be exact.
       */
      alu->exact = state->has_exact_alu || expr->exact;
-     alu->fp_fast_math = nir_instr_as_alu(instr)->fp_fast_math;
+     alu->fp_math_ctrl = nir_instr_as_alu(instr)->fp_math_ctrl;
 
      for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
         /* If the source is an explicitly sized source, then we need to reset
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 44c57b921b2..8fc0bbfb6e9 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -733,7 +733,7 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
    }
 
    write_def(ctx, &alu->def, header, alu->instr.type);
-   blob_write_uint32(ctx->blob, alu->fp_fast_math);
+   blob_write_uint32(ctx->blob, alu->fp_math_ctrl);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {
@@ -788,7 +788,7 @@ read_alu(read_ctx *ctx, union packed_instr header)
    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
 
    read_def(ctx, &alu->def, &alu->instr, header);
-   alu->fp_fast_math = blob_read_uint32(ctx->blob);
+   alu->fp_math_ctrl = blob_read_uint32(ctx->blob);
 
    if (header.alu.packed_src_ssa_16bit) {
       for (unsigned i = 0; i < num_srcs; i++) {
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index e13c18a0089..d9358a0f72c 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -1550,10 +1550,6 @@ enum gl_derivative_group {
 
 enum float_controls
 {
-   /* The order of these matters. For float_controls2, only the first 9 bits
-    * are used and stored per-instruction in nir_alu_instr::fp_fast_math.
-    * Any changes in this enum need to be synchronized with that.
-    */
    FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE = 0,
    FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 = BITFIELD_BIT(0),
    FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 = BITFIELD_BIT(1),
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 28d87a020e7..9c12afde8cb 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -417,22 +417,13 @@ handle_fp_fast_math(struct vtn_builder *b, UNUSED struct vtn_value *val,
    b->nb.exact = true;
 
    /* Decoration overrides defaults */
-   b->nb.fp_fast_math = 0;
+   b->nb.fp_math_ctrl = 0;
    if (!(dec->operands[0] & SpvFPFastMathModeNSZMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32 |
-         FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
    if (!(dec->operands[0] & SpvFPFastMathModeNotNaNMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_NAN_PRESERVE_FP16 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP32 |
-         FLOAT_CONTROLS_NAN_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
    if (!(dec->operands[0] & SpvFPFastMathModeNotInfMask))
-      b->nb.fp_fast_math |=
-         FLOAT_CONTROLS_INF_PRESERVE_FP16 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP32 |
-         FLOAT_CONTROLS_INF_PRESERVE_FP64;
+      b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
 }
 
 void
@@ -441,18 +432,30 @@ vtn_handle_fp_fast_math(struct vtn_builder *b, struct vtn_value *val)
 {
    /* Take the NaN/Inf/SZ preserve bits from the execution mode and set them
    * on the builder, so the generated instructions can take them from there.
    * We only care about some of them, check nir_alu_instr for details.
-   * We also copy all bit widths, because we can't easily get the correct one
-   * here.
    */
-#define FLOAT_CONTROLS2_BITS (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 | \
-                              FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64)
-   static_assert(FLOAT_CONTROLS2_BITS == BITSET_MASK(9),
-                 "enum float_controls and fp_fast_math out of sync!");
-   b->nb.fp_fast_math = b->shader->info.float_controls_execution_mode &
-                        FLOAT_CONTROLS2_BITS;
+
+   b->nb.fp_math_ctrl = 0;
+   unsigned exec_mode = b->shader->info.float_controls_execution_mode;
+   if (val->type) {
+      unsigned bit_size;
+
+      /* Some ALU ops, like modf and frexp, return a struct of two values. */
+      if (glsl_type_is_struct(val->type->type))
+         bit_size = glsl_get_bit_size(val->type->type->fields.structure[0].type);
+      else
+         bit_size = glsl_get_bit_size(val->type->type);
+
+      if (bit_size >= 16 && bit_size <= 64) {
+         if (nir_is_float_control_signed_zero_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_signed_zero;
+         if (nir_is_float_control_inf_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_inf;
+         if (nir_is_float_control_nan_preserve(exec_mode, bit_size))
+            b->nb.fp_math_ctrl |= nir_fp_preserve_nan;
+      }
+   }
+
    vtn_foreach_decoration(b, val, handle_fp_fast_math, NULL);
-#undef FLOAT_CONTROLS2_BITS
 }
 
 nir_rounding_mode
@@ -870,15 +873,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 
    case SpvOpIsInf: {
       const bool save_exact = b->nb.exact;
-      const unsigned save_fast_math = b->nb.fp_fast_math;
+      const unsigned save_math_ctrl = b->nb.fp_math_ctrl;
 
       b->nb.exact = true;
-      b->nb.fp_fast_math = 0;
+      b->nb.fp_math_ctrl = nir_fp_no_fast_math;
       nir_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
      dest->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), inf);
 
      b->nb.exact = save_exact;
-     b->nb.fp_fast_math = save_fast_math;
+     b->nb.fp_math_ctrl = save_math_ctrl;
      break;
    }
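
Illustration (a sketch, not part of the patch): SPIR-V's FPFastMathMode flags
and NIR's new bits point in opposite directions. SpvFPFastMathModeNSZMask and
friends grant permission to break a guarantee, while the nir_fp_preserve_*
bits revoke that permission, hence the inverted tests in handle_fp_fast_math
above. Folding the per-bit-size execution mode bits down to the
bit-size-agnostic enum, as vtn_handle_fp_fast_math now does inline, amounts to
this hypothetical helper:

   static nir_fp_math_control
   fp_math_ctrl_from_exec_mode(unsigned exec_mode, unsigned bit_size)
   {
      unsigned ctrl = nir_fp_fast_math; /* == 0, no restrictions */
      if (nir_is_float_control_signed_zero_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_signed_zero;
      if (nir_is_float_control_inf_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_inf;
      if (nir_is_float_control_nan_preserve(exec_mode, bit_size))
         ctrl |= nir_fp_preserve_nan;
      return (nir_fp_math_control)ctrl;
   }
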
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index d5218f71b0a..282d738da7b 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -38,21 +38,6 @@
 #define M_PI_4f ((float) M_PI_4)
 #endif
 
-/**
- * Some fp16 instructions (i.e., asin and acos) are lowered as fp32. In these cases the
- * generated fp32 instructions need the same fp_fast_math settings as fp16.
- */
-static void
-propagate_fp16_fast_math_to_fp32(struct nir_builder *b)
-{
-   static_assert(FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 ==
-                 (FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1),
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 is not "
-                 "FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 << 1.");
-
-   b->fp_fast_math |= (b->fp_fast_math & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) << 1;
-}
-
 static nir_def *build_det(nir_builder *b, nir_def **col, unsigned cols);
 
 /* Computes the determinant of the submatrix given by taking src and
@@ -178,13 +163,9 @@ build_asin(nir_builder *b, nir_def *x, float p0, float p1, bool piecewise)
       * approximation in 32-bit math and then we convert the result back to
       * 16-bit.
       */
-      const uint32_t save = b->fp_fast_math;
-      propagate_fp16_fast_math_to_fp32(b);
-
       nir_def *result = nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
-      b->fp_fast_math = save;
 
       return result;
    }
 
    nir_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
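
Illustration (a sketch, not part of the patch): the deleted
propagate_fp16_fast_math_to_fp32() helper existed only because the old
per-bit-size encoding meant that fp32 instructions generated while lowering an
fp16 op carried different bits than the fp16 original. With the
bit-size-agnostic enum, whatever is on the builder restricts the fp32 sequence
the same way it restricted the fp16 op, so the save/propagate/restore dance
collapses to a plain call:

   /* b->fp_math_ctrl already applies to the fp32 ops; nothing to shift */
   return nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
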
diff --git a/src/intel/compiler/intel_nir_opt_peephole_ffma.c b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
index 5781c694eb1..e6cb5df4f6f 100644
--- a/src/intel/compiler/intel_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/intel_nir_opt_peephole_ffma.c
@@ -219,7 +219,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b,
       mul_src[0] = nir_fneg(b, mul_src[0]);
 
    nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
-   ffma->fp_fast_math = mul->fp_fast_math | add->fp_fast_math;
+   ffma->fp_math_ctrl = mul->fp_math_ctrl | add->fp_math_ctrl;
 
    for (unsigned i = 0; i < 2; i++) {
       ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
diff --git a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
index 9383c695848..d3bbbe183d8 100644
--- a/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
+++ b/src/nouveau/compiler/nak_nir_lower_kepler_shared_atomics.c
@@ -51,7 +51,7 @@ lower_atomic_in_lock(nir_builder *b, nir_intrinsic_instr *intr, nir_def *loaded)
       b, nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr)), loaded, data);
    nir_alu_instr *alu = nir_def_as_alu(to_store);
    alu->exact = true;
-   alu->fp_fast_math = 0;
+   alu->fp_math_ctrl = nir_fp_no_fast_math;
    break;
 }
 case nir_atomic_op_xchg: {
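
Illustration (a sketch, not part of the patch): consumers don't usually test
fp_math_ctrl bits directly; the accessors added in nir.h keep call sites
readable. A backend deciding whether a flushing, non-IEEE hardware min/max is
legal for a given instruction might do (hypothetical function name):

   static bool
   can_use_fast_fmin(nir_alu_instr *alu)
   {
      /* A NaN- or signed-zero-preserving fmin must use the IEEE variant. */
      return !nir_alu_instr_is_nan_preserve(alu) &&
             !nir_alu_instr_is_signed_zero_preserve(alu);
   }
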