nir: handle new multadd opcodes in lowerings and opts

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
Karol Herbst 2026-04-23 04:25:24 +02:00 committed by Marge Bot
parent bb2b7c58fc
commit 68dc336af7
6 changed files with 25 additions and 4 deletions

View file

@ -654,6 +654,7 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
name = "__fmul64";
mangled_name = "__fmul64(u641;u641;";
break;
case nir_op_fmad:
case nir_op_ffma_old:
name = "__fmad64";
mangled_name = "__fmad64(u641;u641;u641;";

View file

@ -74,6 +74,7 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr,
case nir_op_fmul:
mangled_name = "__fmul32(u1;u1;";
break;
case nir_op_fmad:
case nir_op_ffma_old:
mangled_name = "__fmad32(u1;u1;u1;";
break;

View file

@ -175,9 +175,14 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
break;
}
case nir_op_ffmaz:
case nir_op_fmadz:
case nir_op_ffmaz_old:
src_mark_preserve_sz(&alu->src[2].src, NULL);
break;
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_fmad:
case nir_op_ffma_old:
if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) &&
!nir_alu_srcs_equal(alu, alu, 0, 1)) {

View file

@ -222,7 +222,7 @@ visit_undef_use(nir_src *src, struct visit_info *info)
info->replace_undef_with_constant = true;
/* Prefer NaN for float-typed sources, except for fmulz, for sources 0 and 1
 * of the "z" multiply-add opcodes (source 2 still qualifies), and for
 * pack_half_2x16_rtz_split.
 *
 * nir_alu_instr_is_mul_add_z() is true for the "z" multiply-add opcodes
 * (assumed to include nir_op_ffmaz_old -- confirm against its definition),
 * so it must be negated here to keep the meaning of the former
 * "alu->op != nir_op_ffmaz_old" test.
 */
if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
    alu->op != nir_op_fmulz &&
    (!nir_alu_instr_is_mul_add_z(alu) || i == 2) &&
    alu->op != nir_op_pack_half_2x16_rtz_split)
   info->prefer_nan = true;
}

View file

@ -3253,7 +3253,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
/* Reject exact ops because we are going to do an inexact transformation
* with it.
*/
if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma_old) ||
if (!alu || (alu->op != nir_op_fmul && !nir_alu_instr_is_mul_add(alu)) ||
nir_alu_instr_is_exact(alu) ||
!gather_fmul_tess_coord(iter->instr, alu, vertex_index,
&tess_coord_swizzle, &tess_coord_used,
@ -3263,7 +3263,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
/* The multiplication must only be used by ffma. */
if (alu->op == nir_op_fmul) {
nir_alu_instr *ffma = get_single_use_as_alu(&alu->def);
/* The single user of the fmul must exist and be a multiply-add opcode.
 * The pointer is tested first, as the previous opcode comparison did,
 * so the opcode predicate is never handed a NULL instruction.
 */
if (!ffma || !nir_alu_instr_is_mul_add(ffma))
   return false;
if (num_fmuls == 1)
@ -3388,6 +3388,11 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
*/
case nir_op_fmul:
case nir_op_fmulz:
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_ffmaz:
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT ||

View file

@ -838,6 +838,11 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
return;
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_ffmaz:
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_flrp:
push_fp_query(state, alu->src[0].src.ssa);
push_fp_query(state, alu->src[1].src.ssa);
@ -1320,9 +1325,13 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
break;
}
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_ffmaz:
case nir_op_ffmaz_old: {
bool mulz = alu->op == nir_op_ffmaz_old;
bool mulz = nir_alu_instr_is_mul_add_z(alu);
bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1);
bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1);
fp_class_mask r_mul = fmul_fp_class(src_res[0], src_res[1], mulz, src_eq, src_neg_eq);