diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 969a4df0759..1afbee8f7e7 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -819,6 +819,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr) [nir_op_fmin] = TGSI_OPCODE_MIN, [nir_op_fmax] = TGSI_OPCODE_MAX, + [nir_op_fmad] = TGSI_OPCODE_MAD, [nir_op_ffma_old] = TGSI_OPCODE_MAD, }; diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py index 421a28dcfe7..d22df212268 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py +++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py @@ -52,11 +52,11 @@ r300_nir_prepare_presubtract = [ (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))), (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))), # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form. - (('ffma_old', 2.0, ('fneg', a), 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), - (('ffma_old', a, -2.0, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), - (('ffma_old', -2.0, a, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), - (('ffma_old', 2.0, a, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))), - (('ffma_old', a, 2.0, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))), + (('fmad', 2.0, ('fneg', a), 1.0), ('fmad', ('fneg', a), 2.0, 1.0)), + (('fmad', a, -2.0, 1.0), ('fmad', ('fneg', a), 2.0, 1.0)), + (('fmad', -2.0, a, 1.0), ('fmad', ('fneg', a), 2.0, 1.0)), + (('fmad', 2.0, a, -1.0), ('fneg', ('fmad', ('fneg', a), 2.0, 1.0))), + (('fmad', a, 2.0, -1.0), ('fneg', ('fmad', ('fneg', a), 2.0, 1.0))), # x * 2 can be usually folded into output modifier for the previous # instruction, but that only works if x is a temporary. If it is input or # constant just convert it to add instead. @@ -85,7 +85,7 @@ r300_nir_opt_algebraic_late = [ # This is very late flrp lowering to clean up after bcsel->fcsel->flrp. r300_nir_lower_flrp = [ - (('flrp', a, b, c), ('ffma_old', b, c, ('ffma_old', ('fneg', a), c, a))) + (('flrp', a, b, c), ('fmad', b, c, ('fmad', ('fneg', a), c, a))) ] # Lower fcsel_ge from ftrunc on r300 diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7ae8fae775b..f52b7718b82 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -107,6 +107,9 @@ static struct disk_cache* r300_get_disk_shader_cache(struct pipe_screen* pscreen .fdot_replicates = true, \ .fuse_ffma32 = true, \ .fuse_ffma64 = true, \ + .float_mul_add32 = \ + nir_float_muladd_support_has_fmad | \ + nir_float_muladd_support_fuse, \ .lower_bitops = true, \ .lower_extract_byte = true, \ .lower_extract_word = true, \