mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 13:10:10 +01:00
radeonsi: clean up ffma handling
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6596>
This commit is contained in:
parent
57bf4c2028
commit
758ab39d25
2 changed files with 14 additions and 5 deletions
|
|
@@ -853,10 +853,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
}
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
|
||||
result =
|
||||
emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
|
||||
/* FMA is slow on gfx6-8, so it shouldn't be used. */
|
||||
assert(ctx->ac.chip_class >= GFX9);
|
||||
result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1], src[2]);
|
||||
break;
|
||||
case nir_op_ldexp:
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
|
|
|
|||
|
|
@@ -937,7 +937,16 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
.lower_bitfield_insert_to_bitfield_select = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_sub = true,
|
||||
.fuse_ffma = true,
|
||||
/* gfx6-8: use MAD (FMA is 4x slower)
|
||||
* gfx9-10: either is OK (MAD and FMA have the same performance)
|
||||
* gfx10.3: use FMA (MAD doesn't exist, separate MUL+ADD are 2x slower)
|
||||
*
|
||||
* FMA has no advantage on gfx9-10 and MAD allows more algebraic optimizations.
|
||||
* Keep FMA enabled on gfx10 to test it, which helps us validate correctness
|
||||
* for gfx10.3 on gfx10.
|
||||
*/
|
||||
.lower_ffma = sscreen->info.chip_class <= GFX9,
|
||||
.fuse_ffma = sscreen->info.chip_class >= GFX10,
|
||||
.lower_fmod = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue