From e6038645fafd5e1de4510e2831905070319dc800 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Fri, 14 Mar 2025 16:41:06 +1000
Subject: [PATCH] nak/sm70: allow first parameter of hfma2 to be non-reg

Either Rb or Rc can be the non-register source, so the
copy_alu_src_if_both_not_reg() call should be sufficient.

Totals:
CodeSize: 14025216 -> 14022144 (-0.02%)
Static cycle count: 5313517 -> 5312651 (-0.02%)

Totals from 4 (0.30% of 1332) affected shaders:
CodeSize: 119168 -> 116096 (-2.58%)
Static cycle count: 33920 -> 33054 (-2.55%)

Only affects:
q2rtx/q2rtx-rt-pipeline 42 -0.48% -0.45%

This also helps with the coop matrix shaders.

Part-of:
---
 src/nouveau/compiler/nak/sm70_encode.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs
index 695c79dd392..12d836202a2 100644
--- a/src/nouveau/compiler/nak/sm70_encode.rs
+++ b/src/nouveau/compiler/nak/sm70_encode.rs
@@ -1086,7 +1086,6 @@ impl SM70Op for OpHFma2 {
         let [src0, src1, src2] = &mut self.srcs;
         swap_srcs_if_not_reg(src0, src1, gpr);
         b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2);
-        b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F16v2);
         b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F16v2);
     }