From 61b44913f572fd514386812e0bf432808f2899db Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 16 Apr 2025 18:58:27 -0500 Subject: [PATCH] nak/legalize: Take a RegFile in copy_alu_src_and_lower_fmod Otherwise, we'll screw up uniform GPRs. Cc: mesa-stable Part-of: (cherry picked from commit 22a30bfa4f9424c221d641bc79468f4928b39440) --- .pick_status.json | 2 +- src/nouveau/compiler/nak/legalize.rs | 7 ++++--- src/nouveau/compiler/nak/sm50.rs | 29 ++++++++++++++++------------ src/nouveau/compiler/nak/sm70.rs | 2 +- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 8adde53297d..5278269595e 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2504,7 +2504,7 @@ "description": "nak/legalize: Take a RegFile in copy_alu_src_and_lower_fmod", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index 18c669820fb..7efcbc83c65 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -231,11 +231,12 @@ pub trait LegalizeBuildHelpers: SSABuilder { fn copy_alu_src_and_lower_fmod( &mut self, src: &mut Src, + reg_file: RegFile, src_type: SrcType, ) { match src_type { SrcType::F16 | SrcType::F16v2 => { - let val = self.alloc_ssa(RegFile::GPR, 1); + let val = self.alloc_ssa(reg_file, 1); self.push_op(OpHAdd2 { dst: val.into(), srcs: [Src::new_zero().fneg(), *src], @@ -246,7 +247,7 @@ pub trait LegalizeBuildHelpers: SSABuilder { *src = val.into(); } SrcType::F32 => { - let val = self.alloc_ssa(RegFile::GPR, 1); + let val = self.alloc_ssa(reg_file, 1); self.push_op(OpFAdd { dst: val.into(), srcs: [Src::new_zero().fneg(), *src], @@ -257,7 +258,7 @@ pub trait LegalizeBuildHelpers: SSABuilder { *src = val.into(); } SrcType::F64 => { - let val = self.alloc_ssa(RegFile::GPR, 2); + let val = self.alloc_ssa(reg_file, 2); self.push_op(OpDAdd { dst: val.into(), srcs: [Src::new_zero().fneg(), *src], diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index 684d5f565e6..872d5b66981 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -318,9 +318,14 @@ impl SM50Encoder<'_> { // pub trait SM50LegalizeBuildHelpers: LegalizeBuildHelpers { - fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) { + fn copy_alu_src_if_fabs( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { if src.src_mod.has_fabs() { - self.copy_alu_src_and_lower_fmod(src, src_type); + self.copy_alu_src_and_lower_fmod(src, reg_file, src_type); } } @@ -451,9 +456,9 @@ impl SM50Op for OpFFma { fn legalize(&mut self, b: &mut LegalizeBuilder) { use RegFile::GPR; let [src0, src1, src2] = &mut self.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F32); - b.copy_alu_src_if_fabs(src1, SrcType::F32); - b.copy_alu_src_if_fabs(src2, SrcType::F32); + b.copy_alu_src_if_fabs(src0, GPR, SrcType::F32); + b.copy_alu_src_if_fabs(src1, GPR, SrcType::F32); + b.copy_alu_src_if_fabs(src2, GPR, SrcType::F32); swap_srcs_if_not_reg(src0, src1, GPR); b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); @@ -558,8 +563,8 @@ impl SM50Op for OpFMul { fn legalize(&mut self, b: &mut LegalizeBuilder) { use RegFile::GPR; let [src0, src1] = &mut self.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F32); - b.copy_alu_src_if_fabs(src1, SrcType::F32); + b.copy_alu_src_if_fabs(src0, GPR, SrcType::F32); + b.copy_alu_src_if_fabs(src1, GPR, SrcType::F32); swap_srcs_if_not_reg(src0, src1, GPR); b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); } @@ -889,9 +894,9 @@ impl SM50Op for OpDFma { fn legalize(&mut self, b: &mut LegalizeBuilder) { use RegFile::GPR; let [src0, src1, src2] = &mut self.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F64); - b.copy_alu_src_if_fabs(src1, SrcType::F64); - b.copy_alu_src_if_fabs(src2, SrcType::F64); + b.copy_alu_src_if_fabs(src0, GPR, SrcType::F64); + b.copy_alu_src_if_fabs(src1, GPR, SrcType::F64); + b.copy_alu_src_if_fabs(src2, GPR, SrcType::F64); swap_srcs_if_not_reg(src0, src1, GPR); b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); @@ -988,8 +993,8 @@ impl SM50Op for OpDMul { fn legalize(&mut self, b: &mut LegalizeBuilder) { use RegFile::GPR; let [src0, src1] = &mut self.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F64); - b.copy_alu_src_if_fabs(src1, SrcType::F64); + b.copy_alu_src_if_fabs(src0, GPR, SrcType::F64); + b.copy_alu_src_if_fabs(src1, GPR, SrcType::F64); swap_srcs_if_not_reg(src0, src1, GPR); b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index e1a8d788ba0..296367586c0 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -1202,7 +1202,7 @@ impl SM70Op for OpHFma2 { // HFMA2 doesn't have fabs or fneg on SRC2. if !src2.src_mod.is_none() { - b.copy_alu_src_and_lower_fmod(src2, SrcType::F16v2); + b.copy_alu_src_and_lower_fmod(src2, gpr, SrcType::F16v2); } }