From b4947b2a14599bcb36c5146318a2330ef5d52159 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Sat, 30 May 2026 03:16:44 -0400 Subject: [PATCH] kraid: Be more lax about immediates Instead of asserting that the swizzle replicates, look at the actual value we're swizzling. This lets us get away with putting immediates in 8 and 16-bit ops in more cases. Part-of: --- src/panfrost/compiler/kraid/ir.rs | 22 ++++++++++++++++++++++ src/panfrost/compiler/kraid/lower_16bit.rs | 4 ++-- src/panfrost/compiler/kraid/validate.rs | 4 ++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/panfrost/compiler/kraid/ir.rs b/src/panfrost/compiler/kraid/ir.rs index 18c6a1c51f9..ba65e59b48c 100644 --- a/src/panfrost/compiler/kraid/ir.rs +++ b/src/panfrost/compiler/kraid/ir.rs @@ -367,6 +367,28 @@ impl Src { pub fn is_zero(&self) -> bool { matches!(self.src_ref, SrcRef::Zero | SrcRef::Imm32(0)) } + + pub fn replicates_byte(&self) -> bool { + match self.src_ref { + SrcRef::Zero => true, + SrcRef::Imm32(u) => self.swizzle.fold_u32(u).is_some_and(|u| { + let b = u.to_le_bytes(); + b[0] == b[1] && b[0] == b[2] && b[0] == b[3] + }), + _ => self.swizzle.replicates_byte(), + } + } + + pub fn replicates_half(&self) -> bool { + match self.src_ref { + SrcRef::Zero => true, + SrcRef::Imm32(u) => self + .swizzle + .fold_u32(u) + .is_some_and(|u| (u & 0xffff) == (u >> 16)), + _ => self.swizzle.replicates_half(), + } + } } impl> From for Src { diff --git a/src/panfrost/compiler/kraid/lower_16bit.rs b/src/panfrost/compiler/kraid/lower_16bit.rs index 9e0ef1e58b1..ea4879fb68f 100644 --- a/src/panfrost/compiler/kraid/lower_16bit.rs +++ b/src/panfrost/compiler/kraid/lower_16bit.rs @@ -24,13 +24,13 @@ macro_rules! lower_op { match $op.$variant.total_bits() { SOME_8 => { for src in $op.srcs() { - debug_assert!(src.swizzle.replicates_byte()); + debug_assert!(src.replicates_byte()); } $op.$variant = replicate_type($op.$variant, 4); } SOME_16 => { for src in $op.srcs() { - debug_assert!(src.swizzle.replicates_half()); + debug_assert!(src.replicates_half()); } $op.$variant = replicate_type($op.$variant, 2); } diff --git a/src/panfrost/compiler/kraid/validate.rs b/src/panfrost/compiler/kraid/validate.rs index ddc3e519af2..13558a2a656 100644 --- a/src/panfrost/compiler/kraid/validate.rs +++ b/src/panfrost/compiler/kraid/validate.rs @@ -18,9 +18,9 @@ fn validate_instr(instr: &Instr, ssa_vals: &mut FxHashSet) { if src_type.comps().unwrap().get() == 1 { if src_type.bits().unwrap().get() == 8 { - assert!(src.swizzle.replicates_byte()); + assert!(src.replicates_byte()); } else if src_type.bits().unwrap().get() == 16 { - assert!(src.swizzle.replicates_half()); + assert!(src.replicates_half()); } }