diff --git a/src/panfrost/compiler/kraid/compile.rs b/src/panfrost/compiler/kraid/compile.rs
index 53d9d23992f..b5e3ceaaee1 100644
--- a/src/panfrost/compiler/kraid/compile.rs
+++ b/src/panfrost/compiler/kraid/compile.rs
@@ -81,6 +81,10 @@ pub extern "C" fn kraid_compile_nir(
     dump_shader(&s, "after register assignment");
     s.validate();
 
+    s.lower_16bit_alu();
+    dump_shader(&s, "after lowering 16bit ALU ops");
+    s.validate();
+
     s.assign_message_slots();
     dump_shader(&s, "after message slot assignment");
     s.validate();
diff --git a/src/panfrost/compiler/kraid/lib.rs b/src/panfrost/compiler/kraid/lib.rs
index b6cd1f694ff..9ab415a14da 100644
--- a/src/panfrost/compiler/kraid/lib.rs
+++ b/src/panfrost/compiler/kraid/lib.rs
@@ -9,6 +9,7 @@ mod encode_v9;
 mod flow;
 mod ir;
 mod isa;
+mod lower_16bit;
 mod message_slots;
 mod model;
 mod nir;
diff --git a/src/panfrost/compiler/kraid/lower_16bit.rs b/src/panfrost/compiler/kraid/lower_16bit.rs
new file mode 100644
index 00000000000..9e0ef1e58b1
--- /dev/null
+++ b/src/panfrost/compiler/kraid/lower_16bit.rs
@@ -0,0 +1,95 @@
+// Copyright © 2026 Collabora, Ltd.
+// SPDX-License-Identifier: MIT
+
+use crate::ir::*;
+use crate::ops::*;
+use std::num::NonZeroU8;
+
+/// Returns `true` when `data_type` is exactly 16 bits wide in total.
+fn type_is_16bit(data_type: DataType) -> bool {
+    data_type.total_bits() == NonZeroU8::new(16)
+}
+
+/// Builds a vector type with `n` times as many components as `data_type`
+/// and the same scalar type.
+///
+/// Panics if `data_type.comps()` is `None`.
+fn replicate_type(data_type: DataType, n: u8) -> DataType {
+    DataType::v(
+        data_type.comps().unwrap().get() * n,
+        data_type.scalar_type(),
+    )
+}
+
+/// Widens the `$variant` type field of `$op` when it is narrower than 32
+/// bits: an 8-bit type gets 4x the components, a 16-bit type gets 2x.
+///
+/// Debug builds check that every source swizzle replicates the byte or
+/// half.  Any other width must be at least 32 bits; otherwise (including
+/// a `None` width) the macro panics.
+macro_rules! lower_op {
+    ($op: expr, $variant: ident) => {{
+        const SOME_8: Option<NonZeroU8> = NonZeroU8::new(8);
+        const SOME_16: Option<NonZeroU8> = NonZeroU8::new(16);
+        const SOME_32: Option<NonZeroU8> = NonZeroU8::new(32);
+        match $op.$variant.total_bits() {
+            SOME_8 => {
+                for src in $op.srcs() {
+                    debug_assert!(src.swizzle.replicates_byte());
+                }
+                $op.$variant = replicate_type($op.$variant, 4);
+            }
+            SOME_16 => {
+                for src in $op.srcs() {
+                    debug_assert!(src.swizzle.replicates_half());
+                }
+                $op.$variant = replicate_type($op.$variant, 2);
+            }
+            // `None` orders below every `Some`, so it fails here too
+            bits => assert!(bits >= SOME_32),
+        }
+    }};
+}
+
+/// Lowers a single instruction in place.
+///
+/// Panics if an op without a dedicated arm has a destination that writes
+/// fewer than 4 bytes.
+fn lower_instr(instr: &mut Instr) {
+    match &mut instr.op {
+        Op::FAdd(op) => lower_op!(op, dst_type),
+        Op::FCmp(op) => lower_op!(op, src_type),
+        Op::IAdd(op) => lower_op!(op, dst_type),
+        Op::ICmp(op) => lower_op!(op, src_type),
+        Op::LdPka(_) | Op::Load(_) => (), // These handle 16-bit natively
+        Op::Mov(op) => {
+            // A 16-bit move becomes `0 + src.half(0)` as a V2I16 IAdd;
+            // moves of any other width are left as they are.
+            if type_is_16bit(op.dst_type) {
+                instr.op = Op::from(OpIAdd {
+                    dst: op.dst.clone(),
+                    dst_type: DataType::V2I16,
+                    saturate: false,
+                    srcs: [0.into(), op.src.clone().half(0)],
+                });
+            }
+        }
+        Op::ShiftLop(op) => lower_op!(op, dst_type),
+        op => {
+            for dst in op.dsts() {
+                assert!(dst.bytes_written() >= 4);
+            }
+        }
+    }
+}
+
+impl Shader<'_> {
+    /// Runs [`lower_instr`] on every instruction of every block.
+    pub fn lower_16bit_alu(&mut self) {
+        for b in self.blocks.iter_mut() {
+            for i in b.instrs.iter_mut() {
+                lower_instr(i);
+            }
+        }
+    }
+}