diff --git a/src/panfrost/compiler/kraid/builder.rs b/src/panfrost/compiler/kraid/builder.rs
index d3b03994699..3c80df03ea4 100644
--- a/src/panfrost/compiler/kraid/builder.rs
+++ b/src/panfrost/compiler/kraid/builder.rs
@@ -37,6 +37,33 @@ pub trait SSABuilder: Builder {
         });
         def
     }
+
+    fn mkvec_v2i8(&mut self, x: Src, y: Src) -> SSAValue {
+        let def = self.alloc_ssa(16);
+        self.push_op(OpMkVecV2I8 {
+            dst: def.into(),
+            srcs: [x, y],
+        });
+        def
+    }
+
+    fn mkvec_v2i16(&mut self, x: Src, y: Src) -> SSAValue {
+        self.mkvec_v4i8(
+            x.clone().byte(0),
+            x.clone().byte(1),
+            y.clone().byte(0),
+            y.clone().byte(1),
+        )
+    }
+
+    fn mkvec_v4i8(&mut self, x: Src, y: Src, z: Src, w: Src) -> SSAValue {
+        let def = self.alloc_ssa(32);
+        self.push_op(OpMkVecV4I8 {
+            dst: def.into(),
+            srcs: [x, y, z, w],
+        });
+        def
+    }
 }
 
 pub struct InstrBuilder {
diff --git a/src/panfrost/compiler/kraid/nir.rs b/src/panfrost/compiler/kraid/nir.rs
index 234549db34c..de465c9f4fb 100644
--- a/src/panfrost/compiler/kraid/nir.rs
+++ b/src/panfrost/compiler/kraid/nir.rs
@@ -10,7 +10,7 @@ use crate::ssa_value::SSAValueAllocator;
 use compiler::bindings::*;
 use compiler::nir::*;
 use rustc_hash::FxHashMap;
-use std::cmp::max;
+use std::cmp::{max, min};
 
 #[derive(Default)]
 struct BlockLabelMap {
@@ -140,6 +140,117 @@ impl<'a> ShaderFromNir<'a> {
         }
     }
 
+    fn parse_alu(&mut self, b: &mut impl SSABuilder, alu: &nir_alu_instr) {
+        // Handle vectors and pack ops as a special case since they're the only
+        // ALU ops that can produce more than 16B. They are also the only ALU
+        // ops which we allow to consume small (8 and 16-bit) vector data
+        // scattered across multiple dwords
+        if matches!(
+            alu.op,
+            nir_op_mov
+                | nir_op_pack_32_4x8
+                | nir_op_pack_32_4x8_split
+                | nir_op_pack_32_2x16
+                | nir_op_pack_32_2x16_split
+                | nir_op_pack_64_2x32
+                | nir_op_pack_64_2x32_split
+                | nir_op_pack_64_4x16
+                | nir_op_vec2
+                | nir_op_vec3
+                | nir_op_vec4
+                | nir_op_vec5
+                | nir_op_vec8
+                | nir_op_vec16
+        ) {
+            let mut nsrcs = Vec::new();
+            if alu.info().num_inputs == 1 {
+                let src = alu.get_src(0);
+                for c in 0..usize::from(alu.src_components(0)) {
+                    nsrcs.push((src.src.as_def(), src.swizzle[c]));
+                }
+            } else {
+                for src in alu.srcs_as_slice().iter() {
+                    nsrcs.push((src.src.as_def(), src.swizzle[0]))
+                }
+            }
+
+            let src_bit_size = alu.get_src(0).src.bit_size();
+
+            let mut srcs = Vec::new();
+            match src_bit_size {
+                8 => {
+                    for (def, c) in nsrcs {
+                        let ssa = self.get_ssa(def)[usize::from(c) / 4];
+                        srcs.push(Src::from(ssa).byte(c % 4));
+                    }
+                }
+                16 => {
+                    for (def, c) in nsrcs {
+                        let ssa = self.get_ssa(def)[usize::from(c) / 2];
+                        srcs.push(Src::from(ssa).half(c % 2));
+                    }
+                }
+                32 => {
+                    for (def, c) in nsrcs {
+                        let ssa = self.get_ssa(def)[usize::from(c)];
+                        srcs.push(Src::from(ssa));
+                    }
+                }
+                64 => {
+                    for (def, c) in nsrcs {
+                        let vec = self.get_ssa(def);
+                        srcs.push(Src::from(vec[usize::from(c) * 2 + 0]));
+                        srcs.push(Src::from(vec[usize::from(c) * 2 + 1]));
+                    }
+                }
+                _ => panic!("Unsupported bit size: {src_bit_size}"),
+            }
+
+            // We flattened i64 to v2i32
+            let src_bit_size = min(src_bit_size, 32);
+
+            let mut srcs = srcs.into_iter();
+            let mut dst_vec = Vec::new();
+            if srcs.len() == 1 && src_bit_size <= 16 {
+                let x = srcs.next().unwrap();
+                dst_vec.push(b.mov_i16(x));
+            } else if srcs.len() == 2 && src_bit_size == 8 {
+                let x = srcs.next().unwrap();
+                let y = srcs.next().unwrap();
+                dst_vec.push(b.mkvec_v2i8(x, y));
+            } else if src_bit_size == 8 {
+                loop {
+                    let Some(x) = srcs.next() else {
+                        break;
+                    };
+                    let y = srcs.next().unwrap_or(0.into());
+                    let z = srcs.next().unwrap_or(0.into());
+                    let w = srcs.next().unwrap_or(0.into());
+                    dst_vec.push(b.mkvec_v4i8(x, y, z, w));
+                }
+            } else if src_bit_size == 16 {
+                let mut srcs = srcs.into_iter();
+                loop {
+                    let Some(x) = srcs.next() else {
+                        break;
+                    };
+                    let y = srcs.next().unwrap_or(0.into());
+                    dst_vec.push(b.mkvec_v2i16(x, y));
+                }
+            } else if src_bit_size == 32 {
+                dst_vec = srcs.map(|src| b.mov_i32(src)).collect();
+            } else {
+                panic!("Unsupported bit size: {src_bit_size}");
+            }
+            self.set_ssa(&alu.def, dst_vec);
+            return;
+        }
+
+        match alu.op {
+            _ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
+        }
+    }
+
     fn parse_block(
         &mut self,
         ssa_alloc: &mut SSAValueAllocator,
@@ -153,6 +264,9 @@ impl<'a> ShaderFromNir<'a> {
                 nir_instr_type_load_const => {
                     self.parse_const(&mut b, ni.as_load_const().unwrap())
                 }
+                nir_instr_type_alu => {
+                    self.parse_alu(&mut b, ni.as_alu().unwrap())
+                }
                 _ => panic!("Unsupported instruction type"),
             }
         }
diff --git a/src/panfrost/compiler/kraid/ops.rs b/src/panfrost/compiler/kraid/ops.rs
index 9ac23206750..fda0c864117 100644
--- a/src/panfrost/compiler/kraid/ops.rs
+++ b/src/panfrost/compiler/kraid/ops.rs
@@ -66,6 +66,50 @@ impl fmt::Display for OpEnd {
     }
 }
 
+#[repr(C)]
+#[derive(Clone, Opcode)]
+pub struct OpMkVecV2I8 {
+    #[dst_type(V2I8)]
+    pub dst: Dst,
+
+    #[src_type(I8)]
+    pub srcs: [Src; 2],
+}
+
+impl fmt::Display for OpMkVecV2I8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{} = MKVEC.v4i8 {} {}",
+            &self.dst, &self.srcs[0], &self.srcs[1],
+        )
+    }
+}
+
+#[repr(C)]
+#[derive(Clone, Opcode)]
+pub struct OpMkVecV4I8 {
+    #[dst_type(V4I8)]
+    pub dst: Dst,
+
+    #[src_type(I8)]
+    pub srcs: [Src; 4],
+}
+
+impl fmt::Display for OpMkVecV4I8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{} = MKVEC.v4i8 {} {} {} {}",
+            &self.dst,
+            &self.srcs[0],
+            &self.srcs[1],
+            &self.srcs[2],
+            &self.srcs[3],
+        )
+    }
+}
+
 #[repr(C)]
 #[derive(Clone, Opcode)]
 #[variants(dst_type in [I16, I32])]
@@ -85,5 +129,7 @@ impl fmt::Display for OpMov {
 pub enum Op {
     Branch(OpBranch),
     End(OpEnd),
+    MkVecV2I8(OpMkVecV2I8),
+    MkVecV4I8(OpMkVecV4I8),
     Mov(OpMov),
 }