nak: Fix encoding of f16x2 min/max on sm90+

Fixes illegal instruction encoding errors on
dEQP-VK.glsl.atomic_operations.min_f16vec2_fragment
and others.

Fixes: a3fcccb47b ("nak/from_nir: Handle f16v2 atomics")
Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42196>
This commit is contained in:
Mel Henning 2026-06-11 13:23:27 -04:00 committed by Marge Bot
parent 542e050ec5
commit 41b4e04c76
2 changed files with 95 additions and 50 deletions

View file

@ -306,6 +306,18 @@ pub fn test_ld_st_atom() {
(AtomType::F64, ".f64.rn"),
];
let atom_ops = [
AtomOp::Add,
AtomOp::Min,
AtomOp::Max,
AtomOp::Inc,
AtomOp::Dec,
AtomOp::And,
AtomOp::Or,
AtomOp::Xor,
AtomOp::Exch,
];
let spaces = [
MemSpace::Global(MemAddrType::A64),
MemSpace::Shared,
@ -416,55 +428,71 @@ pub fn test_ld_st_atom() {
c.push(instr, expected);
for (atom_type, atom_type_str) in atom_types {
for use_dst in [true, false] {
let instr = OpAtom {
dst: if use_dst {
Dst::Reg(r0)
} else {
Dst::None
},
addr: SrcRef::Reg(r4_64).into(),
uniform_address: urz.clone(),
data: SrcRef::Reg(r2).into(),
atom_op: AtomOp::Add,
cmpr: SrcRef::Reg(r3).into(),
atom_type,
let active_atom_ops = if atom_type.is_float() {
&atom_ops[0..3]
} else {
&atom_ops[..]
};
addr_offset,
addr_stride: addr_stride,
for atom_op in active_atom_ops {
for use_dst in [true, false] {
if !use_dst && *atom_op == AtomOp::Exch {
continue;
}
mem_space: space,
mem_order: order,
mem_eviction_priority: pri,
};
let expected = match space {
MemSpace::Global(_) => {
let op = if use_dst {
"atomg"
} else if sm >= 90 {
"redg"
let instr = OpAtom {
dst: if use_dst {
Dst::Reg(r0)
} else {
"red"
};
let dst =
if use_dst { "pt, r0, " } else { "" };
format!("{op}.e.add.ef{atom_type_str}.strong.{cta} {dst}[{r4_64_str}{uniform_addr}+{addr_offset_str}], r2;")
}
MemSpace::Shared => {
if atom_type.is_float() {
continue;
}
if atom_type.bits() == 64 {
continue;
}
let dst = if use_dst { "r0" } else { "rz" };
format!("atoms.add{atom_type_str} {dst}, [{r4_64_str}{addr_stride}{uniform_addr}+{addr_offset_str}], r2;")
}
MemSpace::Local => continue,
};
Dst::None
},
addr: SrcRef::Reg(r4_64).into(),
uniform_address: urz.clone(),
data: SrcRef::Reg(r2).into(),
atom_op: *atom_op,
cmpr: SrcRef::Reg(r3).into(),
atom_type,
c.push(instr, expected);
addr_offset,
addr_stride: addr_stride,
mem_space: space,
mem_order: order,
mem_eviction_priority: pri,
};
let expected = match space {
MemSpace::Global(_) => {
let op = if use_dst {
"atomg"
} else if sm >= 90 {
"redg"
} else {
"red"
};
let dst = if use_dst {
"pt, r0, "
} else {
""
};
format!("{op}.e{atom_op}.ef{atom_type_str}.strong.{cta} {dst}[{r4_64_str}{uniform_addr}+{addr_offset_str}], r2;")
}
MemSpace::Shared => {
if atom_type.is_float() {
continue;
}
if atom_type.bits() == 64 {
continue;
}
let dst =
if use_dst { "r0" } else { "rz" };
format!("atoms{atom_op}{atom_type_str} {dst}, [{r4_64_str}{addr_stride}{uniform_addr}+{addr_offset_str}], r2;")
}
MemSpace::Local => continue,
};
c.push(instr, expected);
}
}
}
}

View file

@ -3480,6 +3480,19 @@ impl SM70Encoder<'_> {
);
}
/// Writes the encoding of a floating-point atomic operation into the
/// instruction bit field `range`.
///
/// Only `AtomOp::Add`, `AtomOp::Min` and `AtomOp::Max` are accepted; they
/// are encoded as 0, 2 and 4 respectively.
///
/// # Panics
///
/// Panics if the encoder's `sm` is below 90, or if `atom_op` is any
/// operation other than the three listed above.
fn set_atom_op_sm90_float(&mut self, range: Range<usize>, atom_op: AtomOp) {
    assert!(self.sm >= 90);

    // Resolve the field value first so the write below is a plain call.
    let encoded = match atom_op {
        AtomOp::Add => 0_u8,
        AtomOp::Min => 2_u8,
        AtomOp::Max => 4_u8,
        _ => panic!("Unsupported float atomic"),
    };

    self.set_field(range, encoded);
}
fn set_atom_type(&mut self, atom_type: AtomType, su: bool) {
if self.sm >= 90 && !su {
// Float/int is differentiated by opcode
@ -3548,13 +3561,14 @@ impl SM70Op for OpAtom {
if self.dst.is_none() {
if e.sm >= 90 && self.atom_type.is_float() {
e.set_opcode(0x9a6);
e.set_atom_op_sm90_float(87..90, self.atom_op);
} else {
e.set_opcode(0x98e);
e.set_atom_op(87..90, self.atom_op);
}
e.set_reg_src(32..40, &self.data);
e.set_field(40..64, self.addr_offset);
e.set_atom_op(87..90, self.atom_op);
if has_ugpr {
e.set_reg_addr(24..32, &self.addr, 90);
e.set_ureg_addr(64, &self.uniform_address, 72);
@ -3576,10 +3590,14 @@ impl SM70Op for OpAtom {
} else {
if e.sm >= 90 && self.atom_type.is_float() {
e.set_opcode(0x9a3);
} else if has_ugpr {
e.set_opcode(0x9a8);
e.set_atom_op_sm90_float(87..91, self.atom_op);
} else {
e.set_opcode(0x3a8);
if has_ugpr {
e.set_opcode(0x9a8);
} else {
e.set_opcode(0x3a8);
}
e.set_atom_op(87..91, self.atom_op);
}
if e.sm >= 100 {
@ -3601,7 +3619,6 @@ impl SM70Op for OpAtom {
e.set_reg_src(32..40, &self.data);
e.set_pred_dst(81..84, &Dst::None);
e.set_atom_op(87..91, self.atom_op);
e.set_bit(91, has_ugpr);
}