diff --git a/.pick_status.json b/.pick_status.json
index 404ac186aa6..1bd8cdaa456 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -144,7 +144,7 @@
"description": "pan/bi: Fix invalid CLPER encoding",
"nominated": true,
"nomination_type": 2,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": "316486dd9f6bbd03e7e13655674f1fa91e533b9a",
"notes": null
diff --git a/src/panfrost/compiler/bi_lower_swizzle.c b/src/panfrost/compiler/bi_lower_swizzle.c
index c9872ce6375..b74bfee5c90 100644
--- a/src/panfrost/compiler/bi_lower_swizzle.c
+++ b/src/panfrost/compiler/bi_lower_swizzle.c
@@ -54,12 +54,17 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
+ break;
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
+ if (src == 0)
+ break;
+ else
+ return;
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index ba3a4727bc7..d4559440e83 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -1745,7 +1745,7 @@ bi_emit_derivative(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr,
*/
if (nir_def_all_uses_ignore_sign_bit(&instr->def) && !coarse) {
left = s0;
- right = bi_clper(b, s0, bi_imm_u32(axis), BI_LANE_OP_XOR);
+ right = bi_clper(b, s0, bi_imm_u8(axis), BI_LANE_OP_XOR);
} else {
bi_index lane1, lane2;
if (coarse) {
@@ -1758,8 +1758,8 @@ bi_emit_derivative(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr,
lane2 = bi_iadd_u32(b, lane1, bi_imm_u32(axis), false);
}
- left = bi_clper(b, s0, lane1, BI_LANE_OP_NONE);
- right = bi_clper(b, s0, lane2, BI_LANE_OP_NONE);
+ left = bi_clper(b, s0, bi_byte(lane1, 0), BI_LANE_OP_NONE);
+ right = bi_clper(b, s0, bi_byte(lane2, 0), BI_LANE_OP_NONE);
}
bi_fadd_to(b, sz, dst, right, bi_neg(left));
@@ -2052,7 +2052,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_subgroup_from_cluster_size(pan_subgroup_size(b->shader->arch));
bi_clper_i32_to(b, dst,
bi_src_index(&instr->src[0]),
- bi_src_index(&instr->src[1]),
+ bi_byte(bi_src_index(&instr->src[1]), 0),
inactive_result, lane_op, subgroup);
break;
}
diff --git a/src/panfrost/compiler/valhall/ISA.xml b/src/panfrost/compiler/valhall/ISA.xml
index 7227b24c21b..d4ac519c0f9 100644
--- a/src/panfrost/compiler/valhall/ISA.xml
+++ b/src/panfrost/compiler/valhall/ISA.xml
@@ -1977,7 +1977,7 @@
derivatives in fragment shaders.
A
- B
+ B
diff --git a/src/panfrost/compiler/valhall/test/assembler-cases.txt b/src/panfrost/compiler/valhall/test/assembler-cases.txt
index 63cc3910d54..bf8d6b5bdc8 100644
--- a/src/panfrost/compiler/valhall/test/assembler-cases.txt
+++ b/src/panfrost/compiler/valhall/test/assembler-cases.txt
@@ -32,7 +32,7 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
00 00 00 00 00 c0 00 78 NOP.end
40 c4 c0 9c 01 c1 f0 00 ICMP_OR.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0
42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, ^r2, offset:0
-00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0
+00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b00
00 00 00 30 00 c7 90 00 S8_TO_S32 r7, r0.b3
00 00 00 20 00 c6 90 00 S8_TO_S32 r6, r0.b2
00 00 00 00 00 c4 90 00 S8_TO_S32 r4, r0.b0