diff --git a/.pick_status.json b/.pick_status.json index 404ac186aa6..1bd8cdaa456 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -144,7 +144,7 @@ "description": "pan/bi: Fix invalid CLPER encoding", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "316486dd9f6bbd03e7e13655674f1fa91e533b9a", "notes": null diff --git a/src/panfrost/compiler/bi_lower_swizzle.c b/src/panfrost/compiler/bi_lower_swizzle.c index c9872ce6375..b74bfee5c90 100644 --- a/src/panfrost/compiler/bi_lower_swizzle.c +++ b/src/panfrost/compiler/bi_lower_swizzle.c @@ -54,12 +54,17 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src) case BI_OPCODE_CSEL_V2I16: case BI_OPCODE_CSEL_V2S16: case BI_OPCODE_CSEL_V2U16: + break; /* Despite ostensibly being 32-bit instructions, CLPER does not * inherently interpret the data, so it can be used for v2f16 * derivatives, which might require swizzle lowering */ case BI_OPCODE_CLPER_I32: case BI_OPCODE_CLPER_OLD_I32: + if (src == 0) + break; + else + return; /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the * boolean is implemented as a 16-bit integer, the swizzle is needed diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index ba3a4727bc7..d4559440e83 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1745,7 +1745,7 @@ bi_emit_derivative(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr, */ if (nir_def_all_uses_ignore_sign_bit(&instr->def) && !coarse) { left = s0; - right = bi_clper(b, s0, bi_imm_u32(axis), BI_LANE_OP_XOR); + right = bi_clper(b, s0, bi_imm_u8(axis), BI_LANE_OP_XOR); } else { bi_index lane1, lane2; if (coarse) { @@ -1758,8 +1758,8 @@ bi_emit_derivative(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr, lane2 = bi_iadd_u32(b, lane1, bi_imm_u32(axis), false); } - left = bi_clper(b, s0, lane1, BI_LANE_OP_NONE); - right = bi_clper(b, s0, lane2, BI_LANE_OP_NONE); + left = bi_clper(b, s0, bi_byte(lane1, 0), BI_LANE_OP_NONE); + right = bi_clper(b, s0, bi_byte(lane2, 0), BI_LANE_OP_NONE); } bi_fadd_to(b, sz, dst, right, bi_neg(left)); @@ -2052,7 +2052,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_subgroup_from_cluster_size(pan_subgroup_size(b->shader->arch)); bi_clper_i32_to(b, dst, bi_src_index(&instr->src[0]), - bi_src_index(&instr->src[1]), + bi_byte(bi_src_index(&instr->src[1]), 0), inactive_result, lane_op, subgroup); break; } diff --git a/src/panfrost/compiler/valhall/ISA.xml b/src/panfrost/compiler/valhall/ISA.xml index 7227b24c21b..d4ac519c0f9 100644 --- a/src/panfrost/compiler/valhall/ISA.xml +++ b/src/panfrost/compiler/valhall/ISA.xml @@ -1977,7 +1977,7 @@ derivatives in fragment shaders. A - B + B diff --git a/src/panfrost/compiler/valhall/test/assembler-cases.txt b/src/panfrost/compiler/valhall/test/assembler-cases.txt index 63cc3910d54..bf8d6b5bdc8 100644 --- a/src/panfrost/compiler/valhall/test/assembler-cases.txt +++ b/src/panfrost/compiler/valhall/test/assembler-cases.txt @@ -32,7 +32,7 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0 00 00 00 00 00 c0 00 78 NOP.end 40 c4 c0 9c 01 c1 f0 00 ICMP_OR.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0 42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, ^r2, offset:0 -00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0 +00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b00 00 00 00 30 00 c7 90 00 S8_TO_S32 r7, r0.b3 00 00 00 20 00 c6 90 00 S8_TO_S32 r6, r0.b2 00 00 00 00 00 c4 90 00 S8_TO_S32 r4, r0.b0