# Summary of this patch (explanatory preamble placed before the first
# "diff --git" header; it is not part of any hunk):
#
#   * .pick_status.json: in the entry whose description is
#     "pan/bi: Use CLPER_V6 on Mali G31", "resolution" changes from 0 to 1.
#
#   * src/panfrost/bifrost/bi_quirks.h: introduces the quirk bit
#     BIFROST_LIMITED_CLPER (1 << 2) for GPUs that only have the simple CLPER
#     encoding. bifrost_get_quirks() now reports it for product_id >> 8 equal
#     to 0x60 and 0x62 (in addition to their existing quirks) and for 0x70
#     (annotated G31), which previously shared the "return 0" group. 0x71 is
#     added to the group that returns 0 alongside 0x72 and 0x74.
#
#   * src/panfrost/bifrost/bifrost_compile.c: in bi_emit_alu(), the branch
#     that emits bi_clper_v6_i32() is now selected by testing
#     b->shader->quirks & BIFROST_LIMITED_CLPER instead of
#     b->shader->arch == 6.
#
# NOTE(review): as seen here the whole patch sits on one physical line; the
# hunk headers (e.g. -39,15 +39,26) imply the original had one diff line per
# row. Confirm the line breaks are intact in the real file before applying.
diff --git a/.pick_status.json b/.pick_status.json index f23a8d26deb..ee8c07913b4 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -733,7 +733,7 @@ "description": "pan/bi: Use CLPER_V6 on Mali G31", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/panfrost/bifrost/bi_quirks.h b/src/panfrost/bifrost/bi_quirks.h index ea674df9be1..481d3aa8fea 100644 --- a/src/panfrost/bifrost/bi_quirks.h +++ b/src/panfrost/bifrost/bi_quirks.h @@ -39,15 +39,26 @@ #define BIFROST_NO_FP32_TRANSCENDENTALS (1 << 1) +/* Whether this GPU lacks support for the full form of the CLPER instruction. + * These GPUs use a simple encoding of CLPER that does not support + * inactive_result, subgroup_size, or lane_op. Using those features requires + * lowering to additional ALU instructions. The encoding forces inactive_result + * = zero, subgroup_size = subgroup4, and lane_op = none. */ + +#define BIFROST_LIMITED_CLPER (1 << 2) + static inline unsigned bifrost_get_quirks(unsigned product_id) { switch (product_id >> 8) { case 0x60: - return BIFROST_NO_PRELOAD | BIFROST_NO_FP32_TRANSCENDENTALS; + return BIFROST_NO_PRELOAD | BIFROST_NO_FP32_TRANSCENDENTALS | + BIFROST_LIMITED_CLPER; case 0x62: - return BIFROST_NO_PRELOAD; - case 0x70: + return BIFROST_NO_PRELOAD | BIFROST_LIMITED_CLPER; + case 0x70: /* G31 */ + return BIFROST_LIMITED_CLPER; + case 0x71: case 0x72: case 0x74: return 0; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 1402e46a017..137c195cd45 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1965,7 +1965,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_index left, right; - if (b->shader->arch == 6) { + if (b->shader->quirks & BIFROST_LIMITED_CLPER) { left = bi_clper_v6_i32(b, s0, lane1); right = bi_clper_v6_i32(b, s0, lane2); } else {