From f7c73de1c273f40e61e48c779df2ecbe44fc86e1 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 24 Jan 2024 09:34:41 +0100 Subject: [PATCH] broadcom/compiler: fix incorrect flags update for subgroup elect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit c->execute is 0 (not the block index) for lanes currently active under non-uniform control flow. Also this simplifies a bit the instructions we emit for flag generation, both for uniform and non-uniform control flow. Reviewed-by: Alejandro PiƱeiro Cc: mesa-stable Part-of: (cherry picked from commit 7bdc8898b15333223a7fe2ba8470da1349143897) --- .pick_status.json | 2 +- src/broadcom/compiler/nir_to_vir.c | 47 ++++++++++-------------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index a6dcd529203..c44329d3887 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -334,7 +334,7 @@ "description": "broadcom/compiler: fix incorrect flags update for subgroup elect", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 2f1723d0678..b21ce0461f1 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3222,34 +3222,6 @@ emit_load_local_invocation_index(struct v3d_compile *c) vir_uniform_ui(c, 32 - c->local_invocation_index_bits)); } -/* Various subgroup operations rely on the A flags, so this helper ensures that - * A flags represents currently active lanes in the subgroup. - */ -static void -set_a_flags_for_subgroup(struct v3d_compile *c) -{ - /* MSF returns 0 for disabled lanes in compute shaders so - * PUSHZ will set A=1 for disabled lanes. We want the inverse - * of this but we don't have any means to negate the A flags - * directly, but we can do it by repeating the same operation - * with NORZ (A = ~A & ~Z). - */ - assert(c->s->info.stage == MESA_SHADER_COMPUTE); - vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ); - vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ); - - /* If we are under non-uniform control flow we also need to - * AND the A flags with the current execute mask. - */ - if (vir_in_nonuniform_control_flow(c)) { - const uint32_t bidx = c->cur_block->index; - vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(), - c->execute, - vir_uniform_ui(c, bidx)), - V3D_QPU_UF_ANDZ); - } -} - static void ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) { @@ -3737,10 +3709,23 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_elect: { - set_a_flags_for_subgroup(c); - struct qreg first = vir_FLAFIRST(c); + struct qreg first; + if (vir_in_nonuniform_control_flow(c)) { + /* Sets A=1 for lanes enabled in the execution mask */ + vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + /* Updates A ANDing with lanes enabled in MSF */ + vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), + V3D_QPU_UF_ANDNZ); + first = vir_FLAFIRST(c); + } else { + /* Sets A=1 for inactive lanes */ + vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), + V3D_QPU_PF_PUSHZ); + first = vir_FLNAFIRST(c); + } - /* Produce a boolean result from Flafirst */ + /* Produce a boolean result */ vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(), first, vir_uniform_ui(c, 1)), V3D_QPU_PF_PUSHZ);