diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index b21ce0461f1..6b449a2a289 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3734,6 +3734,59 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; } + case nir_intrinsic_ballot: { + assert(c->devinfo->ver >= 71); + struct qreg value = ntq_get_src(c, instr->src[0], 0); + struct qreg res = vir_get_temp(c); + if (vir_in_nonuniform_control_flow(c)) { + /* Ballot uses the MSF mask and the condition mask to + * identify active lanes. Particularly, it uses the + * condition mask to filter out lanes disabled by + * control flow. + */ + vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + vir_set_cond(vir_BALLOT_dest(c, res, value), + V3D_QPU_COND_IFA); + } else { + vir_BALLOT_dest(c, res, value); + } + + ntq_store_def(c, &instr->def, 0, vir_MOV(c, res)); + break; + } + + case nir_intrinsic_read_invocation: { + assert(c->devinfo->ver >= 71); + struct qreg value = ntq_get_src(c, instr->src[0], 0); + struct qreg index = ntq_get_src(c, instr->src[1], 0); + struct qreg res = vir_SHUFFLE(c, value, index); + ntq_store_def(c, &instr->def, 0, vir_MOV(c, res)); + break; + } + + case nir_intrinsic_read_first_invocation: { + assert(c->devinfo->ver >= 71); + struct qreg value = ntq_get_src(c, instr->src[0], 0); + struct qreg res = vir_get_temp(c); + if (vir_in_nonuniform_control_flow(c)) { + /* Bcastf uses the MSF mask and the condition mask to + * identify active lanes. Particularly, it uses the + * condition mask to filter out lanes disabled by + * control flow. + */ + vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + vir_set_cond(vir_BCASTF_dest(c, res, value), + V3D_QPU_COND_IFA); + } else { + vir_BCASTF_dest(c, res, value); + } + + ntq_store_def(c, &instr->def, 0, vir_MOV(c, res)); + break; + } + case nir_intrinsic_load_num_subgroups: unreachable("Should have been lowered"); break; diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 09190db9b9b..f5794133b6d 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1568,6 +1568,24 @@ lower_subgroup_intrinsics(struct v3d_compile *c, case nir_intrinsic_load_subgroup_size: case nir_intrinsic_load_subgroup_invocation: case nir_intrinsic_elect: + case nir_intrinsic_ballot: + case nir_intrinsic_inverse_ballot: + case nir_intrinsic_ballot_bitfield_extract: + case nir_intrinsic_ballot_bit_count_reduce: + case nir_intrinsic_ballot_find_lsb: + case nir_intrinsic_ballot_find_msb: + case nir_intrinsic_ballot_bit_count_exclusive: + case nir_intrinsic_ballot_bit_count_inclusive: + case nir_intrinsic_reduce: + case nir_intrinsic_inclusive_scan: + case nir_intrinsic_exclusive_scan: + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: c->has_subgroups = true; break; default: @@ -1681,6 +1699,16 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c); + const nir_lower_subgroups_options subgroup_opts = { + .subgroup_size = V3D_CHANNELS, + .ballot_components = 1, + .ballot_bit_size = 32, + .lower_to_scalar = true, + .lower_inverse_ballot = true, + .lower_subgroup_masks = true, + }; + NIR_PASS(_, c->s, nir_lower_subgroups, &subgroup_opts); + v3d_optimize_nir(c, c->s); /* Do late algebraic optimization to turn add(a, neg(b)) back into