mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
broadcom/compiler: support subgroup ballot
This adds support in our compiler for the subgroup ballot feature. To this end we start using the NIR lowering for subgroups, which can lower some of these intrinsics into things more amenable to our hardware and takes care of scalarization. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
This commit is contained in:
parent
295f906517
commit
29a5e3e615
2 changed files with 81 additions and 0 deletions
|
|
@ -3734,6 +3734,59 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_ballot: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg res = vir_get_temp(c);
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
/* Ballot uses the MSF mask and the condition mask to
|
||||
* identify active lanes. Particularly, it uses the
|
||||
* condition mask to filter out lanes disabled by
|
||||
* control flow.
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_cond(vir_BALLOT_dest(c, res, value),
|
||||
V3D_QPU_COND_IFA);
|
||||
} else {
|
||||
vir_BALLOT_dest(c, res, value);
|
||||
}
|
||||
|
||||
ntq_store_def(c, &instr->def, 0, vir_MOV(c, res));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_read_invocation: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg index = ntq_get_src(c, instr->src[1], 0);
|
||||
struct qreg res = vir_SHUFFLE(c, value, index);
|
||||
ntq_store_def(c, &instr->def, 0, vir_MOV(c, res));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_read_first_invocation: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg res = vir_get_temp(c);
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
/* Bcastf uses the MSF mask and the condition mask to
|
||||
* identify active lanes. Particularly, it uses the
|
||||
* condition mask to filter out lanes disabled by
|
||||
* control flow.
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_cond(vir_BCASTF_dest(c, res, value),
|
||||
V3D_QPU_COND_IFA);
|
||||
} else {
|
||||
vir_BCASTF_dest(c, res, value);
|
||||
}
|
||||
|
||||
ntq_store_def(c, &instr->def, 0, vir_MOV(c, res));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
unreachable("Should have been lowered");
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1568,6 +1568,24 @@ lower_subgroup_intrinsics(struct v3d_compile *c,
|
|||
case nir_intrinsic_load_subgroup_size:
|
||||
case nir_intrinsic_load_subgroup_invocation:
|
||||
case nir_intrinsic_elect:
|
||||
case nir_intrinsic_ballot:
|
||||
case nir_intrinsic_inverse_ballot:
|
||||
case nir_intrinsic_ballot_bitfield_extract:
|
||||
case nir_intrinsic_ballot_bit_count_reduce:
|
||||
case nir_intrinsic_ballot_find_lsb:
|
||||
case nir_intrinsic_ballot_find_msb:
|
||||
case nir_intrinsic_ballot_bit_count_exclusive:
|
||||
case nir_intrinsic_ballot_bit_count_inclusive:
|
||||
case nir_intrinsic_reduce:
|
||||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
case nir_intrinsic_read_invocation:
|
||||
case nir_intrinsic_read_first_invocation:
|
||||
case nir_intrinsic_load_subgroup_eq_mask:
|
||||
case nir_intrinsic_load_subgroup_ge_mask:
|
||||
case nir_intrinsic_load_subgroup_gt_mask:
|
||||
case nir_intrinsic_load_subgroup_le_mask:
|
||||
case nir_intrinsic_load_subgroup_lt_mask:
|
||||
c->has_subgroups = true;
|
||||
break;
|
||||
default:
|
||||
|
|
@ -1681,6 +1699,16 @@ v3d_attempt_compile(struct v3d_compile *c)
|
|||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);
|
||||
|
||||
const nir_lower_subgroups_options subgroup_opts = {
|
||||
.subgroup_size = V3D_CHANNELS,
|
||||
.ballot_components = 1,
|
||||
.ballot_bit_size = 32,
|
||||
.lower_to_scalar = true,
|
||||
.lower_inverse_ballot = true,
|
||||
.lower_subgroup_masks = true,
|
||||
};
|
||||
NIR_PASS(_, c->s, nir_lower_subgroups, &subgroup_opts);
|
||||
|
||||
v3d_optimize_nir(c, c->s);
|
||||
|
||||
/* Do late algebraic optimization to turn add(a, neg(b)) back into
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue