mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
broadcom/compiler: support subgroup vote
We don't rely in any lowerings for these (other than scalarization). The only noteworthy aspect is that these instructions, like ballot, use the condition mask to filter out valid invocations that are inactive because of control flow. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
This commit is contained in:
parent
3222fd71a1
commit
69d3b90839
2 changed files with 108 additions and 0 deletions
|
|
@ -3796,6 +3796,110 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_vote_feq:
|
||||
case nir_intrinsic_vote_ieq: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg res = vir_get_temp(c);
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
/* Alleq uses the MSF mask and the condition mask to
|
||||
* identify active lanes. Particularly, it uses the
|
||||
* condition mask to filter out lanes disabled by
|
||||
* control flow.
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_cond(instr->intrinsic == nir_intrinsic_vote_ieq ?
|
||||
vir_ALLEQ_dest(c, res, value) :
|
||||
vir_ALLFEQ_dest(c, res, value),
|
||||
V3D_QPU_COND_IFA);
|
||||
} else {
|
||||
if (instr->intrinsic == nir_intrinsic_vote_ieq)
|
||||
vir_ALLEQ_dest(c, res, value);
|
||||
else
|
||||
vir_ALLFEQ_dest(c, res, value);
|
||||
}
|
||||
|
||||
/* Produce boolean result */
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), res),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
struct qreg result = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFNA);
|
||||
ntq_store_def(c, &instr->def, 0, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_vote_all: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg res = vir_get_temp(c);
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
/* Alleq uses the MSF mask and the condition mask to
|
||||
* identify active lanes. Particularly, it uses the
|
||||
* condition mask to filter out lanes disabled by
|
||||
* control flow.
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_cond(vir_ALLEQ_dest(c, res, value),
|
||||
V3D_QPU_COND_IFA);
|
||||
} else {
|
||||
vir_ALLEQ_dest(c, res, value);
|
||||
}
|
||||
|
||||
/* We want to check if 'all lanes are equal (alleq != 0) and
|
||||
* their value is True (value != 0)'.
|
||||
*
|
||||
* The first MOV.pushz generates predicate for 'alleq == 0'.
|
||||
* The second MOV.NORZ generates predicate for:
|
||||
* '!(alleq == 0) & !(value == 0).
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), res),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_uf(c, vir_MOV_dest(c, vir_nop_reg(), value),
|
||||
V3D_QPU_UF_NORZ);
|
||||
struct qreg result =
|
||||
ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
|
||||
ntq_store_def(c, &instr->def, 0, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_vote_any: {
|
||||
assert(c->devinfo->ver >= 71);
|
||||
struct qreg value = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg res = vir_get_temp(c);
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
/* Alleq uses the MSF mask and the condition mask to
|
||||
* identify active lanes. Particularly, it uses the
|
||||
* condition mask to filter out lanes disabled by
|
||||
* control flow.
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_cond(vir_ALLEQ_dest(c, res, value),
|
||||
V3D_QPU_COND_IFA);
|
||||
} else {
|
||||
vir_ALLEQ_dest(c, res, value);
|
||||
}
|
||||
|
||||
/* We want to check 'not (all lanes are equal (alleq != 0)'
|
||||
* and their value is False (value == 0))'.
|
||||
*
|
||||
* The first MOV.pushz generates predicate for 'alleq == 0'.
|
||||
* The second MOV.NORNZ generates predicate for:
|
||||
* '!(alleq == 0) & (value == 0).
|
||||
* The IFNA condition negates the predicate when evaluated:
|
||||
* '!(!alleq == 0) & (value == 0))
|
||||
*/
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), res),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
vir_set_uf(c, vir_MOV_dest(c, vir_nop_reg(), value),
|
||||
V3D_QPU_UF_NORNZ);
|
||||
struct qreg result =
|
||||
ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFNA);
|
||||
ntq_store_def(c, &instr->def, 0, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
unreachable("Should have been lowered");
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1590,6 +1590,10 @@ lower_subgroup_intrinsics(struct v3d_compile *c,
|
|||
case nir_intrinsic_shuffle_xor:
|
||||
case nir_intrinsic_shuffle_up:
|
||||
case nir_intrinsic_shuffle_down:
|
||||
case nir_intrinsic_vote_all:
|
||||
case nir_intrinsic_vote_any:
|
||||
case nir_intrinsic_vote_feq:
|
||||
case nir_intrinsic_vote_ieq:
|
||||
c->has_subgroups = true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue