nir: Add read_invocation_cond_ir3 intrinsic

On Qualcomm, we have shared registers similar to SGPRs on AMD. However,
there is no readlane or readfirstlane primitive. Shared registers can
only be written to when just one lane is active. This means that we have
to lower readInvocation(val, id) to something like:

if (gl_SubgroupInvocation == id) {
    scalar_reg = val;
}

return scalar_reg;

However, it's a bit difficult to actually get the value of
gl_SubgroupInvocation in the backend, because for compute it requires
some calculations and we don't have any CSE support in the backend. This
intrinsic lets us turn it into
"readInvocationCond(val, id == gl_SubgroupInvocation)" in NIR, at which
point the backend code generation is a lot easier.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6752>
This commit is contained in:
Connor Abbott 2020-09-14 10:14:55 +02:00 committed by Marge Bot
parent e4e79de2a4
commit cc514bfa0e
4 changed files with 22 additions and 0 deletions

View file

@ -4738,6 +4738,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_quad_broadcast_dynamic:1;
bool lower_quad_broadcast_dynamic_to_const:1;
bool lower_elect:1;
bool lower_read_invocation_to_cond:1;
} nir_lower_subgroups_options;
bool nir_lower_subgroups(nir_shader *shader,

View file

View file

@ -362,6 +362,10 @@ intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
# Returns the value of the first source for the lane where the second source is
# true. The second source must be true for exactly one lane.
#
# nir_lower_subgroups emits this in place of read_invocation when the
# lower_read_invocation_to_cond option is set; the second source is then the
# comparison "requested invocation == load_subgroup_invocation".
intrinsic("read_invocation_cond_ir3", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE])
# Additional SPIR-V ballot intrinsics
#
# These correspond to the SPIR-V opcodes

View file

@ -493,6 +493,15 @@ lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
return dst;
}
/* Rewrite read_invocation(value, index) into
 * read_invocation_cond_ir3(value, index == subgroup_invocation).
 *
 * b      - builder used to emit the replacement instructions.
 * intrin - the read_invocation intrinsic being lowered; src[0] is the value
 *          to read and src[1] is the invocation index to read it from.
 *
 * Returns the SSA def of the new read_invocation_cond_ir3 intrinsic, created
 * with the same bit size as the original destination.
 */
static nir_ssa_def *
lower_read_invocation_to_cond(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_ssa_def *value = intrin->src[0].ssa;
   nir_ssa_def *wanted_invocation = intrin->src[1].ssa;

   /* Per-lane condition: does this lane's invocation index equal the
    * requested one?
    */
   nir_ssa_def *is_wanted_lane =
      nir_ieq(b, wanted_invocation, nir_load_subgroup_invocation(b));

   return nir_read_invocation_cond_ir3(b, intrin->dest.ssa.bit_size,
                                       value, is_wanted_lane);
}
static nir_ssa_def *
lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
{
@ -524,6 +533,14 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
break;
case nir_intrinsic_read_invocation:
if (options->lower_to_scalar && intrin->num_components > 1)
return lower_subgroup_op_to_scalar(b, intrin, false);
if (options->lower_read_invocation_to_cond)
return lower_read_invocation_to_cond(b, intrin);
break;
case nir_intrinsic_read_first_invocation:
if (options->lower_to_scalar && intrin->num_components > 1)
return lower_subgroup_op_to_scalar(b, intrin, false);