nir/spirv: Add inverse_ballot intrinsic

Inverse ballot is actually a no-op on AMD, so we really don't want to
lower it to something more complicated.  There may be a more efficient
way to do this on Intel too. In addition, in the future we'll want to use
this intrinsic for lowering boolean reduce operations, where the inverse
ballot will operate on the backend's "natural" ballot type as indicated by
options->ballot_components and options->ballot_bit_size, instead of uvec4
as produced by SPIR-V. In total, there are now three possible lowerings we
may have to perform:

- inverse_ballot with source type of uvec4 from SPIR-V to inverse_ballot
  with natural source type, when the backend supports inverse_ballot
  natively.
- inverse_ballot with source type of uvec4 from SPIR-V to arithmetic,
  when the backend doesn't support inverse_ballot.
- inverse_ballot with natural source type from reduce operation, when
  the backend doesn't support inverse_ballot.

Previously we just did the second lowering unconditionally in vtn, but
that lowering is just a combination of the first and third. We add support
here for the first and third lowerings in nir_lower_subgroups, instead of
simply moving the second lowering, to avoid unnecessary churn.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25123>
This commit is contained in:
Connor Abbott 2019-02-01 11:37:50 +01:00 committed by Marge Bot
parent 0ef87f148d
commit 4282386311
14 changed files with 25 additions and 16 deletions

View file

@ -638,6 +638,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.lower_quad_broadcast_dynamic_to_const = gfx7minus,
.lower_shuffle_to_swizzle_amd = 1,
.lower_ballot_bit_count_to_mbcnt_amd = 1,
.lower_inverse_ballot = 1,
});
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);

View file

@ -5369,6 +5369,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_read_invocation_to_cond : 1;
bool lower_rotate_to_shuffle : 1;
bool lower_ballot_bit_count_to_mbcnt_amd : 1;
bool lower_inverse_ballot : 1;
} nir_lower_subgroups_options;
bool nir_lower_subgroups(nir_shader *shader,

View file

@ -478,6 +478,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
break;
/* Intrinsics which are always divergent */
case nir_intrinsic_inverse_ballot:
case nir_intrinsic_load_color0:
case nir_intrinsic_load_color1:
case nir_intrinsic_load_param:

View file

@ -446,9 +446,11 @@ intrinsic("read_invocation_cond_ir3", src_comp=[0, 1], dest_comp=0, flags=[CAN_E
#
# OpGroupNonUniformElect
# OpSubgroupFirstInvocationKHR
# OpGroupNonUniformInverseBallot
intrinsic("elect", dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
barrier("begin_invocation_interlock")
barrier("end_invocation_interlock")

View file

@ -683,6 +683,16 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
intrin->def.bit_size);
}
case nir_intrinsic_inverse_ballot:
if (options->lower_inverse_ballot) {
return nir_ballot_bitfield_extract(b, 1, intrin->src[0].ssa,
nir_load_subgroup_invocation(b));
} else if (intrin->src[0].ssa->num_components != options->ballot_components ||
intrin->src[0].ssa->bit_size != options->ballot_bit_size) {
return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options));
}
break;
case nir_intrinsic_ballot_bitfield_extract:
case nir_intrinsic_ballot_bit_count_reduce:
case nir_intrinsic_ballot_find_lsb:

View file

@ -103,22 +103,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
}
case SpvOpGroupNonUniformInverseBallot: {
/* This one is just a BallotBitfieldExtract with subgroup invocation.
* We could add a NIR intrinsic but it's easier to just lower it on the
* spot.
*/
nir_intrinsic_instr *intrin =
nir_intrinsic_instr_create(b->nb.shader,
nir_intrinsic_ballot_bitfield_extract);
intrin->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));
nir_def_init_for_type(&intrin->instr, &intrin->def,
dest_type->type);
nir_builder_instr_insert(&b->nb, &intrin->instr);
vtn_push_nir_ssa(b, w[2], &intrin->def);
nir_def *dest = nir_inverse_ballot(&b->nb, 1, vtn_get_nir_ssa(b, w[4]));
vtn_push_nir_ssa(b, w[2], dest);
break;
}

View file

@ -564,6 +564,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
.lower_read_invocation_to_cond = true,
.lower_shuffle = true,
.lower_relative_shuffle = true,
.lower_inverse_ballot = true,
};
if (!((s->info.stage == MESA_SHADER_COMPUTE) ||

View file

@ -2996,6 +2996,7 @@ lp_build_opt_nir(struct nir_shader *nir)
.lower_to_scalar = true,
.lower_subgroup_masks = true,
.lower_relative_shuffle = true,
.lower_inverse_ballot = true,
};
NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
} while (progress);

View file

@ -254,6 +254,7 @@ const nir_lower_subgroups_options si_nir_subgroups_options = {
.lower_subgroup_masks = true,
.lower_vote_trivial = false,
.lower_vote_eq = true,
.lower_inverse_ballot = true,
};
/**

View file

@ -5423,6 +5423,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
subgroup_options.subgroup_size = 1;
subgroup_options.lower_vote_trivial = true;
}
subgroup_options.lower_inverse_ballot = true;
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
}

View file

@ -401,6 +401,7 @@ lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_
subgroup_opts.lower_quad = true;
subgroup_opts.ballot_components = 1;
subgroup_opts.ballot_bit_size = 32;
subgroup_opts.lower_inverse_ballot = true;
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
if (nir->info.stage == MESA_SHADER_FRAGMENT)

View file

@ -1000,6 +1000,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
.lower_relative_shuffle = true,
.lower_quad_broadcast_dynamic = true,
.lower_elect = true,
.lower_inverse_ballot = true,
};
OPT(nir_lower_subgroups, &subgroups_options);

View file

@ -971,6 +971,7 @@ dxil_spirv_nir_passes(nir_shader *nir,
.lower_subgroup_masks = true,
.lower_to_scalar = true,
.lower_relative_shuffle = true,
.lower_inverse_ballot = true,
};
if (nir->info.stage != MESA_SHADER_FRAGMENT &&
nir->info.stage != MESA_SHADER_COMPUTE)

View file

@ -3242,6 +3242,7 @@ Converter::run()
subgroup_options.ballot_bit_size = 32;
subgroup_options.ballot_components = 1;
subgroup_options.lower_elect = true;
subgroup_options.lower_inverse_ballot = true;
unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) |
(nir->options->lower_flrp32 ? 32 : 0) |