anv: Add full subgroups WA for the shaders with barriers in Breaking Limit

When barriers are used in invalid shaders with non-uniform control flow,
we might get a hang. Forcing a 32-wide subgroup can help by making it more
probable that the barrier instruction is executed by at least one channel
in each thread, so that the hang is avoided. This shouldn't affect
Xe2+, where active-thread-only barriers are used anyway.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11497
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30581>
(cherry picked from commit 7e52b67801)
This commit is contained in:
Sviatoslav Peleshko 2024-08-09 00:36:23 +03:00 committed by Eric Engestrom
parent eedc72aa87
commit 6d80687b24
6 changed files with 24 additions and 1 deletions

View file

@ -224,7 +224,7 @@
"description": "anv: Add full subgroups WA for the shaders with barriers in Breaking Limit",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -77,6 +77,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_VK_KHR_PRESENT_WAIT(false)
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false)
DRI_CONF_ANV_DISABLE_FCV(false)
DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
@ -2638,6 +2639,8 @@ anv_init_dri_options(struct anv_instance *instance)
instance->assume_full_subgroups =
driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
instance->assume_full_subgroups_with_barrier =
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier");
instance->limit_trig_input_range =
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
instance->sample_mask_out_opengl_behaviour =

View file

@ -809,6 +809,9 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
const uint8_t afs = device->physical->instance->assume_full_subgroups;
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
const bool afswb = device->physical->instance->assume_full_subgroups_with_barrier;
_mesa_sha1_update(&ctx, &afswb, sizeof(afswb));
_mesa_sha1_update(&ctx, stage->shader_sha1,
sizeof(stage->shader_sha1));
_mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
@ -988,6 +991,15 @@ anv_fixup_subgroup_size(struct anv_device *device, struct shader_info *info)
local_size % BRW_SUBGROUP_SIZE == 0)
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
if (device->physical->instance->assume_full_subgroups_with_barrier &&
info->stage == MESA_SHADER_COMPUTE &&
device->info->verx10 <= 125 &&
info->uses_control_barrier &&
info->subgroup_size == SUBGROUP_SIZE_VARYING &&
local_size &&
local_size % BRW_SUBGROUP_SIZE == 0)
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
/* If the client requests that we dispatch full subgroups but doesn't
* allow us to pick a subgroup size, we have to smash it to the API
* value of 32. Performance will likely be terrible in this case but

View file

@ -1289,6 +1289,7 @@ struct anv_instance {
* Workarounds for game bugs.
*/
uint8_t assume_full_subgroups;
bool assume_full_subgroups_with_barrier;
bool limit_trig_input_range;
bool sample_mask_out_opengl_behaviour;
bool force_filter_addr_rounding;

View file

@ -1156,6 +1156,9 @@ TODO: document the other workarounds.
<application name="Aperture Desk Job" executable="deskjob">
<option name="anv_assume_full_subgroups" value="32" />
</application>
<!-- Breaking Limit: shaders use barriers in non-uniform control flow, which
     might hang; assume full subgroups for them (mesa issue #11497). -->
<application name="Breaking Limit" executable="GPUScoreVulkan">
<option name="anv_assume_full_subgroups_with_barrier" value="true" />
</application>
<application name="Brawlhalla" executable="BrawlhallaGame.exe">
<option name="hasvk_report_vk_1_3_version" value="true" />
</application>

View file

@ -739,6 +739,10 @@
DRI_CONF_OPT_I(anv_assume_full_subgroups, def, 0, 32, \
"Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size")
/* Declares the boolean driconf option "anv_assume_full_subgroups_with_barrier".
 * `def` is passed through to DRI_CONF_OPT_B (presumably the option's default
 * value); the string below is the option's description.
 */
#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(def) \
DRI_CONF_OPT_B(anv_assume_full_subgroups_with_barrier, def, \
"Assume full subgroups requirement for compute shaders that use control barriers")
#define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \
DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
"Ignore sample mask out when having single sampled target")