mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
anv: Add full subgroups WA for the shaders with barriers in Breaking Limit
When barriers are used in invalid shaders with non-uniform control flow,
we might get a hang. Forcing a 32-wide group can help by making it more
probable that the barrier instruction is executed by at least one channel
in each thread, and thus the hang will be avoided. This shouldn't affect
Xe2+, where active-thread-only barriers are used anyway.
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11497
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30581>
(cherry picked from commit 7e52b67801)
This commit is contained in:
parent
eedc72aa87
commit
6d80687b24
6 changed files with 24 additions and 1 deletions
|
|
@ -224,7 +224,7 @@
|
|||
"description": "anv: Add full subgroups WA for the shaders with barriers in Breaking Limit",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_VK_KHR_PRESENT_WAIT(false)
|
||||
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
|
||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
|
||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false)
|
||||
DRI_CONF_ANV_DISABLE_FCV(false)
|
||||
DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
|
||||
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
|
||||
|
|
@ -2638,6 +2639,8 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
|
||||
instance->assume_full_subgroups =
|
||||
driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
|
||||
instance->assume_full_subgroups_with_barrier =
|
||||
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier");
|
||||
instance->limit_trig_input_range =
|
||||
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
|
||||
instance->sample_mask_out_opengl_behaviour =
|
||||
|
|
|
|||
|
|
@ -809,6 +809,9 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
|
|||
const uint8_t afs = device->physical->instance->assume_full_subgroups;
|
||||
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
|
||||
|
||||
const bool afswb = device->physical->instance->assume_full_subgroups_with_barrier;
|
||||
_mesa_sha1_update(&ctx, &afswb, sizeof(afswb));
|
||||
|
||||
_mesa_sha1_update(&ctx, stage->shader_sha1,
|
||||
sizeof(stage->shader_sha1));
|
||||
_mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
|
||||
|
|
@ -988,6 +991,15 @@ anv_fixup_subgroup_size(struct anv_device *device, struct shader_info *info)
|
|||
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
|
||||
|
||||
if (device->physical->instance->assume_full_subgroups_with_barrier &&
|
||||
info->stage == MESA_SHADER_COMPUTE &&
|
||||
device->info->verx10 <= 125 &&
|
||||
info->uses_control_barrier &&
|
||||
info->subgroup_size == SUBGROUP_SIZE_VARYING &&
|
||||
local_size &&
|
||||
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
|
||||
|
||||
/* If the client requests that we dispatch full subgroups but doesn't
|
||||
* allow us to pick a subgroup size, we have to smash it to the API
|
||||
* value of 32. Performance will likely be terrible in this case but
|
||||
|
|
|
|||
|
|
@ -1289,6 +1289,7 @@ struct anv_instance {
|
|||
* Workarounds for game bugs.
|
||||
*/
|
||||
uint8_t assume_full_subgroups;
|
||||
bool assume_full_subgroups_with_barrier;
|
||||
bool limit_trig_input_range;
|
||||
bool sample_mask_out_opengl_behaviour;
|
||||
bool force_filter_addr_rounding;
|
||||
|
|
|
|||
|
|
@ -1156,6 +1156,9 @@ TODO: document the other workarounds.
|
|||
<application name="Aperture Desk Job" executable="deskjob">
|
||||
<option name="anv_assume_full_subgroups" value="32" />
|
||||
</application>
|
||||
<application name="Breaking Limit" executable="GPUScoreVulkan">
|
||||
<option name="anv_assume_full_subgroups_with_barrier" value="true" />
|
||||
</application>
|
||||
<application name="Brawlhalla" executable="BrawlhallaGame.exe">
|
||||
<option name="hasvk_report_vk_1_3_version" value="true" />
|
||||
</application>
|
||||
|
|
|
|||
|
|
@ -739,6 +739,10 @@
|
|||
DRI_CONF_OPT_I(anv_assume_full_subgroups, def, 0, 32, \
|
||||
"Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size")
|
||||
|
||||
#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(def) \
|
||||
DRI_CONF_OPT_B(anv_assume_full_subgroups_with_barrier, def, \
|
||||
"Assume full subgroups requirement for compute shaders that use control barriers")
|
||||
|
||||
#define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \
|
||||
DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
|
||||
"Ignore sample mask out when having single sampled target")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue