mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 20:00:11 +01:00
anv: Add full subgroups workaround for the shaders that use shared memory
This workaround is similar to anv_assume_full_subgroups, but it applies
to shaders that use shared memory. If such shaders rely on implicit
synchronization, and we choose a smaller subgroup size than the
(broken) shader expects, they will produce incorrect results.
Cc: mesa-stable
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23408>
(cherry picked from commit 369aec5704)
This commit is contained in:
parent
3be28b42e2
commit
090dbbc995
5 changed files with 25 additions and 1 deletion
|
|
@ -2014,7 +2014,7 @@
|
|||
"description": "anv: Add full subgroups workaround for the shaders that use shared memory",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
|
||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
|
||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false)
|
||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(false)
|
||||
DRI_CONF_ANV_DISABLE_FCV(false)
|
||||
DRI_CONF_ANV_ENABLE_BUFFER_COMP(false)
|
||||
DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
|
||||
|
|
@ -141,6 +142,8 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
|
||||
instance->assume_full_subgroups_with_barrier =
|
||||
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier");
|
||||
instance->assume_full_subgroups_with_shared_memory =
|
||||
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_shared_memory");
|
||||
instance->limit_trig_input_range =
|
||||
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
|
||||
instance->sample_mask_out_opengl_behaviour =
|
||||
|
|
|
|||
|
|
@ -648,6 +648,9 @@ anv_pipeline_hash_graphics(struct anv_graphics_base_pipeline *pipeline,
|
|||
if (stages[MESA_SHADER_MESH].info || stages[MESA_SHADER_TASK].info) {
|
||||
const uint8_t afs = device->physical->instance->assume_full_subgroups;
|
||||
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
|
||||
|
||||
const bool afs_shm = device->physical->instance->assume_full_subgroups_with_shared_memory;
|
||||
_mesa_sha1_update(&ctx, &afs_shm, sizeof(afs_shm));
|
||||
}
|
||||
|
||||
_mesa_sha1_final(&ctx, sha1_out);
|
||||
|
|
@ -670,6 +673,9 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
|
|||
const bool afswb = device->physical->instance->assume_full_subgroups_with_barrier;
|
||||
_mesa_sha1_update(&ctx, &afswb, sizeof(afswb));
|
||||
|
||||
const bool afs_shm = device->physical->instance->assume_full_subgroups_with_shared_memory;
|
||||
_mesa_sha1_update(&ctx, &afs_shm, sizeof(afs_shm));
|
||||
|
||||
_mesa_sha1_update(&ctx, stage->shader_sha1,
|
||||
sizeof(stage->shader_sha1));
|
||||
_mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
|
||||
|
|
@ -859,6 +865,16 @@ anv_fixup_subgroup_size(struct anv_device *device, struct shader_info *info)
|
|||
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
|
||||
|
||||
/* Similarly, sometimes games rely on the implicit synchronization of
|
||||
* the shared memory accesses, and choosing smaller subgroups than the game
|
||||
* expects will cause bugs. */
|
||||
if (device->physical->instance->assume_full_subgroups_with_shared_memory &&
|
||||
info->shared_size > 0 &&
|
||||
info->subgroup_size == SUBGROUP_SIZE_VARYING &&
|
||||
local_size &&
|
||||
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||
info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
|
||||
|
||||
/* If the client requests that we dispatch full subgroups but doesn't
|
||||
* allow us to pick a subgroup size, we have to smash it to the API
|
||||
* value of 32. Performance will likely be terrible in this case but
|
||||
|
|
|
|||
|
|
@ -1295,6 +1295,7 @@ struct anv_instance {
|
|||
*/
|
||||
uint8_t assume_full_subgroups;
|
||||
bool assume_full_subgroups_with_barrier;
|
||||
bool assume_full_subgroups_with_shared_memory;
|
||||
bool limit_trig_input_range;
|
||||
bool sample_mask_out_opengl_behaviour;
|
||||
bool force_filter_addr_rounding;
|
||||
|
|
|
|||
|
|
@ -794,6 +794,10 @@
|
|||
DRI_CONF_OPT_B(anv_assume_full_subgroups_with_barrier, def, \
|
||||
"Assume full subgroups requirement for compute shaders that use control barriers")
|
||||
|
||||
#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(def) \
|
||||
DRI_CONF_OPT_B(anv_assume_full_subgroups_with_shared_memory, def, \
|
||||
"Allow assuming full subgroups requirement for shaders using shared memory even when it's not specified explicitly")
|
||||
|
||||
#define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \
|
||||
DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
|
||||
"Ignore sample mask out when having single sampled target")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue