mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 19:48:17 +02:00
tu: Disable 128-wide subgroups on No Man's Sky.
It has a lighting compute shader that assumes a subgroup size of either 32 or 64, which produces vertical banding if the compiler opts for 128. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/15423 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41665>
This commit is contained in:
parent
5acc764c82
commit
4c03db9ec8
5 changed files with 31 additions and 8 deletions
|
|
@ -885,8 +885,8 @@ tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
|
|||
p->deviceNodeMask = 0;
|
||||
p->deviceLUIDValid = false;
|
||||
|
||||
p->subgroupSize = pdevice->info->props.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->subgroupSize =
|
||||
pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
|
|
@ -1034,8 +1034,8 @@ tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
|
|||
struct vk_properties *p)
|
||||
{
|
||||
p->minSubgroupSize = pdevice->info->threadsize_base;
|
||||
p->maxSubgroupSize = pdevice->info->props.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->maxSubgroupSize =
|
||||
pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
|
||||
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
|
||||
|
||||
|
|
@ -1164,9 +1164,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->maxComputeWorkGroupCount[0] =
|
||||
props->maxComputeWorkGroupCount[1] =
|
||||
props->maxComputeWorkGroupCount[2] = 65535;
|
||||
props->maxComputeWorkGroupInvocations = pdevice->info->props.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
|
||||
pdevice->info->threadsize_base * pdevice->info->max_waves;
|
||||
props->maxComputeWorkGroupInvocations = pdevice->expose_double_threadsize
|
||||
? pdevice->info->threadsize_base * 2 * pdevice->info->max_waves
|
||||
: pdevice->info->threadsize_base * pdevice->info->max_waves;
|
||||
if (pdevice->info->props.is_a702) {
|
||||
props->maxComputeWorkGroupSize[0] =
|
||||
props->maxComputeWorkGroupSize[1] = 512;
|
||||
|
|
@ -1687,6 +1687,8 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
goto fail_free_name;
|
||||
}
|
||||
|
||||
device->expose_double_threadsize = info.props.supports_double_threadsize && !instance->restrict_subgroup_size_64;
|
||||
|
||||
device->level1_dcache_size = util_cache_granularity();
|
||||
device->has_cached_non_coherent_memory =
|
||||
device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
|
||||
|
|
@ -1863,6 +1865,7 @@ static const driOptionDescription tu_dri_options[] = {
|
|||
DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false)
|
||||
DRI_CONF_TU_AUTOTUNE_ALGORITHM()
|
||||
DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false)
|
||||
DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(false)
|
||||
DRI_CONF_SECTION_END
|
||||
};
|
||||
|
||||
|
|
@ -1911,6 +1914,7 @@ tu_init_dri_options(struct tu_instance *instance)
|
|||
instance->allow_concurrent_binning =
|
||||
(driQueryOptionb(&instance->dri_options, "tu_allow_concurrent_binning") && !TU_DEBUG(NO_CONCURRENT_BINNING)) ||
|
||||
TU_DEBUG(FORCE_CONCURRENT_BINNING);
|
||||
instance->restrict_subgroup_size_64 = driQueryOptionb(&instance->dri_options, "tu_restrict_subgroup_size_64");
|
||||
}
|
||||
|
||||
static uint32_t instance_count = 0;
|
||||
|
|
|
|||
|
|
@ -145,6 +145,8 @@ struct tu_physical_device
|
|||
|
||||
bool has_preemption;
|
||||
|
||||
bool expose_double_threadsize;
|
||||
|
||||
/* Whether performance counter selector registers can be written by userspace CSes. */
|
||||
bool is_perf_cntr_selectable;
|
||||
|
||||
|
|
@ -231,6 +233,11 @@ struct tu_instance
|
|||
*/
|
||||
bool enable_d24s8_border_color_workaround;
|
||||
|
||||
/* Various games assume that gl_SubgroupSize is either 32 or 64, and we hide
|
||||
* our 128-invocation subgroup support for them.
|
||||
*/
|
||||
bool restrict_subgroup_size_64;
|
||||
|
||||
/* When D24S8 is used without enable_d24s8_border_color_workaround, the
|
||||
* fast border color HW feature results in an incorrect color being used.
|
||||
* However, we want to enable fast border colors for apps that are known
|
||||
|
|
|
|||
|
|
@ -3471,7 +3471,7 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key,
|
|||
struct tu_device *dev)
|
||||
{
|
||||
enum ir3_wavesize_option api_wavesize, real_wavesize;
|
||||
if (!dev->physical_device->info->props.supports_double_threadsize) {
|
||||
if (!dev->physical_device->expose_double_threadsize) {
|
||||
api_wavesize = IR3_SINGLE_ONLY;
|
||||
real_wavesize = IR3_SINGLE_ONLY;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1596,6 +1596,14 @@ TODO: document the other workarounds.
|
|||
<application name="Half-Life: Alyx" application_name_match="hlvr">
|
||||
<option name="tu_emulate_alpha_to_coverage" value="true" />
|
||||
</application>
|
||||
<application name="No Man's Sky" application_name_match="No Man's Sky">
|
||||
<!-- A lighting CS reducing 8x8 regions with fmin/fmax does a write once per
|
||||
subgroup (assuming 64), or collects the results from two subgroups in the
|
||||
32 subgroup size case. Thus, our 128 subgroup size results in vertical banding
|
||||
when half the 8x8 regions don't get written.
|
||||
-->
|
||||
<option name="tu_restrict_subgroup_size_64" value="true" />
|
||||
</application>
|
||||
</device>
|
||||
|
||||
<device driver="asahi">
|
||||
|
|
|
|||
|
|
@ -668,6 +668,10 @@
|
|||
DRI_CONF_OPT_B(tu_allow_concurrent_binning, def, \
|
||||
"Allow concurrent binning on A7XX+, the CB is disabled by default because it regresses performance on desktop games")
|
||||
|
||||
#define DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(def) \
|
||||
DRI_CONF_OPT_B(tu_restrict_subgroup_size_64, def, \
|
||||
"Restrict subgroup size to 64 (instead of a max of 128) to work around games assuming desktop GPU 32/64 sizes")
|
||||
|
||||
/**
|
||||
* \brief Honeykrisp specific configuration options
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue