diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index dbb54f68a34..64fe4a58664 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -885,8 +885,8 @@ tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
p->deviceNodeMask = 0;
p->deviceLUIDValid = false;
- p->subgroupSize = pdevice->info->props.supports_double_threadsize ?
- pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
+ p->subgroupSize =
+ pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
@@ -1034,8 +1034,8 @@ tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
struct vk_properties *p)
{
p->minSubgroupSize = pdevice->info->threadsize_base;
- p->maxSubgroupSize = pdevice->info->props.supports_double_threadsize ?
- pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
+ p->maxSubgroupSize =
+ pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
@@ -1164,9 +1164,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->maxComputeWorkGroupCount[0] =
props->maxComputeWorkGroupCount[1] =
props->maxComputeWorkGroupCount[2] = 65535;
- props->maxComputeWorkGroupInvocations = pdevice->info->props.supports_double_threadsize ?
- pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
- pdevice->info->threadsize_base * pdevice->info->max_waves;
+ props->maxComputeWorkGroupInvocations = pdevice->expose_double_threadsize
+ ? pdevice->info->threadsize_base * 2 * pdevice->info->max_waves
+ : pdevice->info->threadsize_base * pdevice->info->max_waves;
if (pdevice->info->props.is_a702) {
props->maxComputeWorkGroupSize[0] =
props->maxComputeWorkGroupSize[1] = 512;
@@ -1687,6 +1687,8 @@ tu_physical_device_init(struct tu_physical_device *device,
goto fail_free_name;
}
+ device->expose_double_threadsize = info.props.supports_double_threadsize && !instance->restrict_subgroup_size_64;
+
device->level1_dcache_size = util_cache_granularity();
device->has_cached_non_coherent_memory =
device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
@@ -1863,6 +1865,7 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false)
DRI_CONF_TU_AUTOTUNE_ALGORITHM()
DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false)
+ DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(false)
DRI_CONF_SECTION_END
};
@@ -1911,6 +1914,7 @@ tu_init_dri_options(struct tu_instance *instance)
instance->allow_concurrent_binning =
(driQueryOptionb(&instance->dri_options, "tu_allow_concurrent_binning") && !TU_DEBUG(NO_CONCURRENT_BINNING)) ||
TU_DEBUG(FORCE_CONCURRENT_BINNING);
+ instance->restrict_subgroup_size_64 = driQueryOptionb(&instance->dri_options, "tu_restrict_subgroup_size_64");
}
static uint32_t instance_count = 0;
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index db22f014fbc..0b3f03f9513 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -145,6 +145,8 @@ struct tu_physical_device
bool has_preemption;
+ bool expose_double_threadsize;
+
/* Whether performance counter selector registers can be written by userspace CSes. */
bool is_perf_cntr_selectable;
@@ -231,6 +233,11 @@ struct tu_instance
*/
bool enable_d24s8_border_color_workaround;
+ /* Some games assume that gl_SubgroupSize is either 32 or 64, so when this
+ * option is set we hide our 128-invocation subgroup support and expose a
+ * maximum subgroup size of 64.
+ bool restrict_subgroup_size_64;
+
/* When D24S8 is used without enable_d24s8_border_color_workaround, the
* fast border color HW feature results in an incorrect color being used.
* However, we want to enable fast border colors for apps that are known
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index af06245ffb7..2474c639764 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -3471,7 +3471,7 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key,
struct tu_device *dev)
{
enum ir3_wavesize_option api_wavesize, real_wavesize;
- if (!dev->physical_device->info->props.supports_double_threadsize) {
+ if (!dev->physical_device->expose_double_threadsize) {
api_wavesize = IR3_SINGLE_ONLY;
real_wavesize = IR3_SINGLE_ONLY;
} else {
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index 9d98dcdbc48..bd899222caa 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -1596,6 +1596,14 @@ TODO: document the other workarounds.
+
+
+
+
diff --git a/src/util/driconf.h b/src/util/driconf.h
index 4f58e51a414..68f51c96b52 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -668,6 +668,10 @@
DRI_CONF_OPT_B(tu_allow_concurrent_binning, def, \
"Allow concurrent binning on A7XX+, the CB is disabled by default because it regresses performance on desktop games")
+#define DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(def) \
+ DRI_CONF_OPT_B(tu_restrict_subgroup_size_64, def, \
+ "Restrict subgroup size to 64 (instead of a max of 128) to work around games assuming desktop GPU 32/64 sizes")
+
/**
* \brief Honeykrisp specific configuration options
*/