diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index dbb54f68a34..64fe4a58664 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -885,8 +885,8 @@ tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice, p->deviceNodeMask = 0; p->deviceLUIDValid = false; - p->subgroupSize = pdevice->info->props.supports_double_threadsize ? - pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base; + p->subgroupSize = + pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base; p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT; p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | @@ -1034,8 +1034,8 @@ tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice, struct vk_properties *p) { p->minSubgroupSize = pdevice->info->threadsize_base; - p->maxSubgroupSize = pdevice->info->props.supports_double_threadsize ? - pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base; + p->maxSubgroupSize = + pdevice->expose_double_threadsize ? pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base; p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves; p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL; @@ -1164,9 +1164,9 @@ tu_get_properties(struct tu_physical_device *pdevice, props->maxComputeWorkGroupCount[0] = props->maxComputeWorkGroupCount[1] = props->maxComputeWorkGroupCount[2] = 65535; - props->maxComputeWorkGroupInvocations = pdevice->info->props.supports_double_threadsize ? - pdevice->info->threadsize_base * 2 * pdevice->info->max_waves : - pdevice->info->threadsize_base * pdevice->info->max_waves; + props->maxComputeWorkGroupInvocations = pdevice->expose_double_threadsize + ? pdevice->info->threadsize_base * 2 * pdevice->info->max_waves + : pdevice->info->threadsize_base * pdevice->info->max_waves; if (pdevice->info->props.is_a702) { props->maxComputeWorkGroupSize[0] = props->maxComputeWorkGroupSize[1] = 512; @@ -1687,6 +1687,8 @@ tu_physical_device_init(struct tu_physical_device *device, goto fail_free_name; } + device->expose_double_threadsize = info.props.supports_double_threadsize && !instance->restrict_subgroup_size_64; + device->level1_dcache_size = util_cache_granularity(); device->has_cached_non_coherent_memory = device->level1_dcache_size > 0 && !DETECT_ARCH_ARM; @@ -1863,6 +1865,7 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false) DRI_CONF_TU_AUTOTUNE_ALGORITHM() DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false) + DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(false) DRI_CONF_SECTION_END }; @@ -1911,6 +1914,7 @@ tu_init_dri_options(struct tu_instance *instance) instance->allow_concurrent_binning = (driQueryOptionb(&instance->dri_options, "tu_allow_concurrent_binning") && !TU_DEBUG(NO_CONCURRENT_BINNING)) || TU_DEBUG(FORCE_CONCURRENT_BINNING); + instance->restrict_subgroup_size_64 = driQueryOptionb(&instance->dri_options, "tu_restrict_subgroup_size_64"); } static uint32_t instance_count = 0; diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index db22f014fbc..0b3f03f9513 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -145,6 +145,8 @@ struct tu_physical_device bool has_preemption; + bool expose_double_threadsize; + /* Whether performance counter selector registers can be written by userspace CSes. */ bool is_perf_cntr_selectable; @@ -231,6 +233,11 @@ struct tu_instance */ bool enable_d24s8_border_color_workaround; + /* Various games assume that gl_SubgroupSize is either 32 or 64, and we hide + * our 128-invocation subgroup support for them. + */ + bool restrict_subgroup_size_64; + /* When D24S8 is used without enable_d24s8_border_color_workaround, the * fast border color HW feature results in an incorrect color being used. * However, we want to enable fast border colors for apps that are known diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index af06245ffb7..2474c639764 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -3471,7 +3471,7 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key, struct tu_device *dev) { enum ir3_wavesize_option api_wavesize, real_wavesize; - if (!dev->physical_device->info->props.supports_double_threadsize) { + if (!dev->physical_device->expose_double_threadsize) { api_wavesize = IR3_SINGLE_ONLY; real_wavesize = IR3_SINGLE_ONLY; } else { diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index 9d98dcdbc48..bd899222caa 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -1596,6 +1596,14 @@ TODO: document the other workarounds. + + + diff --git a/src/util/driconf.h b/src/util/driconf.h index 4f58e51a414..68f51c96b52 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -668,6 +668,10 @@ DRI_CONF_OPT_B(tu_allow_concurrent_binning, def, \ "Allow concurrent binning on A7XX+, the CB is disabled by default because it regresses performance on desktop games") +#define DRI_CONF_TU_RESTRICT_SUBGROUP_SIZE_64(def) \ + DRI_CONF_OPT_B(tu_restrict_subgroup_size_64, def, \ + "Restrict subgroup size to 64 (instead of a max of 128) to work around games assuming desktop GPU 32/64 sizes") + /** * \brief Honeykrisp specific configuration options */