diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index ec59bfb1ea9..93763c4ae4c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1090,6 +1090,13 @@ void radv_GetPhysicalDeviceFeatures2( features->timelineSemaphore = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: { /* Reports both subgroup-size-control feature flags as supported. */ + VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features = + (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext; + features->subgroupSizeControl = true; + features->computeFullSubgroups = true; + break; + } default: break; } @@ -1578,6 +1585,21 @@ void radv_GetPhysicalDeviceProperties2( props->maxTimelineSemaphoreValueDifference = UINT64_MAX; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: { + VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props = + (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext; + props->minSubgroupSize = 64; + props->maxSubgroupSize = 64; + props->maxComputeWorkgroupSubgroups = UINT32_MAX; + props->requiredSubgroupSizeStages = 0; /* Defaults: min == max == 64 and an empty stage mask; overridden below on GFX10+. */ + + if (pdevice->rad_info.chip_class >= GFX10) { + /* Only GFX10+ supports wave32, so only there is the minimum lowered to 32 and the compute stage added to requiredSubgroupSizeStages. 
*/ + props->minSubgroupSize = 32; + props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT; + } + break; + } default: break; } diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index a081e2da87a..587e9820844 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -138,6 +138,7 @@ EXTENSIONS = [ Extension('VK_EXT_shader_stencil_export', 1, True), Extension('VK_EXT_shader_subgroup_ballot', 1, True), Extension('VK_EXT_shader_subgroup_vote', 1, True), + Extension('VK_EXT_subgroup_size_control', 2, '!device->use_aco'), Extension('VK_EXT_texel_buffer_alignment', 1, True), Extension('VK_EXT_transform_feedback', 1, True), Extension('VK_EXT_vertex_attribute_divisor', 3, True), diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 533e8c4b0fe..a2839b65dd5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2385,17 +2385,27 @@ radv_fill_shader_keys(struct radv_device *device, keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10; keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples; keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples; + + if (nir[MESA_SHADER_COMPUTE]) { /* When a compute shader is present, copy the pipeline key's subgroup size into the compute variant key. */ + keys[MESA_SHADER_COMPUTE].cs.subgroup_size = key->compute_subgroup_size; + } } static uint8_t radv_get_wave_size(struct radv_device *device, + const VkPipelineShaderStageCreateInfo *pStage, /* NOTE(review): pStage is not read in the part of this function visible in this hunk -- confirm it is needed. */ gl_shader_stage stage, const struct radv_shader_variant_key *key) { if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg) return 64; - else if (stage == MESA_SHADER_COMPUTE) + else if (stage == MESA_SHADER_COMPUTE) { + if (key->cs.subgroup_size) { + /* Return the required subgroup size if specified; a zero key value falls through to the device's cs_wave_size. 
*/ + return key->cs.subgroup_size; + } return device->physical_device->cs_wave_size; + } else if (stage == MESA_SHADER_FRAGMENT) return device->physical_device->ps_wave_size; else @@ -2404,6 +2414,7 @@ radv_get_wave_size(struct radv_device *device, static void radv_fill_shader_info(struct radv_pipeline *pipeline, + const VkPipelineShaderStageCreateInfo **pStages, struct radv_shader_variant_key *keys, struct radv_shader_info *infos, nir_shader **nir) @@ -2505,7 +2516,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, for (int i = 0; i < MESA_SHADER_STAGES; i++) { if (nir[i]) infos[i].wave_size = - radv_get_wave_size(pipeline->device, i, &keys[i]); + radv_get_wave_size(pipeline->device, pStages[i], + i, &keys[i]); } } @@ -2712,7 +2724,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_fill_shader_keys(device, keys, key, nir); - radv_fill_shader_info(pipeline, keys, infos, nir); + radv_fill_shader_info(pipeline, pStages, keys, infos, nir); if ((nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) || @@ -5100,12 +5112,23 @@ static struct radv_pipeline_key radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline, const VkComputePipelineCreateInfo *pCreateInfo) { + const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage; struct radv_pipeline_key key; memset(&key, 0, sizeof(key)); if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) key.optimisations_disabled = 1; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size = + vk_find_struct_const(stage->pNext, + PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT); + + if (subgroup_size) { /* NOTE(review): the 32/64 restriction below is checked by assert() only. */ + assert(subgroup_size->requiredSubgroupSize == 32 || + subgroup_size->requiredSubgroupSize == 64); + key.compute_subgroup_size = subgroup_size->requiredSubgroupSize; /* Otherwise the field keeps the 0 written by the memset above. */ + } + return key; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 99ba500b254..c4d9fe5ce37 100644 --- 
a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -411,6 +411,11 @@ struct radv_pipeline_key { uint32_t has_multiview_view_index : 1; uint32_t optimisations_disabled : 1; uint8_t topology; + + /* Non-zero if a required subgroup size is specified via + * VK_EXT_subgroup_size_control. + */ + uint8_t compute_subgroup_size; }; struct radv_shader_binary; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0ee28b9aa3b..0482255bed5 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -104,12 +104,17 @@ struct radv_fs_variant_key { uint32_t is_int10; }; +struct radv_cs_variant_key { + uint8_t subgroup_size; /* Filled from radv_pipeline_key.compute_subgroup_size; 0 makes radv_get_wave_size use the device's cs_wave_size. */ +}; + struct radv_shader_variant_key { union { struct radv_vs_variant_key vs; struct radv_fs_variant_key fs; struct radv_tes_variant_key tes; struct radv_tcs_variant_key tcs; + struct radv_cs_variant_key cs; /* A common prefix of the vs and tes keys. */ struct radv_vs_out_key vs_common_out;