From 0a6f8b40bfdf39faaf1ff7def741faf612cf5706 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Tue, 13 Jun 2023 19:48:16 -0700 Subject: [PATCH] anv: Implement VK_KHR_cooperative_matrix v2: Rebase on moving lowering pass to src/intel/compiler. v3: Don't hide the extension behind an environment variable (ANV_COOPERATIVE_MATRIX) now the we have a better solution for setting PIPELINE_SELECT. v4: Prefix type names with INTEL_CMAT_. Suggested by Lionel. Also rebase on f99e43d606e ("anv: switch to use runtime physical device properties infrastructure"). Reviewed-by: Ian Romanick Part-of: --- src/intel/vulkan/anv_device.c | 75 +++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_pipeline.c | 8 ++++ src/intel/vulkan/anv_private.h | 5 +++ 3 files changed, 88 insertions(+) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 4805cb5f26e..7392474428c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -218,6 +218,9 @@ get_device_extensions(const struct anv_physical_device *device, const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing; + const bool cooperative_matrix_enabled = + anv_has_cooperative_matrix(&device->info); + *ext = (struct vk_device_extension_table) { .KHR_8bit_storage = true, .KHR_16bit_storage = !device->instance->no_16bit, @@ -226,6 +229,7 @@ get_device_extensions(const struct anv_physical_device *device, .KHR_buffer_device_address = true, .KHR_calibrated_timestamps = device->has_reg_timestamp, .KHR_copy_commands2 = true, + .KHR_cooperative_matrix = cooperative_matrix_enabled, .KHR_create_renderpass2 = true, .KHR_dedicated_allocation = true, .KHR_deferred_host_operations = true, @@ -862,6 +866,9 @@ get_features(const struct anv_physical_device *pdevice, .nestedCommandBuffer = true, .nestedCommandBufferRendering = true, .nestedCommandBufferSimultaneousUse = false, + + /* VK_KHR_cooperative_matrix */ + .cooperativeMatrix = anv_has_cooperative_matrix(&pdevice->info), }; /* The new DOOM and Wolfenstein games require depthBounds without @@ -1319,6 +1326,9 @@ get_properties(const struct anv_physical_device *pdevice, .sparseResidencyStandard3DBlockShape = has_sparse_or_fake, .sparseResidencyAlignedMipSize = false, .sparseResidencyNonResidentStrict = has_sparse_or_fake, + + /* VK_KHR_cooperative_matrix */ + .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT, }; snprintf(props->deviceName, sizeof(props->deviceName), @@ -5127,3 +5137,68 @@ anv_device_get_pat_entry(struct anv_device *device, else return &device->info->pat.writecombining; } + +static VkComponentTypeKHR +convert_component_type(enum intel_cooperative_matrix_component_type t) +{ + switch (t) { + case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR; + case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR; + case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR; + case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR; + case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR; + case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR; + } + unreachable("invalid cooperative matrix component type in configuration"); +} + +static VkScopeKHR +convert_scope(mesa_scope scope) +{ + switch (scope) { + case SCOPE_DEVICE: return VK_SCOPE_DEVICE_KHR; + case SCOPE_WORKGROUP: return VK_SCOPE_WORKGROUP_KHR; + case SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR; + case SCOPE_QUEUE_FAMILY: return VK_SCOPE_QUEUE_FAMILY_KHR; + default: + unreachable("invalid cooperative matrix scope in configuration"); + } +} + +VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkCooperativeMatrixPropertiesKHR* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct intel_device_info *devinfo = &pdevice->info; + + assert(anv_has_cooperative_matrix(devinfo)); + + VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount); + + for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) { + const struct intel_cooperative_matrix_configuration *cfg = + &devinfo->cooperative_matrix_configurations[i]; + + if (cfg->scope == SCOPE_NONE) + break; + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) { + prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR; + + prop->MSize = cfg->m; + prop->NSize = cfg->n; + prop->KSize = cfg->k; + + prop->AType = convert_component_type(cfg->a); + prop->BType = convert_component_type(cfg->b); + prop->CType = convert_component_type(cfg->c); + prop->ResultType = convert_component_type(cfg->result); + + prop->scope = convert_scope(cfg->scope); + } + } + + return vk_outarray_status(&out); +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 6683637d484..2e98f17e8ba 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -144,6 +144,7 @@ anv_shader_stage_to_nir(struct anv_device *device, const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing; const struct spirv_to_nir_options spirv_options = { .caps = { + .cooperative_matrix = anv_has_cooperative_matrix(&pdevice->info), .demote_to_helper_invocation = true, .derivative_group = true, .descriptor_array_dynamic_indexing = true, @@ -977,6 +978,13 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, use_primitive_replication); } + if (gl_shader_stage_uses_workgroup(nir->info.stage)) { + // TODO(coop): Write this restriction the right way, maybe we need to do at the backend. + assert(nir->info.subgroup_size <= SUBGROUP_SIZE_REQUIRE_8); + nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8; + NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size); + } + /* The patch control points are delivered through a push constant when * dynamic. */ diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 94798bdbac0..b4407f57cca 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -5804,6 +5804,11 @@ static inline void anv_perfetto_end_submit(struct anv_queue *queue, {} #endif +static bool +anv_has_cooperative_matrix(const struct intel_device_info *info) +{ + return info->cooperative_matrix_configurations[0].scope != SCOPE_NONE; +} #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ VK_FROM_HANDLE(__anv_type, __name, __handle)