From 91ebeddbad6cc39a098b95eee9efe953e32dca64 Mon Sep 17 00:00:00 2001 From: Olivia Lee Date: Wed, 11 Jun 2025 03:10:20 -0700 Subject: [PATCH] panvk: make extensions, features, and properties per-arch This makes things a little more flexible for groups of fields that are arch-dependent, and allows us to use existing per-arch constant macros instead of open-coding their values. Signed-off-by: Olivia Lee Reviewed-by: Boris Brezillon Acked-by: Erik Faye-Lund Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- src/panfrost/vulkan/meson.build | 1 + src/panfrost/vulkan/panvk_physical_device.c | 1030 +---------------- src/panfrost/vulkan/panvk_physical_device.h | 22 + .../vulkan/panvk_vX_physical_device.c | 947 +++++++++++++++ 4 files changed, 1030 insertions(+), 970 deletions(-) create mode 100644 src/panfrost/vulkan/panvk_vX_physical_device.c diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index 724418eda9b..aaaf16264b5 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -111,6 +111,7 @@ common_per_arch_files = [ 'panvk_vX_descriptor_set.c', 'panvk_vX_descriptor_set_layout.c', 'panvk_vX_device.c', + 'panvk_vX_physical_device.c', 'panvk_vX_precomp_cache.c', 'panvk_vX_query_pool.c', 'panvk_vX_image_view.c', diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 8b69067c60c..f8e65fa047a 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -11,7 +11,6 @@ #include #include -#include #include "util/disk_cache.h" #include "git_sha1.h" @@ -19,9 +18,7 @@ #include "vk_device.h" #include "vk_drm_syncobj.h" #include "vk_format.h" -#include "vk_limits.h" #include "vk_log.h" -#include "vk_shader_module.h" #include "vk_util.h" #include "panvk_device.h" @@ -30,18 +27,38 @@ #include "panvk_physical_device.h" #include "panvk_wsi.h" -#include "pan_format.h" #include "pan_props.h" #include "genxml/gen_macros.h" 
-#define ARM_VENDOR_ID 0x13b5 -#define MAX_PUSH_DESCRIPTORS 32 -/* We reserve one ubo for push constant, one for sysvals and one per-set for the - * descriptor metadata */ -#define RESERVED_UBO_COUNT 6 -#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 - RESERVED_UBO_COUNT -#define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16) +#define PER_ARCH_FUNCS(_ver) \ + void panvk_v##_ver##_get_physical_device_extensions( \ + const struct panvk_physical_device *device, \ + struct vk_device_extension_table *ext); \ + \ + void panvk_v##_ver##_get_physical_device_features( \ + const struct panvk_instance *instance, \ + const struct panvk_physical_device *device, \ + struct vk_features *features); \ + \ + void panvk_v##_ver##_get_physical_device_properties( \ + const struct panvk_instance *instance, \ + const struct panvk_physical_device *device, \ + struct vk_properties *properties); \ + \ + VkResult panvk_v##_ver##_create_device( \ + struct panvk_physical_device *physical_device, \ + const VkDeviceCreateInfo *pCreateInfo, \ + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); \ + \ + void panvk_v##_ver##_destroy_device( \ + struct panvk_device *device, const VkAllocationCallbacks *pAllocator) + +PER_ARCH_FUNCS(6); +PER_ARCH_FUNCS(7); +PER_ARCH_FUNCS(10); +PER_ARCH_FUNCS(12); +PER_ARCH_FUNCS(13); static VkResult create_kmod_dev(struct panvk_physical_device *device, @@ -203,946 +220,6 @@ get_device_sync_types(struct panvk_physical_device *device, return VK_SUCCESS; } -static void -get_device_extensions(const struct panvk_physical_device *device, - struct vk_device_extension_table *ext) -{ - const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); - - bool has_vk1_1 = arch >= 10; - bool has_vk1_2 = arch >= 10; - - *ext = (struct vk_device_extension_table){ - .KHR_8bit_storage = true, - .KHR_16bit_storage = true, - .KHR_bind_memory2 = true, - .KHR_buffer_device_address = true, - .KHR_copy_commands2 = true, - .KHR_create_renderpass2 = true, - .KHR_dedicated_allocation = 
true, - .KHR_descriptor_update_template = true, - .KHR_depth_clamp_zero_one = true, - .KHR_depth_stencil_resolve = true, - .KHR_device_group = true, - .KHR_draw_indirect_count = arch >= 10, - .KHR_driver_properties = true, - .KHR_dynamic_rendering = true, - .KHR_dynamic_rendering_local_read = true, - .KHR_external_fence = true, - .KHR_external_fence_fd = true, - .KHR_external_memory = true, - .KHR_external_memory_fd = true, - .KHR_external_semaphore = true, - .KHR_external_semaphore_fd = true, - .KHR_format_feature_flags2 = true, - .KHR_get_memory_requirements2 = true, - .KHR_global_priority = true, - .KHR_image_format_list = true, - .KHR_imageless_framebuffer = true, - .KHR_index_type_uint8 = true, - .KHR_line_rasterization = true, - .KHR_load_store_op_none = true, - .KHR_maintenance1 = true, - .KHR_maintenance2 = true, - .KHR_maintenance3 = true, - .KHR_maintenance4 = has_vk1_1, - .KHR_maintenance5 = has_vk1_1, - .KHR_map_memory2 = true, - .KHR_multiview = true, - .KHR_pipeline_executable_properties = true, - .KHR_pipeline_library = true, - .KHR_push_descriptor = true, - .KHR_relaxed_block_layout = true, - .KHR_sampler_mirror_clamp_to_edge = true, - .KHR_sampler_ycbcr_conversion = arch >= 10, - .KHR_separate_depth_stencil_layouts = true, - .KHR_shader_draw_parameters = true, - .KHR_shader_expect_assume = true, - .KHR_shader_float_controls = true, - .KHR_shader_float_controls2 = has_vk1_1, - .KHR_shader_float16_int8 = true, - .KHR_shader_integer_dot_product = true, - .KHR_shader_maximal_reconvergence = has_vk1_1, - .KHR_shader_non_semantic_info = true, - .KHR_shader_quad_control = has_vk1_2, - .KHR_shader_relaxed_extended_instruction = true, - .KHR_shader_subgroup_extended_types = has_vk1_1, - .KHR_shader_subgroup_rotate = true, - .KHR_shader_subgroup_uniform_control_flow = has_vk1_1, - .KHR_shader_terminate_invocation = true, - .KHR_spirv_1_4 = arch >= 10, - .KHR_storage_buffer_storage_class = true, -#ifdef PANVK_USE_WSI_PLATFORM - .KHR_present_id2 = true, - 
.KHR_present_wait2 = true, - .KHR_swapchain = true, -#endif - .KHR_synchronization2 = true, - .KHR_timeline_semaphore = true, - .KHR_uniform_buffer_standard_layout = true, - .KHR_variable_pointers = true, - .KHR_vertex_attribute_divisor = true, - .KHR_vulkan_memory_model = true, - .KHR_zero_initialize_workgroup_memory = true, - .EXT_4444_formats = true, - .EXT_border_color_swizzle = true, - .EXT_buffer_device_address = true, - .EXT_custom_border_color = true, - .EXT_depth_bias_control = true, - .EXT_depth_clamp_zero_one = true, - .EXT_depth_clip_enable = true, - .EXT_depth_clip_control = true, -#ifdef VK_USE_PLATFORM_DISPLAY_KHR - .EXT_display_control = true, -#endif - .EXT_extended_dynamic_state = true, - .EXT_extended_dynamic_state2 = true, - .EXT_external_memory_dma_buf = true, - .EXT_global_priority = true, - .EXT_global_priority_query = true, - .EXT_graphics_pipeline_library = true, - .EXT_hdr_metadata = true, - .EXT_host_query_reset = true, - .EXT_image_2d_view_of_3d = true, - /* EXT_image_drm_format_modifier depends on KHR_sampler_ycbcr_conversion */ - .EXT_image_drm_format_modifier = arch >= 10, - .EXT_image_robustness = true, - .EXT_index_type_uint8 = true, - .EXT_line_rasterization = true, - .EXT_load_store_op_none = true, - .EXT_physical_device_drm = true, - .EXT_pipeline_creation_cache_control = true, - .EXT_pipeline_creation_feedback = true, - .EXT_pipeline_robustness = true, - .EXT_private_data = true, - .EXT_primitive_topology_list_restart = true, - .EXT_provoking_vertex = true, - .EXT_queue_family_foreign = true, - .EXT_sampler_filter_minmax = arch >= 10, - .EXT_scalar_block_layout = true, - .EXT_separate_stencil_usage = true, - .EXT_shader_module_identifier = true, - .EXT_shader_demote_to_helper_invocation = true, - .EXT_shader_replicated_composites = true, - .EXT_shader_subgroup_ballot = true, - .EXT_shader_subgroup_vote = true, - .EXT_subgroup_size_control = has_vk1_1, - .EXT_texel_buffer_alignment = true, - .EXT_texture_compression_astc_hdr = 
true, - .EXT_tooling_info = true, - .EXT_vertex_attribute_divisor = true, - .EXT_vertex_input_dynamic_state = true, - .EXT_ycbcr_2plane_444_formats = arch >= 10, - .EXT_ycbcr_image_arrays = arch >= 10, - .EXT_inline_uniform_block = true, - .GOOGLE_decorate_string = true, - .GOOGLE_hlsl_functionality1 = true, - .GOOGLE_user_type = true, - }; -} - -static bool -has_compressed_formats(const struct panvk_physical_device *physical_device, - const uint32_t required_formats) -{ - uint32_t supported_compr_fmts = - pan_query_compressed_formats(&physical_device->kmod.props); - - return (supported_compr_fmts & required_formats) == required_formats; -} - -static bool -has_texture_compression_etc2(const struct panvk_physical_device *physical_device) -{ - return has_compressed_formats(physical_device, - BITFIELD_BIT(MALI_ETC2_RGB8) | - BITFIELD_BIT(MALI_ETC2_RGB8A1) | BITFIELD_BIT(MALI_ETC2_RGBA8) | - BITFIELD_BIT(MALI_ETC2_R11_UNORM) | BITFIELD_BIT(MALI_ETC2_R11_SNORM) | - BITFIELD_BIT(MALI_ETC2_RG11_UNORM) | BITFIELD_BIT(MALI_ETC2_RG11_SNORM)); -} - -static bool -has_texture_compression_astc_ldr(const struct panvk_physical_device *physical_device) -{ - return has_compressed_formats(physical_device, BITFIELD_BIT(MALI_ASTC_2D_LDR)); -} - -static bool -has_texture_compression_astc_hdr(const struct panvk_physical_device *physical_device) -{ - return has_compressed_formats(physical_device, BITFIELD_BIT(MALI_ASTC_2D_HDR)); -} - -static bool -has_texture_compression_bc(const struct panvk_physical_device *physical_device) -{ - return has_compressed_formats(physical_device, - BITFIELD_BIT(MALI_BC1_UNORM) | BITFIELD_BIT(MALI_BC2_UNORM) | - BITFIELD_BIT(MALI_BC3_UNORM) | BITFIELD_BIT(MALI_BC4_UNORM) | - BITFIELD_BIT(MALI_BC4_SNORM) | BITFIELD_BIT(MALI_BC5_UNORM) | - BITFIELD_BIT(MALI_BC5_SNORM) | BITFIELD_BIT(MALI_BC6H_SF16) | - BITFIELD_BIT(MALI_BC6H_UF16) | BITFIELD_BIT(MALI_BC7_UNORM)); -} - -static void -get_features(const struct panvk_instance *instance, - const struct 
panvk_physical_device *device, - struct vk_features *features) -{ - unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); - - *features = (struct vk_features){ - /* Vulkan 1.0 */ - .depthClamp = true, - .depthBiasClamp = true, - .dualSrcBlend = true, - .robustBufferAccess = true, - .fullDrawIndexUint32 = true, - .imageCubeArray = true, - .independentBlend = true, - .sampleRateShading = true, - .logicOp = true, - .multiDrawIndirect = arch >= 10, - .wideLines = true, - .largePoints = true, - .occlusionQueryPrecise = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = has_texture_compression_etc2(device), - .textureCompressionASTC_LDR = has_texture_compression_astc_ldr(device), - .textureCompressionBC = has_texture_compression_bc(device), - .fragmentStoresAndAtomics = arch >= 10, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = true, - .shaderStorageImageReadWithoutFormat = true, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = true, - .shaderStorageBufferArrayDynamicIndexing = true, - .shaderStorageImageArrayDynamicIndexing = true, - .shaderInt16 = true, - .shaderInt64 = true, - .drawIndirectFirstInstance = true, - - /* On v13+, the hardware isn't speculatively referencing to invalid - indices anymore. 
*/ - .vertexPipelineStoresAndAtomics = - arch >= 13 && instance->enable_vertex_pipeline_stores_atomics, - - /* Vulkan 1.1 */ - .storageBuffer16BitAccess = true, - .uniformAndStorageBuffer16BitAccess = true, - .storagePushConstant16 = true, - .storageInputOutput16 = true, - .multiview = true, - .multiviewGeometryShader = false, - .multiviewTessellationShader = false, - .variablePointersStorageBuffer = true, - .variablePointers = true, - .protectedMemory = false, - .samplerYcbcrConversion = arch >= 10, - .shaderDrawParameters = true, - - /* Vulkan 1.2 */ - .samplerMirrorClampToEdge = true, - .drawIndirectCount = arch >= 10, - .storageBuffer8BitAccess = true, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = true, - .shaderBufferInt64Atomics = false, - .shaderSharedInt64Atomics = false, - .shaderFloat16 = arch >= 10, - .shaderInt8 = true, - - .descriptorIndexing = false, - .shaderInputAttachmentArrayDynamicIndexing = false, - .shaderUniformTexelBufferArrayDynamicIndexing = false, - .shaderStorageTexelBufferArrayDynamicIndexing = false, - .shaderUniformBufferArrayNonUniformIndexing = false, - .shaderSampledImageArrayNonUniformIndexing = false, - .shaderStorageBufferArrayNonUniformIndexing = false, - .shaderStorageImageArrayNonUniformIndexing = false, - .shaderInputAttachmentArrayNonUniformIndexing = false, - .shaderUniformTexelBufferArrayNonUniformIndexing = false, - .shaderStorageTexelBufferArrayNonUniformIndexing = false, - .descriptorBindingUniformBufferUpdateAfterBind = false, - .descriptorBindingSampledImageUpdateAfterBind = false, - .descriptorBindingStorageImageUpdateAfterBind = false, - .descriptorBindingStorageBufferUpdateAfterBind = false, - .descriptorBindingUniformTexelBufferUpdateAfterBind = false, - .descriptorBindingStorageTexelBufferUpdateAfterBind = false, - .descriptorBindingUpdateUnusedWhilePending = false, - .descriptorBindingPartiallyBound = false, - .descriptorBindingVariableDescriptorCount = false, - .runtimeDescriptorArray = 
false, - - .samplerFilterMinmax = arch >= 10, - .scalarBlockLayout = true, - .imagelessFramebuffer = true, - .uniformBufferStandardLayout = true, - .shaderSubgroupExtendedTypes = true, - .separateDepthStencilLayouts = true, - .hostQueryReset = true, - .timelineSemaphore = true, - .bufferDeviceAddress = true, - .bufferDeviceAddressCaptureReplay = false, - .bufferDeviceAddressMultiDevice = false, - .vulkanMemoryModel = true, - .vulkanMemoryModelDeviceScope = true, - .vulkanMemoryModelAvailabilityVisibilityChains = true, - .shaderOutputViewportIndex = false, - .shaderOutputLayer = false, - .subgroupBroadcastDynamicId = true, - - /* Vulkan 1.3 */ - .robustImageAccess = true, - .inlineUniformBlock = true, - .descriptorBindingInlineUniformBlockUpdateAfterBind = true, - .extendedDynamicState = true, - .extendedDynamicState2 = true, - .extendedDynamicState2LogicOp = true, - .extendedDynamicState2PatchControlPoints = false, - .pipelineCreationCacheControl = true, - .privateData = true, - .shaderDemoteToHelperInvocation = true, - .shaderTerminateInvocation = true, - .subgroupSizeControl = true, - .computeFullSubgroups = true, - .synchronization2 = true, - .textureCompressionASTC_HDR = has_texture_compression_astc_hdr(device), - .shaderZeroInitializeWorkgroupMemory = true, - .dynamicRendering = true, - .dynamicRenderingLocalRead = true, - .shaderIntegerDotProduct = true, - .maintenance4 = true, - .maintenance5 = true, - - /* Vulkan 1.4 */ - .shaderSubgroupRotate = true, - .shaderSubgroupRotateClustered = true, - - /* VK_KHR_depth_clamp_zero_one */ - .depthClampZeroOne = true, - - /* VK_KHR_line_rasterization */ - .rectangularLines = true, - .bresenhamLines = true, - - /* VK_EXT_graphics_pipeline_library */ - .graphicsPipelineLibrary = true, - - /* VK_KHR_global_priority */ - .globalPriorityQuery = true, - - /* VK_KHR_index_type_uint8 */ - .indexTypeUint8 = true, - - /* VK_KHR_vertex_attribute_divisor */ - .vertexAttributeInstanceRateDivisor = true, - 
.vertexAttributeInstanceRateZeroDivisor = true, - - /* VK_EXT_vertex_input_dynamic_state */ - .vertexInputDynamicState = true, - - /* VK_EXT_depth_bias_control */ - .depthBiasControl = true, - .leastRepresentableValueForceUnormRepresentation = false, - .floatRepresentation = false, - .depthBiasExact = true, - - /* VK_EXT_depth_clip_control */ - .depthClipControl = true, - - /* VK_EXT_depth_clip_enable */ - .depthClipEnable = true, - - /* VK_EXT_4444_formats */ - .formatA4R4G4B4 = true, - .formatA4B4G4R4 = true, - - /* VK_EXT_custom_border_color */ - .customBorderColors = true, - - /* VK_EXT_border_color_swizzle */ - .borderColorSwizzle = true, - .borderColorSwizzleFromImage = true, - - /* VK_EXT_image_2d_view_of_3d */ - .image2DViewOf3D = true, - .sampler2DViewOf3D = true, - - /* VK_EXT_primitive_topology_list_restart */ - .primitiveTopologyListRestart = true, - .primitiveTopologyPatchListRestart = false, - - /* VK_EXT_provoking_vertex */ - .provokingVertexLast = true, - .transformFeedbackPreservesProvokingVertex = false, - - /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to - * make it work, which forces us to apply the same swizzle on the border - * color, meaning we need to know the format when preparing the border - * color. 
- */ - .customBorderColorWithoutFormat = arch != 7, - - /* VK_KHR_pipeline_executable_properties */ - .pipelineExecutableInfo = true, - - /* VK_EXT_pipeline_robustness */ - .pipelineRobustness = true, - - /* VK_KHR_shader_float_controls2 */ - .shaderFloatControls2 = true, - - /* VK_KHR_shader_quad_control */ - .shaderQuadControl = true, - - /* VK_KHR_shader_relaxed_extended_instruction */ - .shaderRelaxedExtendedInstruction = true, - - /* VK_KHR_shader_maximal_reconvergence */ - .shaderMaximalReconvergence = true, - - /* VK_KHR_shader_subgroup_uniform_control_flow */ - .shaderSubgroupUniformControlFlow = true, - - /* VK_KHR_shader_expect_assume */ - .shaderExpectAssume = true, - - /* VK_EXT_shader_module_identifier */ - .shaderModuleIdentifier = true, - - /* VK_EXT_shader_replicated_composites */ - .shaderReplicatedComposites = true, - - /* VK_EXT_texel_buffer_alignment */ - .texelBufferAlignment = true, - - /* VK_EXT_ycbcr_2plane_444_formats */ - .ycbcr2plane444Formats = arch >= 10, - - /* VK_EXT_ycbcr_image_arrays */ - .ycbcrImageArrays = arch >= 10, - - /* VK_KHR_push_descriptor */ - .pushDescriptor = true, - }; -} - -static uint32_t -get_api_version(unsigned arch) -{ - const uint32_t version_override = vk_get_version_override(); - if (version_override) - return version_override; - - if (arch >= 10) - return VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION); - - return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION); -} - -static VkConformanceVersion -get_conformance_version(unsigned arch) -{ - if (arch == 10) - return (VkConformanceVersion){1, 4, 1, 2}; - - return (VkConformanceVersion){0, 0, 0, 0}; -} - -static VkSampleCountFlags -get_sample_counts(unsigned arch, unsigned max_tib_size, - unsigned max_cbuf_atts, unsigned format_size) -{ - VkSampleCountFlags sample_counts = - VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; - - unsigned max_msaa = - pan_get_max_msaa(arch, max_tib_size, max_cbuf_atts, format_size); - - assert(max_msaa >= 4); - - if (arch >= 12) - 
sample_counts |= VK_SAMPLE_COUNT_2_BIT; - - if (max_msaa >= 8) - sample_counts |= VK_SAMPLE_COUNT_8_BIT; - - if (max_msaa >= 16) - sample_counts |= VK_SAMPLE_COUNT_16_BIT; - - return sample_counts; -} - -static void -get_device_properties(const struct panvk_instance *instance, - const struct panvk_physical_device *device, - struct vk_properties *properties) -{ - unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); - unsigned max_tib_size = pan_get_max_tib_size(arch, device->model); - const unsigned max_cbuf_format = 16; /* R32G32B32A32 */ - - unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size); - VkSampleCountFlags sample_counts = - get_sample_counts(arch, max_tib_size, max_cbuf_atts, max_cbuf_format); - - uint64_t os_page_size = 4096; - os_get_page_size(&os_page_size); - - /* Ensure that the max threads count per workgroup is valid for Bifrost */ - assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024); - - float pointSizeRangeMin; - float pointSizeRangeMax; - - /* On v13+, point size handling changed entirely */ - if (arch >= 13) { - pointSizeRangeMin = 1.0; - pointSizeRangeMax = 1024.0; - } else { - pointSizeRangeMin = 0.125; - pointSizeRangeMax = 4095.9375; - } - - *properties = (struct vk_properties){ - .apiVersion = get_api_version(arch), - .driverVersion = vk_get_driver_version(), - .vendorID = instance->force_vk_vendor ? instance->force_vk_vendor : - ARM_VENDOR_ID, - - /* Collect arch_major, arch_minor, arch_rev and product_major, - * as done by the Arm driver. - */ - .deviceID = device->kmod.props.gpu_prod_id << 16, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - - /* Vulkan 1.0 limits */ - /* Maximum texture dimension is 2^16. */ - .maxImageDimension1D = (1 << 16), - .maxImageDimension2D = (1 << 16), - .maxImageDimension3D = (1 << 16), - .maxImageDimensionCube = (1 << 16), - .maxImageArrayLayers = (1 << 16), - /* Currently limited by the 1D texture size, which is 2^16. 
- * TODO: If we expose buffer views as 2D textures, we can increase the - * limit. - */ - .maxTexelBufferElements = (1 << 16), - /* Each uniform entry is 16-byte and the number of entries is encoded in a - * 12-bit field, with the minus(1) modifier, which gives 2^20. - */ - .maxUniformBufferRange = 1 << 20, - /* Storage buffer access is lowered to globals, so there's no limit here, - * except for the SW-descriptor we use to encode storage buffer - * descriptors, where the size is a 32-bit field. - */ - .maxStorageBufferRange = UINT32_MAX, - /* Vulkan 1.4 minimum. We currently implement push constants in terms of - * FAUs so we're limited by how many user-defined FAUs the hardware - * offers, minus driver-internal needs. If we ever need go to higher, - * we'll have to implement push constants in terms of both FAUs and global - * loads. - */ - .maxPushConstantsSize = 256, - /* On our kernel drivers we're limited by the available memory rather - * than available allocations. This is better expressed through memory - * properties and budget queries, and by returning - * VK_ERROR_OUT_OF_DEVICE_MEMORY when applicable, rather than - * this limit. - */ - .maxMemoryAllocationCount = UINT32_MAX, - /* On Mali, VkSampler objects do not use any resources other than host - * memory and host address space, availability of which can change - * significantly over time. - */ - .maxSamplerAllocationCount = UINT32_MAX, - /* A cache line. */ - .bufferImageGranularity = 64, - /* Sparse binding not supported yet. */ - .sparseAddressSpaceSize = 0, - /* On Bifrost, this is a software limit. We pick the minimum required by - * Vulkan, because Bifrost GPUs don't have unified descriptor tables, - * which forces us to agregatte all descriptors from all sets and dispatch - * them to per-type descriptor tables emitted at draw/dispatch time. The - * more sets we support the more copies we are likely to have to do at - * draw time. 
- * - * Valhall has native support for descriptor sets, and allows a maximum - * of 16 sets, but we reserve one for our internal use, so we have 15 - * left. - */ - .maxBoundDescriptorSets = arch <= 7 ? 4 : 15, - /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ - .maxDescriptorSetSamplers = UINT16_MAX, - /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots - * for our internal UBOs. - */ - .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32, - .maxDescriptorSetUniformBuffers = UINT8_MAX - 32, - /* SSBOs are limited by the size of a uniform buffer which contains our - * panvk_ssbo_addr objects. - * panvk_ssbo_addr is 16-byte, and each uniform entry in the Mali UBO is - * 16-byte too. The number of entries is encoded in a 12-bit field, with - * a minus(1) modifier, which gives a maximum of 2^12 SSBO - * descriptors. - */ - .maxDescriptorSetStorageBuffers = 1 << 12, - /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ - .maxDescriptorSetSampledImages = UINT16_MAX, - /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two - * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images. - */ - .maxDescriptorSetStorageImages = 1 << 8, - /* A maximum of 8 color render targets, and one depth-stencil render - * target. - */ - .maxDescriptorSetInputAttachments = 9, - - /* We could theoretically use the maxDescriptor values here (except for - * UBOs where we're really limited to 256 on the shader side), but on - * Bifrost we have to copy some tables around, which comes at an extra - * memory/processing cost, so let's pick something smaller. - */ - .maxPerStageDescriptorInputAttachments = 9, - .maxPerStageDescriptorSampledImages = 256, - .maxPerStageDescriptorSamplers = 128, - .maxPerStageDescriptorStorageBuffers = 64, - .maxPerStageDescriptorStorageImages = 32, - .maxPerStageDescriptorUniformBuffers = 64, - .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64, - - /* Software limits to keep VkCommandBuffer tracking sane. 
*/ - .maxDescriptorSetUniformBuffersDynamic = 16, - .maxDescriptorSetStorageBuffersDynamic = 8, - /* Software limit to keep VkCommandBuffer tracking sane. The HW supports - * up to 2^9 vertex attributes. - */ - .maxVertexInputAttributes = 16, - .maxVertexInputBindings = 16, - /* MALI_ATTRIBUTE::offset is 32-bit. */ - .maxVertexInputAttributeOffset = UINT32_MAX, - /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */ - .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE, - /* 32 vec4 varyings. */ - .maxVertexOutputComponents = 128, - /* Tesselation shaders not supported. */ - .maxTessellationGenerationLevel = 0, - .maxTessellationPatchSize = 0, - .maxTessellationControlPerVertexInputComponents = 0, - .maxTessellationControlPerVertexOutputComponents = 0, - .maxTessellationControlPerPatchOutputComponents = 0, - .maxTessellationControlTotalOutputComponents = 0, - .maxTessellationEvaluationInputComponents = 0, - .maxTessellationEvaluationOutputComponents = 0, - /* Geometry shaders not supported. */ - .maxGeometryShaderInvocations = 0, - .maxGeometryInputComponents = 0, - .maxGeometryOutputComponents = 0, - .maxGeometryOutputVertices = 0, - .maxGeometryTotalOutputComponents = 0, - /* 32 vec4 varyings. */ - .maxFragmentInputComponents = 128, - /* 8 render targets. */ - .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = max_cbuf_atts, - /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see - * above). - */ - .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8), - /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to - * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't - * really make sense to expose this amount of memory, especially since - * it's backed by global memory anyway. - */ - .maxComputeSharedMemorySize = 32768, - /* Software limit to meet Vulkan 1.0 requirements. We split the - * dispatch in several jobs if it's too big. 
- */ - .maxComputeWorkGroupCount = {65535, 65535, 65535}, - - /* We could also split into serveral jobs but this has many limitations. - * As such we limit to the max threads per workgroup supported by the GPU. - */ - .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg, - .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg, - device->kmod.props.max_threads_per_wg, - device->kmod.props.max_threads_per_wg}, - /* 8-bit subpixel precision. */ - .subPixelPrecisionBits = 8, - .subTexelPrecisionBits = 8, - .mipmapPrecisionBits = 8, - /* Software limit. */ - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = arch >= 10 ? UINT32_MAX : 1, - .maxSamplerLodBias = (float)INT16_MAX / 256.0f, - .maxSamplerAnisotropy = 16, - .maxViewports = 1, - /* Same as the framebuffer limit. */ - .maxViewportDimensions = {(1 << 14), (1 << 14)}, - /* Encoded in a 16-bit signed integer. */ - .viewportBoundsRange = {INT16_MIN, INT16_MAX}, - .viewportSubPixelBits = 0, - /* Align on a page. */ - .minMemoryMapAlignment = os_page_size, - /* Some compressed texture formats require 128-byte alignment. */ - .minTexelBufferOffsetAlignment = 64, - /* Always aligned on a uniform slot (vec4). */ - .minUniformBufferOffsetAlignment = 16, - /* Lowered to global accesses, which happen at the 32-bit granularity. */ - .minStorageBufferOffsetAlignment = 4, - /* Signed 4-bit value. 
*/ - .minTexelOffset = -8, - .maxTexelOffset = 7, - .minTexelGatherOffset = -8, - .maxTexelGatherOffset = 7, - .minInterpolationOffset = -0.5, - .maxInterpolationOffset = 0.5, - .subPixelInterpolationOffsetBits = 8, - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = 256, - .framebufferColorSampleCounts = sample_counts, - .framebufferDepthSampleCounts = sample_counts, - .framebufferStencilSampleCounts = sample_counts, - .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = max_cbuf_atts, - .sampledImageColorSampleCounts = sample_counts, - .sampledImageIntegerSampleCounts = sample_counts, - .sampledImageDepthSampleCounts = sample_counts, - .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = false, - .timestampPeriod = 0, - .maxClipDistances = 0, - .maxCullDistances = 0, - .maxCombinedClipAndCullDistances = 0, - .discreteQueuePriorities = 2, - .pointSizeRange = {pointSizeRangeMin, pointSizeRangeMax}, - .lineWidthRange = {0.0, 7.9921875}, - .pointSizeGranularity = (1.0 / 16.0), - .lineWidthGranularity = (1.0 / 128.0), - .strictLines = true, - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 64, - .optimalBufferCopyRowPitchAlignment = 64, - .nonCoherentAtomSize = 64, - - /* Vulkan 1.0 sparse properties */ - .sparseResidencyNonResidentStrict = false, - .sparseResidencyAlignedMipSize = false, - .sparseResidencyStandard2DBlockShape = false, - .sparseResidencyStandard2DMultisampleBlockShape = false, - .sparseResidencyStandard3DBlockShape = false, - - /* Vulkan 1.1 properties */ - .subgroupSize = pan_subgroup_size(arch), - /* We only support VS, FS, and CS. 
- * - * The HW may spawn VS invocations for non-existing indices, which could - * be observed through subgroup ops (though the user can observe them - * through infinte loops anyway), so subgroup ops can't be supported in - * VS. - */ - .subgroupSupportedStages = - VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT, - .subgroupSupportedOperations = - VK_SUBGROUP_FEATURE_BASIC_BIT | - VK_SUBGROUP_FEATURE_VOTE_BIT | - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | - VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | - VK_SUBGROUP_FEATURE_CLUSTERED_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT | - VK_SUBGROUP_FEATURE_ROTATE_BIT | - VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT, - .subgroupQuadOperationsInAllStages = false, - .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, - .maxMultiviewViewCount = 8, - .maxMultiviewInstanceIndex = UINT32_MAX, - .protectedNoFault = false, - .maxPerSetDescriptors = UINT16_MAX, - /* Our buffer size fields allow only this much */ - .maxMemoryAllocationSize = UINT32_MAX, - - /* Vulkan 1.2 properties */ - .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | - VK_RESOLVE_MODE_AVERAGE_BIT | - VK_RESOLVE_MODE_MIN_BIT | - VK_RESOLVE_MODE_MAX_BIT, - .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | - VK_RESOLVE_MODE_MIN_BIT | - VK_RESOLVE_MODE_MAX_BIT, - .independentResolveNone = true, - .independentResolve = true, - /* VK_KHR_driver_properties */ - .driverID = VK_DRIVER_ID_MESA_PANVK, - .conformanceVersion = get_conformance_version(arch), - .denormBehaviorIndependence = arch >= 9 ? 
- VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE : - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, - .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, - .shaderSignedZeroInfNanPreserveFloat16 = true, - .shaderSignedZeroInfNanPreserveFloat32 = true, - .shaderSignedZeroInfNanPreserveFloat64 = false, - .shaderDenormPreserveFloat16 = true, - .shaderDenormPreserveFloat32 = true, - .shaderDenormPreserveFloat64 = true, - .shaderDenormFlushToZeroFloat16 = true, - .shaderDenormFlushToZeroFloat32 = true, - .shaderDenormFlushToZeroFloat64 = true, - .shaderRoundingModeRTEFloat16 = true, - .shaderRoundingModeRTEFloat32 = true, - .shaderRoundingModeRTEFloat64 = false, - .shaderRoundingModeRTZFloat16 = true, - .shaderRoundingModeRTZFloat32 = true, - .shaderRoundingModeRTZFloat64 = false, - /* XXX: VK_EXT_descriptor_indexing */ - .maxUpdateAfterBindDescriptorsInAllPools = 0, - .shaderUniformBufferArrayNonUniformIndexingNative = false, - .shaderSampledImageArrayNonUniformIndexingNative = false, - .shaderStorageBufferArrayNonUniformIndexingNative = false, - .shaderStorageImageArrayNonUniformIndexingNative = false, - .shaderInputAttachmentArrayNonUniformIndexingNative = false, - .robustBufferAccessUpdateAfterBind = false, - .quadDivergentImplicitLod = false, - .maxPerStageDescriptorUpdateAfterBindSamplers = 0, - .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0, - .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0, - .maxPerStageDescriptorUpdateAfterBindSampledImages = 0, - .maxPerStageDescriptorUpdateAfterBindStorageImages = 0, - .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0, - .maxPerStageUpdateAfterBindResources = 0, - .maxDescriptorSetUpdateAfterBindSamplers = 0, - .maxDescriptorSetUpdateAfterBindUniformBuffers = 0, - .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0, - .maxDescriptorSetUpdateAfterBindStorageBuffers = 0, - .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0, - .maxDescriptorSetUpdateAfterBindSampledImages = 0, - 
.maxDescriptorSetUpdateAfterBindStorageImages = 0, - .maxDescriptorSetUpdateAfterBindInputAttachments = 0, - .filterMinmaxSingleComponentFormats = arch >= 10, - .filterMinmaxImageComponentMapping = arch >= 10, - .maxTimelineSemaphoreValueDifference = INT64_MAX, - .framebufferIntegerColorSampleCounts = sample_counts, - - /* Vulkan 1.3 properties */ - /* XXX: 1.3 support */ - - /* VK_EXT_subgroup_size_control */ - .minSubgroupSize = pan_subgroup_size(arch), - .maxSubgroupSize = pan_subgroup_size(arch), - .maxComputeWorkgroupSubgroups = - device->kmod.props.max_threads_per_wg / pan_subgroup_size(arch), - .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT, - - /* XXX: VK_EXT_inline_uniform_block */ - .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE, - .maxPerStageDescriptorInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, - .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, - .maxDescriptorSetInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, - .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, - .maxInlineUniformTotalSize = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE, - - /* VK_KHR_shader_integer_dot_product */ - .integerDotProduct8BitUnsignedAccelerated = true, - .integerDotProduct8BitSignedAccelerated = true, - .integerDotProduct4x8BitPackedUnsignedAccelerated = true, - .integerDotProduct4x8BitPackedSignedAccelerated = true, - - /* XXX: VK_EXT_texel_buffer_alignment */ - .storageTexelBufferOffsetAlignmentBytes = 64, - .storageTexelBufferOffsetSingleTexelAlignment = false, - .uniformTexelBufferOffsetAlignmentBytes = 64, - .uniformTexelBufferOffsetSingleTexelAlignment = false, - - /* VK_KHR_maintenance4 */ - .maxBufferSize = 1 << 30, - - /* VK_KHR_line_rasterization */ - .lineSubPixelPrecisionBits = 8, - - /* VK_EXT_custom_border_color */ - .maxCustomBorderColorSamplers = 32768, - - /* 
VK_EXT_graphics_pipeline_library */ - .graphicsPipelineLibraryFastLinking = true, - .graphicsPipelineLibraryIndependentInterpolationDecoration = true, - - /* VK_EXT_pipeline_robustness */ - .defaultRobustnessStorageBuffers = - VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, - .defaultRobustnessUniformBuffers = - VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, - .defaultRobustnessVertexInputs = - VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, - .defaultRobustnessImages = - VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT, - - /* VK_EXT_provoking_vertex */ - .provokingVertexModePerPipeline = false, - .transformFeedbackPreservesTriangleFanProvokingVertex = false, - - /* VK_KHR_vertex_attribute_divisor */ - /* We will have to restrict this a bit for multiview */ - .maxVertexAttribDivisor = UINT32_MAX, - .supportsNonZeroFirstInstance = true, - - /* VK_KHR_push_descriptor */ - .maxPushDescriptors = MAX_PUSH_DESCRIPTORS, - }; - - snprintf(properties->deviceName, sizeof(properties->deviceName), "%s", - device->name); - - memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE); - - const struct { - uint16_t vendor_id; - uint32_t device_id; - uint8_t pad[8]; - } dev_uuid = { - .vendor_id = ARM_VENDOR_ID, - .device_id = device->model->gpu_id, - }; - - STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE); - memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE); - STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE); - memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE); - - snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk"); - snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE, - "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); - - /* VK_EXT_physical_device_drm */ - if (device->drm.primary_rdev) { - properties->drmHasPrimary = true; - properties->drmPrimaryMajor = major(device->drm.primary_rdev); - properties->drmPrimaryMinor = 
minor(device->drm.primary_rdev); - } - if (device->drm.render_rdev) { - properties->drmHasRender = true; - properties->drmRenderMajor = major(device->drm.render_rdev); - properties->drmRenderMinor = minor(device->drm.render_rdev); - } - - /* VK_EXT_shader_module_identifier */ - STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == - sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); - memcpy(properties->shaderModuleIdentifierAlgorithmUUID, - vk_shaderModuleIdentifierAlgorithmUUID, - sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); -} - void panvk_physical_device_finish(struct panvk_physical_device *device) { @@ -1237,13 +314,16 @@ panvk_physical_device_init(struct panvk_physical_device *device, vk_warn_non_conformant_implementation("panvk"); struct vk_device_extension_table supported_extensions; - get_device_extensions(device, &supported_extensions); + panvk_arch_dispatch(arch, get_physical_device_extensions, device, + &supported_extensions); struct vk_features supported_features; - get_features(instance, device, &supported_features); + panvk_arch_dispatch(arch, get_physical_device_features, instance, + device, &supported_features); struct vk_properties properties; - get_device_properties(instance, device, &properties); + panvk_arch_dispatch(arch, get_physical_device_properties, instance, + device, &properties); struct vk_physical_device_dispatch_table dispatch_table; vk_physical_device_dispatch_table_from_entrypoints( @@ -1364,21 +444,6 @@ panvk_GetPhysicalDeviceMemoryProperties2( }; } -#define DEVICE_PER_ARCH_FUNCS(_ver) \ - VkResult panvk_v##_ver##_create_device( \ - struct panvk_physical_device *physical_device, \ - const VkDeviceCreateInfo *pCreateInfo, \ - const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); \ - \ - void panvk_v##_ver##_destroy_device( \ - struct panvk_device *device, const VkAllocationCallbacks *pAllocator) - -DEVICE_PER_ARCH_FUNCS(6); -DEVICE_PER_ARCH_FUNCS(7); -DEVICE_PER_ARCH_FUNCS(10); 
-DEVICE_PER_ARCH_FUNCS(12); -DEVICE_PER_ARCH_FUNCS(13); - VKAPI_ATTR VkResult VKAPI_CALL panvk_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, @@ -1577,6 +642,30 @@ get_image_format_features(struct panvk_physical_device *physical_device, return features; } +VkSampleCountFlags +panvk_get_sample_counts(unsigned arch, unsigned max_tib_size, + unsigned max_cbuf_atts, unsigned format_size) +{ + VkSampleCountFlags sample_counts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; + + unsigned max_msaa = + pan_get_max_msaa(arch, max_tib_size, max_cbuf_atts, format_size); + + assert(max_msaa >= 4); + + if (arch >= 12) + sample_counts |= VK_SAMPLE_COUNT_2_BIT; + + if (max_msaa >= 8) + sample_counts |= VK_SAMPLE_COUNT_8_BIT; + + if (max_msaa >= 16) + sample_counts |= VK_SAMPLE_COUNT_16_BIT; + + return sample_counts; +} + static VkFormatFeatureFlags2 get_image_format_sample_counts(struct panvk_physical_device *physical_device, VkFormat format) @@ -1588,7 +677,8 @@ get_image_format_sample_counts(struct panvk_physical_device *physical_device, assert(!vk_format_is_compressed(format)); unsigned format_size = vk_format_get_blocksize(format); - return get_sample_counts(arch, max_tib_size, max_cbuf_atts, format_size); + return panvk_get_sample_counts(arch, max_tib_size, max_cbuf_atts, + format_size); } static VkFormatFeatureFlags2 diff --git a/src/panfrost/vulkan/panvk_physical_device.h b/src/panfrost/vulkan/panvk_physical_device.h index b198dda79ff..a36f3def1db 100644 --- a/src/panfrost/vulkan/panvk_physical_device.h +++ b/src/panfrost/vulkan/panvk_physical_device.h @@ -10,6 +10,7 @@ #include #include "panvk_instance.h" +#include "panvk_macros.h" #include "vk_physical_device.h" #include "vk_sync.h" @@ -82,4 +83,25 @@ VkResult panvk_physical_device_init(struct panvk_physical_device *device, void panvk_physical_device_finish(struct panvk_physical_device *device); + +VkSampleCountFlags panvk_get_sample_counts(unsigned arch, + unsigned max_tib_size, + 
unsigned max_cbuf_atts, + unsigned format_size); + +#ifdef PAN_ARCH +void panvk_per_arch(get_physical_device_extensions)( + const struct panvk_physical_device *device, + struct vk_device_extension_table *ext); + +void panvk_per_arch(get_physical_device_features)( + const struct panvk_instance *instance, + const struct panvk_physical_device *device, struct vk_features *features); + +void panvk_per_arch(get_physical_device_properties)( + const struct panvk_instance *instance, + const struct panvk_physical_device *device, + struct vk_properties *properties); +#endif + #endif diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c new file mode 100644 index 00000000000..39d6ba53b08 --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_physical_device.c @@ -0,0 +1,947 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_device.c which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include + +#include "git_sha1.h" + +#include "vk_device.h" +#include "vk_limits.h" +#include "vk_shader_module.h" + +#include "panvk_instance.h" +#include "panvk_physical_device.h" +#include "panvk_wsi.h" + +#include "pan_format.h" +#include "pan_props.h" +#include "util/pan_ir.h" + +#define ARM_VENDOR_ID 0x13b5 +#define MAX_PUSH_DESCRIPTORS 32 +/* We reserve one ubo for push constant, one for sysvals and one per-set for the + * descriptor metadata */ +#define RESERVED_UBO_COUNT 6 +#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT) +#define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16) + +void +panvk_per_arch(get_physical_device_extensions)( + const struct panvk_physical_device *device, + struct vk_device_extension_table *ext) +{ + bool has_vk1_1 = PAN_ARCH >= 10; + bool has_vk1_2 = PAN_ARCH >= 10; + + *ext = (struct vk_device_extension_table){ + .KHR_8bit_storage = true, + .KHR_16bit_storage = true, +
.KHR_bind_memory2 = true, + .KHR_buffer_device_address = true, + .KHR_copy_commands2 = true, + .KHR_create_renderpass2 = true, + .KHR_dedicated_allocation = true, + .KHR_descriptor_update_template = true, + .KHR_depth_clamp_zero_one = true, + .KHR_depth_stencil_resolve = true, + .KHR_device_group = true, + .KHR_draw_indirect_count = PAN_ARCH >= 10, + .KHR_driver_properties = true, + .KHR_dynamic_rendering = true, + .KHR_dynamic_rendering_local_read = true, + .KHR_external_fence = true, + .KHR_external_fence_fd = true, + .KHR_external_memory = true, + .KHR_external_memory_fd = true, + .KHR_external_semaphore = true, + .KHR_external_semaphore_fd = true, + .KHR_format_feature_flags2 = true, + .KHR_get_memory_requirements2 = true, + .KHR_global_priority = true, + .KHR_image_format_list = true, + .KHR_imageless_framebuffer = true, + .KHR_index_type_uint8 = true, + .KHR_line_rasterization = true, + .KHR_load_store_op_none = true, + .KHR_maintenance1 = true, + .KHR_maintenance2 = true, + .KHR_maintenance3 = true, + .KHR_maintenance4 = has_vk1_1, + .KHR_maintenance5 = has_vk1_1, + .KHR_map_memory2 = true, + .KHR_multiview = true, + .KHR_pipeline_executable_properties = true, + .KHR_pipeline_library = true, + .KHR_push_descriptor = true, + .KHR_relaxed_block_layout = true, + .KHR_sampler_mirror_clamp_to_edge = true, + .KHR_sampler_ycbcr_conversion = PAN_ARCH >= 10, + .KHR_separate_depth_stencil_layouts = true, + .KHR_shader_draw_parameters = true, + .KHR_shader_expect_assume = true, + .KHR_shader_float_controls = true, + .KHR_shader_float_controls2 = has_vk1_1, + .KHR_shader_float16_int8 = true, + .KHR_shader_integer_dot_product = true, + .KHR_shader_maximal_reconvergence = has_vk1_1, + .KHR_shader_non_semantic_info = true, + .KHR_shader_quad_control = has_vk1_2, + .KHR_shader_relaxed_extended_instruction = true, + .KHR_shader_subgroup_extended_types = has_vk1_1, + .KHR_shader_subgroup_rotate = true, + .KHR_shader_subgroup_uniform_control_flow = has_vk1_1, + 
.KHR_shader_terminate_invocation = true, + .KHR_spirv_1_4 = PAN_ARCH >= 10, + .KHR_storage_buffer_storage_class = true, +#ifdef PANVK_USE_WSI_PLATFORM + .KHR_present_id2 = true, + .KHR_present_wait2 = true, + .KHR_swapchain = true, +#endif + .KHR_synchronization2 = true, + .KHR_timeline_semaphore = true, + .KHR_uniform_buffer_standard_layout = true, + .KHR_variable_pointers = true, + .KHR_vertex_attribute_divisor = true, + .KHR_vulkan_memory_model = true, + .KHR_zero_initialize_workgroup_memory = true, + .EXT_4444_formats = true, + .EXT_border_color_swizzle = true, + .EXT_buffer_device_address = true, + .EXT_custom_border_color = true, + .EXT_depth_bias_control = true, + .EXT_depth_clamp_zero_one = true, + .EXT_depth_clip_enable = true, + .EXT_depth_clip_control = true, +#ifdef VK_USE_PLATFORM_DISPLAY_KHR + .EXT_display_control = true, +#endif + .EXT_extended_dynamic_state = true, + .EXT_extended_dynamic_state2 = true, + .EXT_external_memory_dma_buf = true, + .EXT_global_priority = true, + .EXT_global_priority_query = true, + .EXT_graphics_pipeline_library = true, + .EXT_hdr_metadata = true, + .EXT_host_query_reset = true, + .EXT_image_2d_view_of_3d = true, + /* EXT_image_drm_format_modifier depends on KHR_sampler_ycbcr_conversion */ + .EXT_image_drm_format_modifier = PAN_ARCH >= 10, + .EXT_image_robustness = true, + .EXT_index_type_uint8 = true, + .EXT_line_rasterization = true, + .EXT_load_store_op_none = true, + .EXT_physical_device_drm = true, + .EXT_pipeline_creation_cache_control = true, + .EXT_pipeline_creation_feedback = true, + .EXT_pipeline_robustness = true, + .EXT_private_data = true, + .EXT_primitive_topology_list_restart = true, + .EXT_provoking_vertex = true, + .EXT_queue_family_foreign = true, + .EXT_sampler_filter_minmax = PAN_ARCH >= 10, + .EXT_scalar_block_layout = true, + .EXT_separate_stencil_usage = true, + .EXT_shader_module_identifier = true, + .EXT_shader_demote_to_helper_invocation = true, + .EXT_shader_replicated_composites = true, + 
.EXT_shader_subgroup_ballot = true, + .EXT_shader_subgroup_vote = true, + .EXT_subgroup_size_control = has_vk1_1, + .EXT_texel_buffer_alignment = true, + .EXT_texture_compression_astc_hdr = true, + .EXT_tooling_info = true, + .EXT_vertex_attribute_divisor = true, + .EXT_vertex_input_dynamic_state = true, + .EXT_ycbcr_2plane_444_formats = PAN_ARCH >= 10, + .EXT_ycbcr_image_arrays = PAN_ARCH >= 10, + .EXT_inline_uniform_block = true, + .GOOGLE_decorate_string = true, + .GOOGLE_hlsl_functionality1 = true, + .GOOGLE_user_type = true, + }; +} + +static bool +has_compressed_formats(const struct panvk_physical_device *physical_device, + const uint32_t required_formats) +{ + uint32_t supported_compr_fmts = + pan_query_compressed_formats(&physical_device->kmod.props); + + return (supported_compr_fmts & required_formats) == required_formats; +} + +static bool +has_texture_compression_etc2(const struct panvk_physical_device *physical_device) +{ + return has_compressed_formats(physical_device, + BITFIELD_BIT(MALI_ETC2_RGB8) | + BITFIELD_BIT(MALI_ETC2_RGB8A1) | BITFIELD_BIT(MALI_ETC2_RGBA8) | + BITFIELD_BIT(MALI_ETC2_R11_UNORM) | BITFIELD_BIT(MALI_ETC2_R11_SNORM) | + BITFIELD_BIT(MALI_ETC2_RG11_UNORM) | BITFIELD_BIT(MALI_ETC2_RG11_SNORM)); +} + +static bool +has_texture_compression_astc_ldr(const struct panvk_physical_device *physical_device) +{ + return has_compressed_formats(physical_device, BITFIELD_BIT(MALI_ASTC_2D_LDR)); +} + +static bool +has_texture_compression_astc_hdr(const struct panvk_physical_device *physical_device) +{ + return has_compressed_formats(physical_device, BITFIELD_BIT(MALI_ASTC_2D_HDR)); +} + +static bool +has_texture_compression_bc(const struct panvk_physical_device *physical_device) +{ + return has_compressed_formats(physical_device, + BITFIELD_BIT(MALI_BC1_UNORM) | BITFIELD_BIT(MALI_BC2_UNORM) | + BITFIELD_BIT(MALI_BC3_UNORM) | BITFIELD_BIT(MALI_BC4_UNORM) | + BITFIELD_BIT(MALI_BC4_SNORM) | BITFIELD_BIT(MALI_BC5_UNORM) | + 
BITFIELD_BIT(MALI_BC5_SNORM) | BITFIELD_BIT(MALI_BC6H_SF16) | + BITFIELD_BIT(MALI_BC6H_UF16) | BITFIELD_BIT(MALI_BC7_UNORM)); +} + +void +panvk_per_arch(get_physical_device_features)( + const struct panvk_instance *instance, + const struct panvk_physical_device *device, struct vk_features *features) +{ + *features = (struct vk_features){ + /* Vulkan 1.0 */ + .depthClamp = true, + .depthBiasClamp = true, + .dualSrcBlend = true, + .robustBufferAccess = true, + .fullDrawIndexUint32 = true, + .imageCubeArray = true, + .independentBlend = true, + .sampleRateShading = true, + .logicOp = true, + .multiDrawIndirect = PAN_ARCH >= 10, + .wideLines = true, + .largePoints = true, + .occlusionQueryPrecise = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = has_texture_compression_etc2(device), + .textureCompressionASTC_LDR = has_texture_compression_astc_ldr(device), + .textureCompressionBC = has_texture_compression_bc(device), + .fragmentStoresAndAtomics = PAN_ARCH >= 10, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = true, + .shaderStorageImageReadWithoutFormat = true, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderInt16 = true, + .shaderInt64 = true, + .drawIndirectFirstInstance = true, + + /* On v13+, the hardware isn't speculatively referencing to invalid + indices anymore. 
*/ + .vertexPipelineStoresAndAtomics = + PAN_ARCH >= 13 && instance->enable_vertex_pipeline_stores_atomics, + + /* Vulkan 1.1 */ + .storageBuffer16BitAccess = true, + .uniformAndStorageBuffer16BitAccess = true, + .storagePushConstant16 = true, + .storageInputOutput16 = true, + .multiview = true, + .multiviewGeometryShader = false, + .multiviewTessellationShader = false, + .variablePointersStorageBuffer = true, + .variablePointers = true, + .protectedMemory = false, + .samplerYcbcrConversion = PAN_ARCH >= 10, + .shaderDrawParameters = true, + + /* Vulkan 1.2 */ + .samplerMirrorClampToEdge = true, + .drawIndirectCount = PAN_ARCH >= 10, + .storageBuffer8BitAccess = true, + .uniformAndStorageBuffer8BitAccess = true, + .storagePushConstant8 = true, + .shaderBufferInt64Atomics = false, + .shaderSharedInt64Atomics = false, + .shaderFloat16 = PAN_ARCH >= 10, + .shaderInt8 = true, + + .descriptorIndexing = false, + .shaderInputAttachmentArrayDynamicIndexing = false, + .shaderUniformTexelBufferArrayDynamicIndexing = false, + .shaderStorageTexelBufferArrayDynamicIndexing = false, + .shaderUniformBufferArrayNonUniformIndexing = false, + .shaderSampledImageArrayNonUniformIndexing = false, + .shaderStorageBufferArrayNonUniformIndexing = false, + .shaderStorageImageArrayNonUniformIndexing = false, + .shaderInputAttachmentArrayNonUniformIndexing = false, + .shaderUniformTexelBufferArrayNonUniformIndexing = false, + .shaderStorageTexelBufferArrayNonUniformIndexing = false, + .descriptorBindingUniformBufferUpdateAfterBind = false, + .descriptorBindingSampledImageUpdateAfterBind = false, + .descriptorBindingStorageImageUpdateAfterBind = false, + .descriptorBindingStorageBufferUpdateAfterBind = false, + .descriptorBindingUniformTexelBufferUpdateAfterBind = false, + .descriptorBindingStorageTexelBufferUpdateAfterBind = false, + .descriptorBindingUpdateUnusedWhilePending = false, + .descriptorBindingPartiallyBound = false, + .descriptorBindingVariableDescriptorCount = false, + 
.runtimeDescriptorArray = false, + + .samplerFilterMinmax = PAN_ARCH >= 10, + .scalarBlockLayout = true, + .imagelessFramebuffer = true, + .uniformBufferStandardLayout = true, + .shaderSubgroupExtendedTypes = true, + .separateDepthStencilLayouts = true, + .hostQueryReset = true, + .timelineSemaphore = true, + .bufferDeviceAddress = true, + .bufferDeviceAddressCaptureReplay = false, + .bufferDeviceAddressMultiDevice = false, + .vulkanMemoryModel = true, + .vulkanMemoryModelDeviceScope = true, + .vulkanMemoryModelAvailabilityVisibilityChains = true, + .shaderOutputViewportIndex = false, + .shaderOutputLayer = false, + .subgroupBroadcastDynamicId = true, + + /* Vulkan 1.3 */ + .robustImageAccess = true, + .inlineUniformBlock = true, + .descriptorBindingInlineUniformBlockUpdateAfterBind = true, + .extendedDynamicState = true, + .extendedDynamicState2 = true, + .extendedDynamicState2LogicOp = true, + .extendedDynamicState2PatchControlPoints = false, + .pipelineCreationCacheControl = true, + .privateData = true, + .shaderDemoteToHelperInvocation = true, + .shaderTerminateInvocation = true, + .subgroupSizeControl = true, + .computeFullSubgroups = true, + .synchronization2 = true, + .textureCompressionASTC_HDR = has_texture_compression_astc_hdr(device), + .shaderZeroInitializeWorkgroupMemory = true, + .dynamicRendering = true, + .dynamicRenderingLocalRead = true, + .shaderIntegerDotProduct = true, + .maintenance4 = true, + .maintenance5 = true, + + /* Vulkan 1.4 */ + .shaderSubgroupRotate = true, + .shaderSubgroupRotateClustered = true, + + /* VK_KHR_depth_clamp_zero_one */ + .depthClampZeroOne = true, + + /* VK_KHR_line_rasterization */ + .rectangularLines = true, + .bresenhamLines = true, + + /* VK_EXT_graphics_pipeline_library */ + .graphicsPipelineLibrary = true, + + /* VK_KHR_global_priority */ + .globalPriorityQuery = true, + + /* VK_KHR_index_type_uint8 */ + .indexTypeUint8 = true, + + /* VK_KHR_vertex_attribute_divisor */ + .vertexAttributeInstanceRateDivisor = 
true, + .vertexAttributeInstanceRateZeroDivisor = true, + + /* VK_EXT_vertex_input_dynamic_state */ + .vertexInputDynamicState = true, + + /* VK_EXT_depth_bias_control */ + .depthBiasControl = true, + .leastRepresentableValueForceUnormRepresentation = false, + .floatRepresentation = false, + .depthBiasExact = true, + + /* VK_EXT_depth_clip_control */ + .depthClipControl = true, + + /* VK_EXT_depth_clip_enable */ + .depthClipEnable = true, + + /* VK_EXT_4444_formats */ + .formatA4R4G4B4 = true, + .formatA4B4G4R4 = true, + + /* VK_EXT_custom_border_color */ + .customBorderColors = true, + + /* VK_EXT_border_color_swizzle */ + .borderColorSwizzle = true, + .borderColorSwizzleFromImage = true, + + /* VK_EXT_image_2d_view_of_3d */ + .image2DViewOf3D = true, + .sampler2DViewOf3D = true, + + /* VK_EXT_primitive_topology_list_restart */ + .primitiveTopologyListRestart = true, + .primitiveTopologyPatchListRestart = false, + + /* VK_EXT_provoking_vertex */ + .provokingVertexLast = true, + .transformFeedbackPreservesProvokingVertex = false, + + /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to + * make it work, which forces us to apply the same swizzle on the border + * color, meaning we need to know the format when preparing the border + * color. 
+ */ + .customBorderColorWithoutFormat = PAN_ARCH != 7, + + /* VK_KHR_pipeline_executable_properties */ + .pipelineExecutableInfo = true, + + /* VK_EXT_pipeline_robustness */ + .pipelineRobustness = true, + + /* VK_KHR_shader_float_controls2 */ + .shaderFloatControls2 = true, + + /* VK_KHR_shader_quad_control */ + .shaderQuadControl = true, + + /* VK_KHR_shader_relaxed_extended_instruction */ + .shaderRelaxedExtendedInstruction = true, + + /* VK_KHR_shader_maximal_reconvergence */ + .shaderMaximalReconvergence = true, + + /* VK_KHR_shader_subgroup_uniform_control_flow */ + .shaderSubgroupUniformControlFlow = true, + + /* VK_KHR_shader_expect_assume */ + .shaderExpectAssume = true, + + /* VK_EXT_shader_module_identifier */ + .shaderModuleIdentifier = true, + + /* VK_EXT_shader_replicated_composites */ + .shaderReplicatedComposites = true, + + /* VK_EXT_texel_buffer_alignment */ + .texelBufferAlignment = true, + + /* VK_EXT_ycbcr_2plane_444_formats */ + .ycbcr2plane444Formats = PAN_ARCH >= 10, + + /* VK_EXT_ycbcr_image_arrays */ + .ycbcrImageArrays = PAN_ARCH >= 10, + + /* VK_KHR_push_descriptor */ + .pushDescriptor = true, + }; +} + +static uint32_t +get_api_version() +{ + const uint32_t version_override = vk_get_version_override(); + if (version_override) + return version_override; + + if (PAN_ARCH >= 10) + return VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION); + + return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION); +} + +static VkConformanceVersion +get_conformance_version() +{ + if (PAN_ARCH == 10) + return (VkConformanceVersion){1, 4, 1, 2}; + + return (VkConformanceVersion){0, 0, 0, 0}; +} + +void +panvk_per_arch(get_physical_device_properties)( + const struct panvk_instance *instance, + const struct panvk_physical_device *device, struct vk_properties *properties) +{ + unsigned max_tib_size = pan_get_max_tib_size(PAN_ARCH, device->model); + const unsigned max_cbuf_format = 16; /* R32G32B32A32 */ + + unsigned max_cbuf_atts = pan_get_max_cbufs(PAN_ARCH, 
max_tib_size); + VkSampleCountFlags sample_counts = + panvk_get_sample_counts(PAN_ARCH, max_tib_size, max_cbuf_atts, + max_cbuf_format); + + uint64_t os_page_size = 4096; + os_get_page_size(&os_page_size); + + /* Ensure that the max threads count per workgroup is valid for Bifrost */ + assert(PAN_ARCH > 8 || device->kmod.props.max_threads_per_wg <= 1024); + + float pointSizeRangeMin; + float pointSizeRangeMax; + + /* On v13+, point size handling changed entirely */ + if (PAN_ARCH >= 13) { + pointSizeRangeMin = 1.0; + pointSizeRangeMax = 1024.0; + } else { + pointSizeRangeMin = 0.125; + pointSizeRangeMax = 4095.9375; + } + + *properties = (struct vk_properties){ + .apiVersion = get_api_version(), + .driverVersion = vk_get_driver_version(), + .vendorID = instance->force_vk_vendor ? instance->force_vk_vendor : + ARM_VENDOR_ID, + + /* Collect arch_major, arch_minor, arch_rev and product_major, + * as done by the Arm driver. + */ + .deviceID = device->kmod.props.gpu_prod_id << 16, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + + /* Vulkan 1.0 limits */ + /* Maximum texture dimension is 2^16. */ + .maxImageDimension1D = (1 << 16), + .maxImageDimension2D = (1 << 16), + .maxImageDimension3D = (1 << 16), + .maxImageDimensionCube = (1 << 16), + .maxImageArrayLayers = (1 << 16), + /* Currently limited by the 1D texture size, which is 2^16. + * TODO: If we expose buffer views as 2D textures, we can increase the + * limit. + */ + .maxTexelBufferElements = (1 << 16), + /* Each uniform entry is 16-byte and the number of entries is encoded in a + * 12-bit field, with the minus(1) modifier, which gives 2^20. + */ + .maxUniformBufferRange = 1 << 20, + /* Storage buffer access is lowered to globals, so there's no limit here, + * except for the SW-descriptor we use to encode storage buffer + * descriptors, where the size is a 32-bit field. + */ + .maxStorageBufferRange = UINT32_MAX, + /* Vulkan 1.4 minimum. 
We currently implement push constants in terms of + * FAUs so we're limited by how many user-defined FAUs the hardware + * offers, minus driver-internal needs. If we ever need go to higher, + * we'll have to implement push constants in terms of both FAUs and global + * loads. + */ + .maxPushConstantsSize = 256, + /* On our kernel drivers we're limited by the available memory rather + * than available allocations. This is better expressed through memory + * properties and budget queries, and by returning + * VK_ERROR_OUT_OF_DEVICE_MEMORY when applicable, rather than + * this limit. + */ + .maxMemoryAllocationCount = UINT32_MAX, + /* On Mali, VkSampler objects do not use any resources other than host + * memory and host address space, availability of which can change + * significantly over time. + */ + .maxSamplerAllocationCount = UINT32_MAX, + /* A cache line. */ + .bufferImageGranularity = 64, + /* Sparse binding not supported yet. */ + .sparseAddressSpaceSize = 0, + /* On Bifrost, this is a software limit. We pick the minimum required by + * Vulkan, because Bifrost GPUs don't have unified descriptor tables, + * which forces us to aggregate all descriptors from all sets and dispatch + * them to per-type descriptor tables emitted at draw/dispatch time. The + * more sets we support the more copies we are likely to have to do at + * draw time. + * + * Valhall has native support for descriptor sets, and allows a maximum + * of 16 sets, but we reserve one for our internal use, so we have 15 + * left. + */ + .maxBoundDescriptorSets = PAN_ARCH <= 7 ? 4 : 15, + /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ + .maxDescriptorSetSamplers = UINT16_MAX, + /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots + * for our internal UBOs. + */ + .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32, + .maxDescriptorSetUniformBuffers = UINT8_MAX - 32, + /* SSBOs are limited by the size of a uniform buffer which contains our + * panvk_ssbo_addr objects.
+ * panvk_ssbo_addr is 16-byte, and each uniform entry in the Mali UBO is + * 16-byte too. The number of entries is encoded in a 12-bit field, with + * a minus(1) modifier, which gives a maximum of 2^12 SSBO + * descriptors. + */ + .maxDescriptorSetStorageBuffers = 1 << 12, + /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ + .maxDescriptorSetSampledImages = UINT16_MAX, + /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two + * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images. + */ + .maxDescriptorSetStorageImages = 1 << 8, + /* A maximum of 8 color render targets, and one depth-stencil render + * target. + */ + .maxDescriptorSetInputAttachments = 9, + + /* We could theoretically use the maxDescriptor values here (except for + * UBOs where we're really limited to 256 on the shader side), but on + * Bifrost we have to copy some tables around, which comes at an extra + * memory/processing cost, so let's pick something smaller. + */ + .maxPerStageDescriptorInputAttachments = 9, + .maxPerStageDescriptorSampledImages = 256, + .maxPerStageDescriptorSamplers = 128, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorStorageImages = 32, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64, + + /* Software limits to keep VkCommandBuffer tracking sane. */ + .maxDescriptorSetUniformBuffersDynamic = 16, + .maxDescriptorSetStorageBuffersDynamic = 8, + /* Software limit to keep VkCommandBuffer tracking sane. The HW supports + * up to 2^9 vertex attributes. + */ + .maxVertexInputAttributes = 16, + .maxVertexInputBindings = 16, + /* MALI_ATTRIBUTE::offset is 32-bit. */ + .maxVertexInputAttributeOffset = UINT32_MAX, + /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */ + .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE, + /* 32 vec4 varyings. */ + .maxVertexOutputComponents = 128, + /* Tesselation shaders not supported. 
*/ + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + /* Geometry shaders not supported. */ + .maxGeometryShaderInvocations = 0, + .maxGeometryInputComponents = 0, + .maxGeometryOutputComponents = 0, + .maxGeometryOutputVertices = 0, + .maxGeometryTotalOutputComponents = 0, + /* 32 vec4 varyings. */ + .maxFragmentInputComponents = 128, + /* 8 render targets. */ + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = max_cbuf_atts, + /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see + * above). + */ + .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8), + /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to + * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't + * really make sense to expose this amount of memory, especially since + * it's backed by global memory anyway. + */ + .maxComputeSharedMemorySize = 32768, + /* Software limit to meet Vulkan 1.0 requirements. We split the + * dispatch in several jobs if it's too big. + */ + .maxComputeWorkGroupCount = {65535, 65535, 65535}, + + /* We could also split into several jobs but this has many limitations. + * As such we limit to the max threads per workgroup supported by the GPU. + */ + .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg, + .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg, + device->kmod.props.max_threads_per_wg, + device->kmod.props.max_threads_per_wg}, + /* 8-bit subpixel precision. */ + .subPixelPrecisionBits = 8, + .subTexelPrecisionBits = 8, + .mipmapPrecisionBits = 8, + /* Software limit.
*/ + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = PAN_ARCH >= 10 ? UINT32_MAX : 1, + .maxSamplerLodBias = (float)INT16_MAX / 256.0f, + .maxSamplerAnisotropy = 16, + .maxViewports = 1, + /* Same as the framebuffer limit. */ + .maxViewportDimensions = {(1 << 14), (1 << 14)}, + /* Encoded in a 16-bit signed integer. */ + .viewportBoundsRange = {INT16_MIN, INT16_MAX}, + .viewportSubPixelBits = 0, + /* Align on a page. */ + .minMemoryMapAlignment = os_page_size, + /* Some compressed texture formats require 128-byte alignment. */ + .minTexelBufferOffsetAlignment = 64, + /* Always aligned on a uniform slot (vec4). */ + .minUniformBufferOffsetAlignment = 16, + /* Lowered to global accesses, which happen at the 32-bit granularity. */ + .minStorageBufferOffsetAlignment = 4, + /* Signed 4-bit value. */ + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = -0.5, + .maxInterpolationOffset = 0.5, + .subPixelInterpolationOffsetBits = 8, + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = 256, + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = max_cbuf_atts, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = sample_counts, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = 0, + .maxClipDistances = 0, + .maxCullDistances = 0, + .maxCombinedClipAndCullDistances = 0, + .discreteQueuePriorities = 2, + .pointSizeRange = {pointSizeRangeMin, pointSizeRangeMax}, + .lineWidthRange = {0.0, 7.9921875}, + .pointSizeGranularity = (1.0 / 16.0), + 
.lineWidthGranularity = (1.0 / 128.0), + .strictLines = true, + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 64, + .optimalBufferCopyRowPitchAlignment = 64, + .nonCoherentAtomSize = 64, + + /* Vulkan 1.0 sparse properties */ + .sparseResidencyNonResidentStrict = false, + .sparseResidencyAlignedMipSize = false, + .sparseResidencyStandard2DBlockShape = false, + .sparseResidencyStandard2DMultisampleBlockShape = false, + .sparseResidencyStandard3DBlockShape = false, + + /* Vulkan 1.1 properties */ + .subgroupSize = pan_subgroup_size(PAN_ARCH), + /* We only support VS, FS, and CS. + * + * The HW may spawn VS invocations for non-existing indices, which could + * be observed through subgroup ops (though the user can observe them + * through infinte loops anyway), so subgroup ops can't be supported in + * VS. + */ + .subgroupSupportedStages = + VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT, + .subgroupSupportedOperations = + VK_SUBGROUP_FEATURE_BASIC_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | + VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | + VK_SUBGROUP_FEATURE_CLUSTERED_BIT | + VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_ROTATE_BIT | + VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT, + .subgroupQuadOperationsInAllStages = false, + .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, + .maxMultiviewViewCount = 8, + .maxMultiviewInstanceIndex = UINT32_MAX, + .protectedNoFault = false, + .maxPerSetDescriptors = UINT16_MAX, + /* Our buffer size fields allow only this much */ + .maxMemoryAllocationSize = UINT32_MAX, + + /* Vulkan 1.2 properties */ + .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | + VK_RESOLVE_MODE_AVERAGE_BIT | + VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT, + .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | + VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT, + 
.independentResolveNone = true, + .independentResolve = true, + /* VK_KHR_driver_properties */ + .driverID = VK_DRIVER_ID_MESA_PANVK, + .conformanceVersion = get_conformance_version(), + .denormBehaviorIndependence = PAN_ARCH >= 9 ? + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE : + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .shaderSignedZeroInfNanPreserveFloat16 = true, + .shaderSignedZeroInfNanPreserveFloat32 = true, + .shaderSignedZeroInfNanPreserveFloat64 = false, + .shaderDenormPreserveFloat16 = true, + .shaderDenormPreserveFloat32 = true, + .shaderDenormPreserveFloat64 = true, + .shaderDenormFlushToZeroFloat16 = true, + .shaderDenormFlushToZeroFloat32 = true, + .shaderDenormFlushToZeroFloat64 = true, + .shaderRoundingModeRTEFloat16 = true, + .shaderRoundingModeRTEFloat32 = true, + .shaderRoundingModeRTEFloat64 = false, + .shaderRoundingModeRTZFloat16 = true, + .shaderRoundingModeRTZFloat32 = true, + .shaderRoundingModeRTZFloat64 = false, + /* XXX: VK_EXT_descriptor_indexing */ + .maxUpdateAfterBindDescriptorsInAllPools = 0, + .shaderUniformBufferArrayNonUniformIndexingNative = false, + .shaderSampledImageArrayNonUniformIndexingNative = false, + .shaderStorageBufferArrayNonUniformIndexingNative = false, + .shaderStorageImageArrayNonUniformIndexingNative = false, + .shaderInputAttachmentArrayNonUniformIndexingNative = false, + .robustBufferAccessUpdateAfterBind = false, + .quadDivergentImplicitLod = false, + .maxPerStageDescriptorUpdateAfterBindSamplers = 0, + .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0, + .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0, + .maxPerStageDescriptorUpdateAfterBindSampledImages = 0, + .maxPerStageDescriptorUpdateAfterBindStorageImages = 0, + .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0, + .maxPerStageUpdateAfterBindResources = 0, + .maxDescriptorSetUpdateAfterBindSamplers = 0, + .maxDescriptorSetUpdateAfterBindUniformBuffers = 
0, + .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0, + .maxDescriptorSetUpdateAfterBindStorageBuffers = 0, + .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0, + .maxDescriptorSetUpdateAfterBindSampledImages = 0, + .maxDescriptorSetUpdateAfterBindStorageImages = 0, + .maxDescriptorSetUpdateAfterBindInputAttachments = 0, + .filterMinmaxSingleComponentFormats = PAN_ARCH >= 10, + .filterMinmaxImageComponentMapping = PAN_ARCH >= 10, + .maxTimelineSemaphoreValueDifference = INT64_MAX, + .framebufferIntegerColorSampleCounts = sample_counts, + + /* Vulkan 1.3 properties */ + /* XXX: 1.3 support */ + + /* VK_EXT_subgroup_size_control */ + .minSubgroupSize = pan_subgroup_size(PAN_ARCH), + .maxSubgroupSize = pan_subgroup_size(PAN_ARCH), + .maxComputeWorkgroupSubgroups = + device->kmod.props.max_threads_per_wg / pan_subgroup_size(PAN_ARCH), + .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT, + + /* XXX: VK_EXT_inline_uniform_block */ + .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE, + .maxPerStageDescriptorInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, + .maxDescriptorSetInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, + .maxInlineUniformTotalSize = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE, + + /* VK_KHR_shader_integer_dot_product */ + .integerDotProduct8BitUnsignedAccelerated = true, + .integerDotProduct8BitSignedAccelerated = true, + .integerDotProduct4x8BitPackedUnsignedAccelerated = true, + .integerDotProduct4x8BitPackedSignedAccelerated = true, + + /* XXX: VK_EXT_texel_buffer_alignment */ + .storageTexelBufferOffsetAlignmentBytes = 64, + .storageTexelBufferOffsetSingleTexelAlignment = false, + .uniformTexelBufferOffsetAlignmentBytes = 64, + 
.uniformTexelBufferOffsetSingleTexelAlignment = false, + + /* VK_KHR_maintenance4 */ + .maxBufferSize = 1 << 30, + + /* VK_KHR_line_rasterization */ + .lineSubPixelPrecisionBits = 8, + + /* VK_EXT_custom_border_color */ + .maxCustomBorderColorSamplers = 32768, + + /* VK_EXT_graphics_pipeline_library */ + .graphicsPipelineLibraryFastLinking = true, + .graphicsPipelineLibraryIndependentInterpolationDecoration = true, + + /* VK_EXT_pipeline_robustness */ + .defaultRobustnessStorageBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, + .defaultRobustnessUniformBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, + .defaultRobustnessVertexInputs = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, + .defaultRobustnessImages = + VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT, + + /* VK_EXT_provoking_vertex */ + .provokingVertexModePerPipeline = false, + .transformFeedbackPreservesTriangleFanProvokingVertex = false, + + /* VK_KHR_vertex_attribute_divisor */ + /* We will have to restrict this a bit for multiview */ + .maxVertexAttribDivisor = UINT32_MAX, + .supportsNonZeroFirstInstance = true, + + /* VK_KHR_push_descriptor */ + .maxPushDescriptors = MAX_PUSH_DESCRIPTORS, + }; + + snprintf(properties->deviceName, sizeof(properties->deviceName), "%s", + device->name); + + memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE); + + const struct { + uint16_t vendor_id; + uint32_t device_id; + uint8_t pad[8]; + } dev_uuid = { + .vendor_id = ARM_VENDOR_ID, + .device_id = device->model->gpu_id, + }; + + STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE); + memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE); + STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE); + memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE); + + snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk"); + snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE, + 
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1); + + /* VK_EXT_physical_device_drm */ + if (device->drm.primary_rdev) { + properties->drmHasPrimary = true; + properties->drmPrimaryMajor = major(device->drm.primary_rdev); + properties->drmPrimaryMinor = minor(device->drm.primary_rdev); + } + if (device->drm.render_rdev) { + properties->drmHasRender = true; + properties->drmRenderMajor = major(device->drm.render_rdev); + properties->drmRenderMinor = minor(device->drm.render_rdev); + } + + /* VK_EXT_shader_module_identifier */ + STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == + sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); + memcpy(properties->shaderModuleIdentifierAlgorithmUUID, + vk_shaderModuleIdentifierAlgorithmUUID, + sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); +}