radv: reorder cmat properties according to performance
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

On GFX12, int8 is twice as fast as fp16/bf16.
On GFX11, they have the same throughput, but int8 at least still uses
fewer registers.

Also reorder 16-bit accumulators before 32-bit, because they use fewer
registers on GFX12.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37002>
This commit is contained in:
Georg Lehmann 2025-08-26 10:02:16 +02:00 committed by Marge Bot
parent 5a10142a9f
commit 8a5178755e

View file

@ -3054,6 +3054,11 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
struct __vk_outarray *base,
void (*array_size_cb)(struct __vk_outarray *base, struct matrix_prop *prop))
{
/* The Vulkan spec says:
* If some types are preferred over other types (e.g. for performance),
* they should appear earlier in the list enumerated by
* vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR.
*/
struct matrix_prop prop;
if (pdev->info.gfx_level >= GFX12) {
@ -3068,19 +3073,6 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
}
}
for (unsigned bfloat = 0; bfloat < 2; bfloat++) {
for (unsigned fp32 = 0; fp32 < 2; fp32++) {
prop.saturate = false;
prop.a_type = prop.b_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
prop.c_type = prop.r_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : prop.a_type;
if (pdev->info.gfx_level < GFX12 && bfloat)
continue; /* BF16 isn't working precisely on GFX11. */
(*array_size_cb)(base, &prop);
}
}
for (unsigned asigned = 0; asigned < 2; asigned++) {
for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
for (unsigned csigned = 0; csigned < 2; csigned++) {
@ -3098,6 +3090,19 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
}
}
}
for (unsigned fp32 = 0; fp32 < 2; fp32++) {
for (unsigned bfloat = 0; bfloat < 2; bfloat++) {
prop.saturate = false;
prop.a_type = prop.b_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
prop.c_type = prop.r_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : prop.a_type;
if (pdev->info.gfx_level < GFX12 && bfloat)
continue; /* BF16 isn't working precisely on GFX11. */
(*array_size_cb)(base, &prop);
}
}
}
VKAPI_ATTR VkResult VKAPI_CALL