mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 02:58:05 +02:00
radv: reorder cmat properties according to performance
On GFX12, int8 is twice as fast as fp16/bf16. On GFX11, they have the same throughput, but int8 at least still uses fewer registers. Also reorder 16-bit accumulators before 32-bit, because they use fewer registers on GFX12. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37002>
This commit is contained in:
parent
5a10142a9f
commit
8a5178755e
1 changed files with 18 additions and 13 deletions
|
|
@ -3054,6 +3054,11 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
|
|||
struct __vk_outarray *base,
|
||||
void (*array_size_cb)(struct __vk_outarray *base, struct matrix_prop *prop))
|
||||
{
|
||||
/* The Vulkan spec says:
|
||||
* If some types are preferred over other types (e.g. for performance),
|
||||
* they should appear earlier in the list enumerated by
|
||||
* vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR.
|
||||
*/
|
||||
struct matrix_prop prop;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
|
|
@ -3068,19 +3073,6 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
|
|||
}
|
||||
}
|
||||
|
||||
for (unsigned bfloat = 0; bfloat < 2; bfloat++) {
|
||||
for (unsigned fp32 = 0; fp32 < 2; fp32++) {
|
||||
prop.saturate = false;
|
||||
prop.a_type = prop.b_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
|
||||
prop.c_type = prop.r_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : prop.a_type;
|
||||
|
||||
if (pdev->info.gfx_level < GFX12 && bfloat)
|
||||
continue; /* BF16 isn't working precisely on GFX11. */
|
||||
|
||||
(*array_size_cb)(base, &prop);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned asigned = 0; asigned < 2; asigned++) {
|
||||
for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
|
||||
for (unsigned csigned = 0; csigned < 2; csigned++) {
|
||||
|
|
@ -3098,6 +3090,19 @@ static void fill_array_sizes_structs(const struct radv_physical_device *pdev,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned fp32 = 0; fp32 < 2; fp32++) {
|
||||
for (unsigned bfloat = 0; bfloat < 2; bfloat++) {
|
||||
prop.saturate = false;
|
||||
prop.a_type = prop.b_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
|
||||
prop.c_type = prop.r_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : prop.a_type;
|
||||
|
||||
if (pdev->info.gfx_level < GFX12 && bfloat)
|
||||
continue; /* BF16 isn't working precisely on GFX11. */
|
||||
|
||||
(*array_size_cb)(base, &prop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue