From b8a7fba41f8578117d63ea37b2904f9f66d9fc63 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 18 Apr 2023 20:11:41 -0400 Subject: [PATCH] intel/compiler/gfx12.5+: Lower 64-bit cluster_broadcast with 32-bit ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For MTL (verx10 == 125), float64 is supported, but int64 is not. Therefore we need to lower cluster broadcast using 32-bit int ops. For gfx12.5+ platforms that support int64, the register regions used by cluster broadcast aren't supported by the 64-bit pipeline. On MTL, dEQP-VK.subgroups.clustered.*_double* and dEQP-VK.subgroups.clustered.*_dvec* were failing to validate the compiled shader in debug mode, and reportedly gpu-hanging in release mode. With this change dEQP-VK.subgroups.clustered.*_double* passed all 48 tests and dEQP-VK.subgroups.clustered.*_dvec* passed all 140 tests on MTL. Rework: * Move from generator to brw_fs_lower_regioning.cpp. (Suggested by Francisco) * Apply to verx10 >= 125.
(Suggested by Francisco) Cc: 23.1 Signed-off-by: Jordan Justen Reviewed-by: Marcin Ślusarz (v1) Reviewed-by: Francisco Jerez Part-of: (cherry picked from commit fcb72ffd0c61e2b3226306fae37b85ab4982a39e) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_lower_regioning.cpp | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 22c4e1f5f89..0657192dfe6 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -544,7 +544,7 @@ "description": "intel/compiler/gfx12.5+: Lower 64-bit cluster_broadcast with 32-bit ops", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index a30d1837799..a86e0926863 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -174,10 +174,17 @@ namespace { * integer DWord multiply, indirect addressing must not be * used." * + * For MTL (verx10 == 125), float64 is supported, but int64 is not. + * Therefore we need to lower cluster broadcast using 32-bit int ops. + * + * For gfx12.5+ platforms that support int64, the register regions + * used by cluster broadcast aren't supported by the 64-bit pipeline. + * * Work around the above and handle platforms that don't * support 64-bit types at all. */ - if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV || + if ((!has_64bit || devinfo->verx10 >= 125 || + devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) return BRW_REGISTER_TYPE_UD; else