mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
intel/dev: Enable VK_KHR_cooperative_matrix on all Gfx9+ GPUs
Gfx12.5 (DG2) will use DPAS instructions to accelerate the implementation. Earlier platforms will use equivalent discrete instructions (basically subgroup operations). Gfx12 (Tigerlake) will use DP4A for 8-bit integer matrix multiplication. Older platforms, which lack DP4A, will use a suboptimal instruction sequence. There is plenty of room for improvement here. DG2 (Gfx12.5) gets the following results from the CTS: Test run totals: Passed: 1642/13982 (11.7%) Failed: 0/13982 (0.0%) Not supported: 12340/13982 (88.3%) Warnings: 0/13982 (0.0%) Waived: 0/13982 (0.0%) On DG2 (Gfx12.5) with forced lowering, Raptor Lake (Gfx12) and Ice Lake (Gfx11): Test run totals: Passed: 1662/13982 (11.9%) Failed: 0/13982 (0.0%) Not supported: 12320/13982 (88.1%) Warnings: 0/13982 (0.0%) Waived: 0/13982 (0.0%) The difference in the number of tests run is due to saturatingAccumulation not being set on DG2 when DPAS is used. There is a comment in "intel/dev: Advertise integer configs with saturatingAccumulation too" that explains how this could be added should the need arise. v2: Prefix type names with INTEL_CMAT_. Suggested by Lionel. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25994>
This commit is contained in:
parent
8ea032b78e
commit
c6d44284aa
1 changed file with 27 additions and 3 deletions
|
|
@ -614,7 +614,13 @@ static const struct intel_device_info intel_device_info_chv = {
|
|||
GFX8_FEATURES, \
|
||||
GFX9_HW_INFO, \
|
||||
.has_sample_with_hiz = true, \
|
||||
.has_illegal_ccs_values = true
|
||||
.has_illegal_ccs_values = true, \
|
||||
.cooperative_matrix_configurations = { \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
|
||||
}
|
||||
|
||||
static const struct intel_device_info intel_device_info_skl_gt1 = {
|
||||
GFX9_FEATURES, .gt = 1,
|
||||
|
|
@ -840,7 +846,13 @@ static const struct intel_device_info intel_device_info_cfl_gt3 = {
|
|||
.has_illegal_ccs_values = true, \
|
||||
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
|
||||
.num_subslices = _subslices, \
|
||||
.max_eus_per_subslice = 8
|
||||
.max_eus_per_subslice = 8, \
|
||||
.cooperative_matrix_configurations = { \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
|
||||
}
|
||||
|
||||
#define GFX11_URB_MIN_MAX_ENTRIES \
|
||||
.min_entries = { \
|
||||
|
|
@ -967,6 +979,12 @@ static const struct intel_device_info intel_device_info_ehl_2x4 = {
|
|||
.scanout = PAT_ENTRY(1, WC, NONE), \
|
||||
.writeback_incoherent = PAT_ENTRY(0, WB, 2WAY), \
|
||||
.writecombining = PAT_ENTRY(1, WC, NONE), \
|
||||
}, \
|
||||
.cooperative_matrix_configurations = { \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
|
||||
}
|
||||
|
||||
#define dual_subslices(args...) { args, }
|
||||
|
|
@ -1099,7 +1117,13 @@ static const struct intel_device_info intel_device_info_sg1 = {
|
|||
.has_lsc = true, \
|
||||
.has_local_mem = true, \
|
||||
.has_aux_map = false, \
|
||||
.simulator_id = 29
|
||||
.simulator_id = 29, \
|
||||
.cooperative_matrix_configurations = { \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
|
||||
{ SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
|
||||
}
|
||||
|
||||
#define DG2_FEATURES \
|
||||
/* (Sub)slice info comes from the kernel topology info */ \
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue