mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 08:20:12 +01:00
nak: fix MMA latencies on Ampere
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Fixes: 7a01953a39 ("nak: Add Ampere and Ada latency information")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37941>
This commit is contained in:
parent
cf4df97093
commit
e7dca5a6ca
1 changed file with 56 additions and 33 deletions
|
|
@ -602,8 +602,9 @@ impl RegLatencySM80 {
|
|||
| FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -617,8 +618,9 @@ impl RegLatencySM80 {
|
|||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 1),
|
||||
Clmad => pred(has_pred, 5, 1),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -634,8 +636,9 @@ impl RegLatencySM80 {
|
|||
IMADWideWriteDH => pred(has_pred, 1, 1),
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -653,8 +656,9 @@ impl RegLatencySM80 {
|
|||
FP16 | FP16_Alu | FP16_F32 => pred(has_pred, 1, 2),
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 5, 3),
|
||||
Clmad => pred(has_pred, 5, 5),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 3),
|
||||
MMA_2x_collect => pred(has_pred, 12, 3),
|
||||
IMMA_88 => pred(has_pred, 8, 3),
|
||||
MMA_1x_collect => pred(has_pred, 11, 3),
|
||||
MMA_2x_collect => pred(has_pred, 19, 3),
|
||||
DMMA => pred(has_pred, 20, 3),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -671,8 +675,9 @@ impl RegLatencySM80 {
|
|||
| FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 5, 1),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -689,8 +694,9 @@ impl RegLatencySM80 {
|
|||
| FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -704,8 +710,9 @@ impl RegLatencySM80 {
|
|||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 2),
|
||||
Clmad => pred(has_pred, 5, 2),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -720,8 +727,9 @@ impl RegLatencySM80 {
|
|||
HFMA2_MMA => 2,
|
||||
RedirectedFP64 => 3,
|
||||
Clmad => pred(has_pred, 5, 1),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -736,8 +744,9 @@ impl RegLatencySM80 {
|
|||
HFMA2_MMA => 2,
|
||||
RedirectedFP64 => 2,
|
||||
Clmad => pred(has_pred, 4, 2),
|
||||
IMMA_88 | MMA_1x_collect => 7,
|
||||
MMA_2x_collect => 11,
|
||||
IMMA_88 => 7,
|
||||
MMA_1x_collect => 10,
|
||||
MMA_2x_collect => 18,
|
||||
DMMA => 19,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -750,8 +759,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 7,
|
||||
MMA_2x_collect => 11,
|
||||
IMMA_88 => 7,
|
||||
MMA_1x_collect => 10,
|
||||
MMA_2x_collect => 18,
|
||||
DMMA => 19,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -764,8 +774,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 4,
|
||||
MMA_2x_collect => 8,
|
||||
IMMA_88 => 4,
|
||||
MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 16,
|
||||
DMMA => 17,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -778,8 +789,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 4,
|
||||
MMA_2x_collect => 8,
|
||||
IMMA_88 => 4,
|
||||
MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 16,
|
||||
DMMA => 16,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -795,8 +807,9 @@ impl RegLatencySM80 {
|
|||
}
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 1, 9),
|
||||
Clmad => pred(has_pred, 1, 11),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 7, 6),
|
||||
MMA_2x_collect => pred(has_pred, 11, 6),
|
||||
IMMA_88 => pred(has_pred, 7, 6),
|
||||
MMA_1x_collect => pred(has_pred, 10, 5),
|
||||
MMA_2x_collect => pred(has_pred, 18, 5),
|
||||
DMMA => pred(has_pred, 19, 6),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -815,15 +828,25 @@ impl RegLatencySM80 {
|
|||
use RegLatencySM80::*;
|
||||
match writer {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 => match reader {
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
},
|
||||
Clmad | IMMA_88 | MMA_1x_collect | MMA_2x_collect | DMMA | Cbu
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA => {
|
||||
match reader {
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
}
|
||||
}
|
||||
RedirectedFP64 => 1,
|
||||
Clmad | IMMA_88 | MMA_1x_collect | MMA_2x_collect | DMMA
|
||||
| Decoupled | DecoupledAgu => match reader {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideReadAB
|
||||
| IMADWideReadCL | IMADWideReadCH => 2,
|
||||
| IMADWideReadCL | IMADWideReadCH | FP16 | FP16_Alu
|
||||
| FP16_F32 | HFMA2_MMA => 2,
|
||||
_ => 1,
|
||||
},
|
||||
Cbu => match reader {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideReadAB
|
||||
| IMADWideReadCL | IMADWideReadCH | FP16 | FP16_Alu
|
||||
| FP16_F32 | HFMA2_MMA => 2,
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
},
|
||||
_ => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue