nak: improve fp16 latencies on Ampere

Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37941>
This commit is contained in:
Karol Herbst 2025-10-21 13:27:49 +02:00 committed by Marge Bot
parent 85480200f8
commit cf4df97093

View file

@@ -347,17 +347,13 @@ impl RegLatencySM80 {
panic!("Illegal writer in sm80 raw");
}
},
FP16 | FP16_Alu => {
match writer {
FP16 => match writer {
CoupledAlu => 5,
CoupledDisp64 => 6,
CoupledFMA => 5,
IMADWideWriteDL => 3,
IMADWideWriteDH => 5,
// these next two are 4 in the spreadsheet, 5 passes test
// dEQP-VK.spirv_assembly.instruction.graphics.float16.arithmetic_1.fsign_vert
// dEQP-VK.glsl.builtin.precision_fp16_storage16b.faceforward.compute.vec3
FP16 => 5,
FP16 => 4,
FP16_Alu => 5,
FP16_F32 => 5,
HFMA2_MMA => 10,
@@ -373,8 +369,30 @@ impl RegLatencySM80 {
_ => {
panic!("Illegal writer in sm80 raw");
}
},
FP16_Alu => match writer {
CoupledAlu => 5,
CoupledDisp64 => 6,
CoupledFMA => 5,
IMADWideWriteDL => 3,
IMADWideWriteDH => 5,
FP16 => 5,
FP16_Alu => 4,
FP16_F32 => 5,
HFMA2_MMA => 10,
RedirectedFP64 => 10,
Clmad => 12,
IMMA_88 => 13,
MMA_1x_collect => 16,
MMA_2x_collect => 24,
DMMA => 25,
Cbu => 1,
Decoupled => 1,
DecoupledAgu => 1,
_ => {
panic!("Illegal writer in sm80 raw");
}
}
},
FP16_F32 => match writer {
CoupledAlu => 5,
CoupledDisp64 => 6,