mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 13:40:16 +01:00
intel/executor: Update bfloat example
Elaborate on the packed/unpacked restrictions, use ADD(x, 0.0f) as a workaround for F->BF conversion. Reviewed-by: Rohan Garg <rohan.garg@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34506>
This commit is contained in:
parent
fbe5d559bd
commit
fafdd24285
1 changed files with 25 additions and 19 deletions
|
|
@ -3,30 +3,36 @@ local r = execute {
|
||||||
@id g3
|
@id g3
|
||||||
|
|
||||||
mov(8) g4<1>F g3<1>UD {A@1};
|
mov(8) g4<1>F g3<1>UD {A@1};
|
||||||
mov(8) g5<1>F g4.2<0,1,0>F {A@1};
|
mov(8) g5<1>F g4.1<0,1,0>F {A@1};
|
||||||
|
|
||||||
// Moving from F to BF (packed) doesn't work, but that's not much
|
// Converting F to unpacked BF works, but as will be
|
||||||
// of a problem because BFloat16 is a cropped version of Float32.
|
// illustrated, is not very useful.
|
||||||
//
|
|
||||||
// So instead of
|
|
||||||
//
|
|
||||||
// mov(8) g10<1>BF g4<1>F
|
|
||||||
//
|
|
||||||
// use MOV with UW and appropriate offset.
|
|
||||||
mov(8) g10<1>UW g4.1<2>UW {A@1};
|
|
||||||
|
|
||||||
mad(8) g11<1>BF g4<1>F g10<1>BF g5<1>F {A@1};
|
mov(8) g10<2>BF g4<1>F {A@1};
|
||||||
|
|
||||||
|
// With the exception of DPAS, instructions need to have at
|
||||||
|
// least one non-BF operand and the operands must be packed.
|
||||||
|
|
||||||
|
mov(8) g11<1>UW g10<2>UW {A@1}; // Pack it!
|
||||||
add(8) g12<1>BF g11<1>BF g4<1>F {A@1};
|
add(8) g12<1>BF g11<1>BF g4<1>F {A@1};
|
||||||
|
|
||||||
// For similar reason as above, instead of
|
// Converting F to packed BF doesn't work, so add the value
|
||||||
//
|
// to 0.0f instead. This will preserve the NaN.
|
||||||
// mov(8) g20<1>F g12<1>BF
|
|
||||||
//
|
|
||||||
// use a SHL unpacking into UD.
|
|
||||||
shl(8) g20<1>UD g12<1>UW 16UW {A@1};
|
|
||||||
|
|
||||||
mov(8) g21<1>UD g20<1>F {A@1};
|
add(8) g20<1>BF g4<1>F 0F {A@1}; // F -> BF.
|
||||||
@write g3 g21
|
|
||||||
|
// Converting BF to F doesn't work, so for a packed source,
|
||||||
|
// shift-left the bits to expand it into a UD instead.
|
||||||
|
|
||||||
|
shl(8) g30<1>UD g20<1>UW 16UW {A@1}; // BF -> F.
|
||||||
|
|
||||||
|
mad(8) g40<1>BF g12<1>BF g20<1>BF g5<1>F {A@1};
|
||||||
|
add(8) g41<1>BF g40<1>BF g30<1>F {A@1};
|
||||||
|
|
||||||
|
shl(8) g42<1>UD g41<1>UW 16UW {A@1}; // BF -> F.
|
||||||
|
|
||||||
|
mov(8) g43<1>UD g42<1>F {A@1};
|
||||||
|
@write g3 g43
|
||||||
|
|
||||||
@eot
|
@eot
|
||||||
]]
|
]]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue