mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 05:00:09 +01:00
intel/executor: Update bfloat example
Elaborate on the packed/unpacked restrictions, use ADD(x, 0.0f) as a workaround for F->BF conversion. Reviewed-by: Rohan Garg <rohan.garg@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34506>
This commit is contained in:
parent
fbe5d559bd
commit
fafdd24285
1 changed files with 25 additions and 19 deletions
|
|
@ -3,30 +3,36 @@ local r = execute {
|
|||
@id g3
|
||||
|
||||
mov(8) g4<1>F g3<1>UD {A@1};
|
||||
mov(8) g5<1>F g4.2<0,1,0>F {A@1};
|
||||
mov(8) g5<1>F g4.1<0,1,0>F {A@1};
|
||||
|
||||
// Moving from F to BF (packed) doesn't work, but that's not much
|
||||
// of a problem because BFloat16 is a cropped version of Float32.
|
||||
//
|
||||
// So instead of
|
||||
//
|
||||
// mov(8) g10<1>BF g4<1>F
|
||||
//
|
||||
// use MOV with UW and appropriate offset.
|
||||
mov(8) g10<1>UW g4.1<2>UW {A@1};
|
||||
// Converting F to unpacked BF works, but as will be
|
||||
// illustrated, is not very useful.
|
||||
|
||||
mad(8) g11<1>BF g4<1>F g10<1>BF g5<1>F {A@1};
|
||||
mov(8) g10<2>BF g4<1>F {A@1};
|
||||
|
||||
// With the exception of DPAS, instructions need to have at
|
||||
// least one non-BF operand and the operands must be packed.
|
||||
|
||||
mov(8) g11<1>UW g10<2>UW {A@1}; // Pack it!
|
||||
add(8) g12<1>BF g11<1>BF g4<1>F {A@1};
|
||||
|
||||
// For a similar reason as above, instead of
|
||||
//
|
||||
// mov(8) g20<1>F g12<1>BF
|
||||
//
|
||||
// use a SHL unpacking into UD.
|
||||
shl(8) g20<1>UD g12<1>UW 16UW {A@1};
|
||||
// Converting F to packed BF doesn't work, so add the value
|
||||
// to 0.0f instead. This will preserve the NaN.
|
||||
|
||||
mov(8) g21<1>UD g20<1>F {A@1};
|
||||
@write g3 g21
|
||||
add(8) g20<1>BF g4<1>F 0F {A@1}; // F -> BF.
|
||||
|
||||
// Converting BF to F doesn't work, so for a packed source,
|
||||
// shift-left the bits to expand it into a UD instead.
|
||||
|
||||
shl(8) g30<1>UD g20<1>UW 16UW {A@1}; // BF -> F.
|
||||
|
||||
mad(8) g40<1>BF g12<1>BF g20<1>BF g5<1>F {A@1};
|
||||
add(8) g41<1>BF g40<1>BF g30<1>F {A@1};
|
||||
|
||||
shl(8) g42<1>UD g41<1>UW 16UW {A@1}; // BF -> F.
|
||||
|
||||
mov(8) g43<1>UD g42<1>F {A@1};
|
||||
@write g3 g43
|
||||
|
||||
@eot
|
||||
]]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue