diff --git a/src/intel/executor/examples/bfloat.lua b/src/intel/executor/examples/bfloat.lua
index a624b2e850f..b0cc7365711 100644
--- a/src/intel/executor/examples/bfloat.lua
+++ b/src/intel/executor/examples/bfloat.lua
@@ -3,30 +3,36 @@ local r = execute {
     @id      g3
 
     mov(8)   g4<1>F    g3<1>UD                       {A@1};
-    mov(8)   g5<1>F    g4.2<0,1,0>F                  {A@1};
+    mov(8)   g5<1>F    g4.1<0,1,0>F                  {A@1};
 
-    // Moving from F to BF (packed) doesn't work, but that's not much
-    // of a problem because BFloat16 is a cropped version of Float32.
-    //
-    // So instead of
-    //
-    //     mov(8)   g10<1>BF  g4<1>F
-    //
-    // use MOV with UW and appropriate offset.
-    mov(8)   g10<1>UW  g4.1<2>UW                     {A@1};
+    // Converting F to unpacked BF works but, as illustrated
+    // below, is not very useful.
 
-    mad(8)   g11<1>BF  g4<1>F    g10<1>BF  g5<1>F    {A@1};
+    mov(8)   g10<2>BF  g4<1>F                        {A@1};
+
+    // With the exception of DPAS, instructions need to have at
+    // least one non-BF operand, and the operands must be packed.
+
+    mov(8)   g11<1>UW  g10<2>UW                      {A@1};  // Pack it!
     add(8)   g12<1>BF  g11<1>BF  g4<1>F              {A@1};
 
-    // For similar reason as above, instead of
-    //
-    //     mov(8)   g20<1>F   g12<1>BF
-    //
-    // use a SHL unpacking into UD.
-    shl(8)   g20<1>UD  g12<1>UW  16UW                {A@1};
+    // Converting F to packed BF doesn't work, so add the value
+    // to 0.0f instead.  This will preserve the NaN.
 
-    mov(8)   g21<1>UD  g20<1>F                       {A@1};
-    @write   g3        g21
+    add(8)   g20<1>BF  g4<1>F    0F                  {A@1}; // F -> BF.
+
+    // Converting BF to F doesn't work, so for a packed source,
+    // shift the bits left to expand it into a UD instead.
+
+    shl(8)   g30<1>UD  g20<1>UW  16UW                {A@1}; // BF -> F.
+
+    mad(8)   g40<1>BF  g12<1>BF  g20<1>BF  g5<1>F    {A@1};
+    add(8)   g41<1>BF  g40<1>BF  g30<1>F             {A@1};
+
+    shl(8)   g42<1>UD  g41<1>UW  16UW                {A@1}; // BF -> F.
+
+    mov(8)   g43<1>UD  g42<1>F                       {A@1};
+    @write   g3        g43
 
     @eot
   ]]