brw: Fix encoding of 3-src dst in Xe2+
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Use the FD20 macro, which accounts for the implicit LSB zero value and is
already used for sources.  For the new macro we need to use the entire
bit-range of the field (55-51), so remove the adjustments we used to
do prior to encoding and decoding.

Fixes assertion in vkpeak (https://github.com/nihui/vkpeak) when running
bf16 tests on BMG.  The code now also correctly applies the subreg_nr
to the destination.  E.g. when a mad(32) got split into two pieces, the
generation would not fill out the upper part of the register:

```
 mad(16)         g13<1>BF        g10<8,8,1>BF    g12<8,8,1>BF    g56<1,1,1>F { align1 1H A@5 };
-mad(16)         g13<1>BF        g10.16<8,8,1>BF g12.16<8,8,1>BF g57<1,1,1>F { align1 2H A@5 };
+mad(16)         g13.16<1>BF     g10.16<8,8,1>BF g12.16<8,8,1>BF g57<1,1,1>F { align1 2H A@5 };
```

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37236>
This commit is contained in:
Caio Oliveira 2025-09-07 11:19:40 -07:00 committed by Marge Bot
parent f75e886bf6
commit f65fbb23e2
3 changed files with 3 additions and 3 deletions

View file

@ -604,7 +604,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst,
to_3src_align1_dst_hstride(dest.hstride));

View file

@ -508,7 +508,7 @@ FC(3src_a1_src0_hstride, /* 9+ */ 70, 69, /* 12+ */ 65, 64, devinfo->ver
FDC(3src_a1_src0_vstride, /* 9+ */ 68, 67, /* 12+ */ 43, 43, 35, 35, devinfo->ver >= 10)
FC(3src_a1_src0_hw_type, /* 9+ */ 66, 64, /* 12+ */ 42, 40, devinfo->ver >= 10)
/* dst_reg_nr same in align16 */
FC(3src_a1_dst_subreg_nr, /* 9+ */ 55, 54, /* 12+ */ 55, 54, devinfo->ver >= 10)
FD20(3src_a1_dst_subreg_nr, /* 9+ */ 55, 51, /* 12+ */ 55, 51, /* 20+ */ 55, 51, -1)
FC(3src_a1_special_acc, /* 9+ */ 55, 52, /* 12+ */ 54, 51, devinfo->ver >= 10) /* aliases dst_subreg_nr */
/* Reserved 51:50 */
FC(3src_a1_dst_hstride, /* 9+ */ 49, 49, /* 12+ */ 48, 48, devinfo->ver >= 10)

View file

@ -2731,7 +2731,7 @@ brw_hw_decode_inst(const struct brw_isa_info *isa,
inst->dst.file = brw_eu_inst_3src_a1_dst_reg_file(devinfo, raw);
inst->dst.type = brw_eu_inst_3src_a1_dst_type(devinfo, raw);
inst->dst.nr = brw_eu_inst_3src_dst_reg_nr(devinfo, raw);
inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw) * 8;
inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw);
inst->dst.hstride = DST_STRIDE_3SRC(brw_eu_inst_3src_a1_dst_hstride(devinfo, raw));
inst->src[0].file = brw_eu_inst_3src_a1_src0_reg_file(devinfo, raw);