brw/emit: Fix align16 3src subregister encodings for HF types

Prior to Cherryview, align16 3src instruction sources had to have their
subregister number be DWord-aligned.  Cherryview added a discontiguous
bit in the encoding to represent bit 1 of the subregister number.  This
allows us to use packed HF sources.

Update the ISA encoding helpers to properly handle bit 1.  While we're
at it, make them take a full subregister number and adjust accordingly,
rather than making the callers divide or multiply by some alignment.

Note that the destination subregister must still be DWord aligned, so
HF destinations must be strided.

Thanks to Ian Romanick for discovering that we were botching this.

BSpec: 12054, 12081

v2 (idr): Fix ordering of high and low bit parameters to brw_inst_bits.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31834>
This commit is contained in:
Kenneth Graunke 2024-09-30 11:49:26 -07:00 committed by Marge Bot
parent 33cd5a49f1
commit d949d47f09
3 changed files with 34 additions and 20 deletions

View file

@ -957,7 +957,7 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo,
subreg_nr = brw_inst_3src_a1_dst_subreg_nr(devinfo, inst);
} else {
type = brw_inst_3src_a16_dst_type(devinfo, inst);
subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4;
subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst);
}
subreg_nr /= brw_type_size_bytes(type);
@ -1264,7 +1264,7 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo,
} else {
_file = FIXED_GRF;
reg_nr = brw_inst_3src_src0_reg_nr(devinfo, inst);
subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst) * 4;
subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst);
type = brw_inst_3src_a16_src_type(devinfo, inst);
if (brw_inst_3src_a16_src0_rep_ctrl(devinfo, inst)) {
@ -1330,7 +1330,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo,
} else {
_file = FIXED_GRF;
reg_nr = brw_inst_3src_src1_reg_nr(devinfo, inst);
subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst) * 4;
subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst);
type = brw_inst_3src_a16_src_type(devinfo, inst);
if (brw_inst_3src_a16_src1_rep_ctrl(devinfo, inst)) {
@ -1413,7 +1413,7 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo,
} else {
_file = FIXED_GRF;
reg_nr = brw_inst_3src_src2_reg_nr(devinfo, inst);
subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst) * 4;
subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst);
type = brw_inst_3src_a16_src_type(devinfo, inst);
if (brw_inst_3src_a16_src2_rep_ctrl(devinfo, inst)) {

View file

@ -498,16 +498,6 @@ brw_alu2(struct brw_codegen *p, unsigned opcode,
return insn;
}
static int
get_3src_subreg_nr(struct brw_reg reg)
{
/* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
* use 32-bit units (components 0..7). Since they only support F/D/UD
* types, this doesn't lose any flexibility, but uses fewer bits.
*/
return reg.subnr / 4;
}
static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
enum brw_vertical_stride vstride)
@ -672,7 +662,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
assert(src0.file == FIXED_GRF);
brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
@ -681,7 +671,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
assert(src1.file == FIXED_GRF);
brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
@ -690,7 +680,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
assert(src2.file == FIXED_GRF);
brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);

View file

@ -391,15 +391,12 @@ F(hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0)
* @{
*/
F(3src_src2_reg_nr, /* 9+ */ 125, 118, /* 12+ */ 127, 120) /* same in align1 */
F(3src_a16_src2_subreg_nr, /* 9+ */ 117, 115, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src2_swizzle, /* 9+ */ 114, 107, /* 12+ */ -1, -1)
F(3src_a16_src2_rep_ctrl, /* 9+ */ 106, 106, /* 12+ */ -1, -1)
F(3src_src1_reg_nr, /* 9+ */ 104, 97, /* 12+ */ 111, 104) /* same in align1 */
F(3src_a16_src1_subreg_nr, /* 9+ */ 96, 94, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src1_swizzle, /* 9+ */ 93, 86, /* 12+ */ -1, -1)
F(3src_a16_src1_rep_ctrl, /* 9+ */ 85, 85, /* 12+ */ -1, -1)
F(3src_src0_reg_nr, /* 9+ */ 83, 76, /* 12+ */ 79, 72) /* same in align1 */
F(3src_a16_src0_subreg_nr, /* 9+ */ 75, 73, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src0_swizzle, /* 9+ */ 72, 65, /* 12+ */ -1, -1)
F(3src_a16_src0_rep_ctrl, /* 9+ */ 64, 64, /* 12+ */ -1, -1)
F(3src_dst_reg_nr, /* 9+ */ 63, 56, /* 12+ */ 63, 56) /* same in align1 */
@ -438,6 +435,33 @@ F(3src_swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8)
F(3src_hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0)
/** @} */
#define F_3SRC_A16_SUBREG_NR(srcN, src_base) \
static inline void \
brw_inst_set_3src_a16_##srcN##_subreg_nr(const struct \
intel_device_info *devinfo, \
brw_inst *inst, \
unsigned value) \
{ \
assert(devinfo->ver == 9); \
assert((value & ~0b11110) == 0); \
brw_inst_set_bits(inst, src_base + 11, src_base + 9, value >> 2); \
brw_inst_set_bits(inst, src_base + 20, src_base + 20, (value >> 1) & 1); \
} \
static inline unsigned \
brw_inst_3src_a16_##srcN##_subreg_nr(const struct \
intel_device_info *devinfo, \
const brw_inst *inst) \
{ \
assert(devinfo->ver == 9); \
return brw_inst_bits(inst, src_base + 11, src_base + 9) << 2 | \
brw_inst_bits(inst, src_base + 20, src_base + 20) << 1; \
}
F_3SRC_A16_SUBREG_NR(src0, 64)
F_3SRC_A16_SUBREG_NR(src1, 85)
F_3SRC_A16_SUBREG_NR(src2, 106)
#undef F_3SRC_A16_SUBREG_NR
#define REG_TYPE(reg) \
static inline void \
brw_inst_set_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \