mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 19:00:13 +01:00
brw/emit: Fix align16 3src subregister encodings for HF types
Prior to Cherryview, align16 3src instruction sources had to have their subregister number be DWord-aligned. Cherryview added a discontiguous bit in the encoding to represent bit 1 of the subregister number. This allows us to use packed HF sources. Update the ISA encoding helpers to properly handle bit 1. While we're at it, make them take a full subregister number and adjust accordingly, rather than making the callers divide or multiply by some alignment. Note that the destination subregister must still be DWord aligned, so HF destinations must be strided. Thanks to Ian Romanick for discovering that we were botching this. BSpec: 12054, 12081 v2 (idr): Fix ordering of high and low bit parameters to brw_inst_bits. Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Tested-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31834>
This commit is contained in:
parent
33cd5a49f1
commit
d949d47f09
3 changed files with 34 additions and 20 deletions
|
|
@ -957,7 +957,7 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
subreg_nr = brw_inst_3src_a1_dst_subreg_nr(devinfo, inst);
|
||||
} else {
|
||||
type = brw_inst_3src_a16_dst_type(devinfo, inst);
|
||||
subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4;
|
||||
subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst);
|
||||
}
|
||||
subreg_nr /= brw_type_size_bytes(type);
|
||||
|
||||
|
|
@ -1264,7 +1264,7 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
} else {
|
||||
_file = FIXED_GRF;
|
||||
reg_nr = brw_inst_3src_src0_reg_nr(devinfo, inst);
|
||||
subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst) * 4;
|
||||
subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst);
|
||||
type = brw_inst_3src_a16_src_type(devinfo, inst);
|
||||
|
||||
if (brw_inst_3src_a16_src0_rep_ctrl(devinfo, inst)) {
|
||||
|
|
@ -1330,7 +1330,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
} else {
|
||||
_file = FIXED_GRF;
|
||||
reg_nr = brw_inst_3src_src1_reg_nr(devinfo, inst);
|
||||
subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst) * 4;
|
||||
subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst);
|
||||
type = brw_inst_3src_a16_src_type(devinfo, inst);
|
||||
|
||||
if (brw_inst_3src_a16_src1_rep_ctrl(devinfo, inst)) {
|
||||
|
|
@ -1413,7 +1413,7 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
} else {
|
||||
_file = FIXED_GRF;
|
||||
reg_nr = brw_inst_3src_src2_reg_nr(devinfo, inst);
|
||||
subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst) * 4;
|
||||
subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst);
|
||||
type = brw_inst_3src_a16_src_type(devinfo, inst);
|
||||
|
||||
if (brw_inst_3src_a16_src2_rep_ctrl(devinfo, inst)) {
|
||||
|
|
|
|||
|
|
@ -498,16 +498,6 @@ brw_alu2(struct brw_codegen *p, unsigned opcode,
|
|||
return insn;
|
||||
}
|
||||
|
||||
static int
|
||||
get_3src_subreg_nr(struct brw_reg reg)
|
||||
{
|
||||
/* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
|
||||
* use 32-bit units (components 0..7). Since they only support F/D/UD
|
||||
* types, this doesn't lose any flexibility, but uses fewer bits.
|
||||
*/
|
||||
return reg.subnr / 4;
|
||||
}
|
||||
|
||||
static enum gfx10_align1_3src_vertical_stride
|
||||
to_3src_align1_vstride(const struct intel_device_info *devinfo,
|
||||
enum brw_vertical_stride vstride)
|
||||
|
|
@ -672,7 +662,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
|
||||
assert(src0.file == FIXED_GRF);
|
||||
brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
|
||||
brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
|
||||
brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
|
||||
brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
|
||||
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
|
||||
brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
|
||||
|
|
@ -681,7 +671,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
|
||||
assert(src1.file == FIXED_GRF);
|
||||
brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
|
||||
brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
|
||||
brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
|
||||
brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
|
||||
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
|
||||
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
|
||||
|
|
@ -690,7 +680,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
|
||||
assert(src2.file == FIXED_GRF);
|
||||
brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
|
||||
brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
|
||||
brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
|
||||
brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
|
||||
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
|
||||
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
|
||||
|
|
|
|||
|
|
@ -391,15 +391,12 @@ F(hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0)
|
|||
* @{
|
||||
*/
|
||||
F(3src_src2_reg_nr, /* 9+ */ 125, 118, /* 12+ */ 127, 120) /* same in align1 */
|
||||
F(3src_a16_src2_subreg_nr, /* 9+ */ 117, 115, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
|
||||
F(3src_a16_src2_swizzle, /* 9+ */ 114, 107, /* 12+ */ -1, -1)
|
||||
F(3src_a16_src2_rep_ctrl, /* 9+ */ 106, 106, /* 12+ */ -1, -1)
|
||||
F(3src_src1_reg_nr, /* 9+ */ 104, 97, /* 12+ */ 111, 104) /* same in align1 */
|
||||
F(3src_a16_src1_subreg_nr, /* 9+ */ 96, 94, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
|
||||
F(3src_a16_src1_swizzle, /* 9+ */ 93, 86, /* 12+ */ -1, -1)
|
||||
F(3src_a16_src1_rep_ctrl, /* 9+ */ 85, 85, /* 12+ */ -1, -1)
|
||||
F(3src_src0_reg_nr, /* 9+ */ 83, 76, /* 12+ */ 79, 72) /* same in align1 */
|
||||
F(3src_a16_src0_subreg_nr, /* 9+ */ 75, 73, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
|
||||
F(3src_a16_src0_swizzle, /* 9+ */ 72, 65, /* 12+ */ -1, -1)
|
||||
F(3src_a16_src0_rep_ctrl, /* 9+ */ 64, 64, /* 12+ */ -1, -1)
|
||||
F(3src_dst_reg_nr, /* 9+ */ 63, 56, /* 12+ */ 63, 56) /* same in align1 */
|
||||
|
|
@ -438,6 +435,33 @@ F(3src_swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8)
|
|||
F(3src_hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0)
|
||||
/** @} */
|
||||
|
||||
#define F_3SRC_A16_SUBREG_NR(srcN, src_base) \
|
||||
static inline void \
|
||||
brw_inst_set_3src_a16_##srcN##_subreg_nr(const struct \
|
||||
intel_device_info *devinfo, \
|
||||
brw_inst *inst, \
|
||||
unsigned value) \
|
||||
{ \
|
||||
assert(devinfo->ver == 9); \
|
||||
assert((value & ~0b11110) == 0); \
|
||||
brw_inst_set_bits(inst, src_base + 11, src_base + 9, value >> 2); \
|
||||
brw_inst_set_bits(inst, src_base + 20, src_base + 20, (value >> 1) & 1); \
|
||||
} \
|
||||
static inline unsigned \
|
||||
brw_inst_3src_a16_##srcN##_subreg_nr(const struct \
|
||||
intel_device_info *devinfo, \
|
||||
const brw_inst *inst) \
|
||||
{ \
|
||||
assert(devinfo->ver == 9); \
|
||||
return brw_inst_bits(inst, src_base + 11, src_base + 9) << 2 | \
|
||||
brw_inst_bits(inst, src_base + 20, src_base + 20) << 1; \
|
||||
}
|
||||
|
||||
F_3SRC_A16_SUBREG_NR(src0, 64)
|
||||
F_3SRC_A16_SUBREG_NR(src1, 85)
|
||||
F_3SRC_A16_SUBREG_NR(src2, 106)
|
||||
#undef F_3SRC_A16_SUBREG_NR
|
||||
|
||||
#define REG_TYPE(reg) \
|
||||
static inline void \
|
||||
brw_inst_set_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue