From d949d47f092c33eabe1ecc4921171a895c471ce6 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 30 Sep 2024 11:49:26 -0700 Subject: [PATCH] brw/emit: Fix align16 3src subregister encodings for HF types Prior to Cherryview, align16 3src instruction sources had to have their subregister number be DWord-aligned. Cherryview added a discontiguous bit in the encoding to represent bit 1 of the subregister number. This allows us to use packed HF sources. Update the ISA encoding helpers to properly handle bit 1. While we're at it, make them take a full subregister number and adjust accordingly, rather than making the callers divide or multiply by some alignment. Note that the destination subregister must still be DWord aligned, so HF destinations must be strided. Thanks to Ian Romanick for discovering that we were botching this. BSpec: 12054, 12081 v2 (idr): Fix ordering of high and low bit parameters to brw_inst_bits. Reviewed-by: Ian Romanick Tested-by: Ian Romanick Part-of: --- src/intel/compiler/brw_disasm.c | 8 ++++---- src/intel/compiler/brw_eu_emit.c | 16 +++------------- src/intel/compiler/brw_inst.h | 30 +++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 63a7141c7a1..09f3d1e2b8f 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -957,7 +957,7 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo, subreg_nr = brw_inst_3src_a1_dst_subreg_nr(devinfo, inst); } else { type = brw_inst_3src_a16_dst_type(devinfo, inst); - subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4; + subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst); } subreg_nr /= brw_type_size_bytes(type); @@ -1264,7 +1264,7 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo, } else { _file = FIXED_GRF; reg_nr = brw_inst_3src_src0_reg_nr(devinfo, inst); - subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst) * 4; + subreg_nr = brw_inst_3src_a16_src0_subreg_nr(devinfo, inst); type = brw_inst_3src_a16_src_type(devinfo, inst); if (brw_inst_3src_a16_src0_rep_ctrl(devinfo, inst)) { @@ -1330,7 +1330,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo, } else { _file = FIXED_GRF; reg_nr = brw_inst_3src_src1_reg_nr(devinfo, inst); - subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst) * 4; + subreg_nr = brw_inst_3src_a16_src1_subreg_nr(devinfo, inst); type = brw_inst_3src_a16_src_type(devinfo, inst); if (brw_inst_3src_a16_src1_rep_ctrl(devinfo, inst)) { @@ -1413,7 +1413,7 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo, } else { _file = FIXED_GRF; reg_nr = brw_inst_3src_src2_reg_nr(devinfo, inst); - subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst) * 4; + subreg_nr = brw_inst_3src_a16_src2_subreg_nr(devinfo, inst); type = brw_inst_3src_a16_src_type(devinfo, inst); if (brw_inst_3src_a16_src2_rep_ctrl(devinfo, inst)) { diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index be470ce4a76..12758b71534 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -498,16 +498,6 @@ brw_alu2(struct brw_codegen *p, unsigned opcode, return insn; } -static int -get_3src_subreg_nr(struct brw_reg reg) -{ - /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions - * use 32-bit units (components 0..7). Since they only support F/D/UD - * types, this doesn't lose any flexibility, but uses fewer bits. - */ - return reg.subnr / 4; -} - static enum gfx10_align1_3src_vertical_stride to_3src_align1_vstride(const struct intel_device_info *devinfo, enum brw_vertical_stride vstride) @@ -672,7 +662,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(src0.file == FIXED_GRF); brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); - brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0)); + brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr); brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); @@ -681,7 +671,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(src1.file == FIXED_GRF); brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); - brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1)); + brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr); brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); @@ -690,7 +680,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(src2.file == FIXED_GRF); brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); - brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2)); + brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr); brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 758a761aca5..8409f88257e 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -391,15 +391,12 @@ F(hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) * @{ */ F(3src_src2_reg_nr, /* 9+ */ 125, 118, /* 12+ */ 127, 120) /* same in align1 */ -F(3src_a16_src2_subreg_nr, /* 9+ */ 117, 115, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */ F(3src_a16_src2_swizzle, /* 9+ */ 114, 107, /* 12+ */ -1, -1) F(3src_a16_src2_rep_ctrl, /* 9+ */ 106, 106, /* 12+ */ -1, -1) F(3src_src1_reg_nr, /* 9+ */ 104, 97, /* 12+ */ 111, 104) /* same in align1 */ -F(3src_a16_src1_subreg_nr, /* 9+ */ 96, 94, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */ F(3src_a16_src1_swizzle, /* 9+ */ 93, 86, /* 12+ */ -1, -1) F(3src_a16_src1_rep_ctrl, /* 9+ */ 85, 85, /* 12+ */ -1, -1) F(3src_src0_reg_nr, /* 9+ */ 83, 76, /* 12+ */ 79, 72) /* same in align1 */ -F(3src_a16_src0_subreg_nr, /* 9+ */ 75, 73, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */ F(3src_a16_src0_swizzle, /* 9+ */ 72, 65, /* 12+ */ -1, -1) F(3src_a16_src0_rep_ctrl, /* 9+ */ 64, 64, /* 12+ */ -1, -1) F(3src_dst_reg_nr, /* 9+ */ 63, 56, /* 12+ */ 63, 56) /* same in align1 */ @@ -438,6 +435,33 @@ F(3src_swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8) F(3src_hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) /** @} */ +#define F_3SRC_A16_SUBREG_NR(srcN, src_base) \ +static inline void \ +brw_inst_set_3src_a16_##srcN##_subreg_nr(const struct \ + intel_device_info *devinfo, \ + brw_inst *inst, \ + unsigned value) \ +{ \ + assert(devinfo->ver == 9); \ + assert((value & ~0b11110) == 0); \ + brw_inst_set_bits(inst, src_base + 11, src_base + 9, value >> 2); \ + brw_inst_set_bits(inst, src_base + 20, src_base + 20, (value >> 1) & 1); \ +} \ +static inline unsigned \ +brw_inst_3src_a16_##srcN##_subreg_nr(const struct \ + intel_device_info *devinfo, \ + const brw_inst *inst) \ +{ \ + assert(devinfo->ver == 9); \ + return brw_inst_bits(inst, src_base + 11, src_base + 9) << 2 | \ + brw_inst_bits(inst, src_base + 20, src_base + 20) << 1; \ +} + +F_3SRC_A16_SUBREG_NR(src0, 64) +F_3SRC_A16_SUBREG_NR(src1, 85) +F_3SRC_A16_SUBREG_NR(src2, 106) +#undef F_3SRC_A16_SUBREG_NR + #define REG_TYPE(reg) \ static inline void \ brw_inst_set_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \