From e7179232c9d8968f45687c795323ef2b39719a2d Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 14 Aug 2024 16:41:05 -0700 Subject: [PATCH] intel/brw: Move encoding of Gfx11 3-src inside the inst helpers Create specific helper for register file encoding and handle it there. Use ad-hoc structs to let the macro take optional named arguments. Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_disasm.c | 28 ++------- src/intel/compiler/brw_eu_defines.h | 6 -- src/intel/compiler/brw_eu_emit.c | 25 ++------ src/intel/compiler/brw_inst.h | 94 ++++++++++++++++++++--------- 4 files changed, 75 insertions(+), 78 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b1bb447f2c3..078b3191d29 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -1224,13 +1224,8 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo, return 0; if (is_align1) { - if (devinfo->ver >= 12 && !brw_inst_3src_a1_src0_is_imm(devinfo, inst)) { - _file = brw_inst_3src_a1_src0_reg_file(devinfo, inst); - } else if (brw_inst_3src_a1_src0_reg_file(devinfo, inst) == - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE) { - _file = BRW_GENERAL_REGISTER_FILE; - } else { - _file = BRW_IMMEDIATE_VALUE; + _file = brw_inst_3src_a1_src0_reg_file(devinfo, inst); + if (_file == BRW_IMMEDIATE_VALUE) { uint16_t imm_val = brw_inst_3src_a1_src0_imm(devinfo, inst); enum brw_reg_type type = brw_inst_3src_a1_src0_type(devinfo, inst); @@ -1308,15 +1303,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo, return 0; if (is_align1) { - if (devinfo->ver >= 12) { - _file = brw_inst_3src_a1_src1_reg_file(devinfo, inst); - } else if (brw_inst_3src_a1_src1_reg_file(devinfo, inst) == - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE) { - _file = BRW_GENERAL_REGISTER_FILE; - } else { - _file = BRW_ARCHITECTURE_REGISTER_FILE; - } - + _file = brw_inst_3src_a1_src1_reg_file(devinfo, inst); reg_nr = brw_inst_3src_src1_reg_nr(devinfo, inst); subreg_nr = brw_inst_3src_a1_src1_subreg_nr(devinfo, inst); type = brw_inst_3src_a1_src1_type(devinfo, inst); @@ -1382,13 +1369,8 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo, return 0; if (is_align1) { - if (devinfo->ver >= 12 && !brw_inst_3src_a1_src2_is_imm(devinfo, inst)) { - _file = brw_inst_3src_a1_src2_reg_file(devinfo, inst); - } else if (brw_inst_3src_a1_src2_reg_file(devinfo, inst) == - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE) { - _file = BRW_GENERAL_REGISTER_FILE; - } else { - _file = BRW_IMMEDIATE_VALUE; + _file = brw_inst_3src_a1_src2_reg_file(devinfo, inst); + if (_file == BRW_IMMEDIATE_VALUE) { uint16_t imm_val = brw_inst_3src_a1_src2_imm(devinfo, inst); enum brw_reg_type type = brw_inst_3src_a1_src2_type(devinfo, inst); diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index d0e43e2b0c3..76245f5ae1b 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -738,12 +738,6 @@ enum ENUM_PACKED brw_reg_file { BAD_FILE, }; -enum ENUM_PACKED gfx10_align1_3src_reg_file { - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE = 0, - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE = 1, /* src0, src2 */ - BRW_ALIGN1_3SRC_ACCUMULATOR = 1, /* dest, src1 */ -}; - /* CNL adds Align1 support for 3-src instructions. Bit 35 of the instruction * word is "Execution Datatype" which controls whether the instruction operates * on float or integer types. The register arguments have fields that offer diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 85cd1cf9e11..2b04b5ec661 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -575,15 +575,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR)); - STATIC_ASSERT((BRW_ARCHITECTURE_REGISTER_FILE ^ 1) == BRW_ALIGN1_3SRC_ACCUMULATOR); - STATIC_ASSERT((BRW_GENERAL_REGISTER_FILE ^ 1) == BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); - - /* Gfx10 and Gfx11 bit encoding for the register file is the inversion of - * the actual register file (see the STATIC_ASSERTs above). - */ - unsigned dst_reg_file = devinfo->ver >= 12 ? dest.file : dest.file ^ 1; - - brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dst_reg_file); + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8); brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1); @@ -662,18 +654,9 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); } } else { - brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, - src0.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); - brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, - src1.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_ACCUMULATOR); - brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, - src2.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file); + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file); + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); } } else { diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 639d42abd2b..5553f91c85b 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -204,36 +204,74 @@ brw_inst_##name(const struct intel_device_info *devinfo, const brw_inst *inst)\ #define FDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, assertions) \ FFDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, assertions) - -/* Macro for the 2-bit register file field, which on Gfx12+ is stored as the - * variable length combination of an IsImm (hi12) bit and an additional file - * (lo12) bit. +/* Macro for storing register file field. See variant FF below for no + * assertions. + * + * In Gfx12+, either a single bit is available (ARF or GRF) or two bits are + * available. In that case the register file is stored as the variable length + * combination of an IsImm (hi12) bit and an additional file (lo12) bit. + * + * For some instructions in Gfx11, the encoding uses 0 for GRF, and 1 for + * either ARF (for accumulator) or IMM. */ -#define FI(name, hi9, lo9, hi12, lo12) \ +#define FFC(name, hi9, lo9, hi12, lo12, assertions, ...) \ static inline void \ brw_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_inst *inst, uint64_t value) \ + brw_inst *inst, enum brw_reg_file file) \ { \ - if (devinfo->ver >= 12) { \ + assert(assertions); \ + const struct { \ + bool _; /* Exists to avoid empty initializer. */ \ + bool grf_or_imm; \ + bool grf_or_acc; \ + } args = { ._ = false, __VA_ARGS__ }; \ + uint64_t value = (unsigned)file; \ + if (devinfo->ver < 12) { \ + if (devinfo->ver == 11 && args.grf_or_imm) { \ + assert(file == BRW_GENERAL_REGISTER_FILE || file == BRW_IMMEDIATE_VALUE); \ + value = file == BRW_GENERAL_REGISTER_FILE ? 0 : 1; \ + } else if (devinfo->ver == 11 && args.grf_or_acc) { \ + assert(file == BRW_GENERAL_REGISTER_FILE || file == BRW_ARCHITECTURE_REGISTER_FILE); \ + value = file == BRW_GENERAL_REGISTER_FILE ? 0 : 1; \ + } \ + brw_inst_set_bits(inst, hi9, lo9, value); \ + } else if (hi12 == lo12) { \ + brw_inst_set_bits(inst, hi12, lo12, value); \ + } else { \ brw_inst_set_bits(inst, hi12, hi12, value >> 1); \ if ((value >> 1) == 0) \ brw_inst_set_bits(inst, lo12, lo12, value & 1); \ - } else { \ - brw_inst_set_bits(inst, hi9, lo9, value); \ } \ } \ static inline uint64_t \ brw_inst_##name(const struct intel_device_info *devinfo, const brw_inst *inst)\ { \ - if (devinfo->ver >= 12) { \ - return (brw_inst_bits(inst, hi12, hi12) << 1) | \ - (brw_inst_bits(inst, hi12, hi12) == 0 ? \ - brw_inst_bits(inst, lo12, lo12) : 1); \ + assert(assertions); \ + const struct { \ + bool _; /* Exists to avoid empty initializer. */ \ + bool grf_or_imm; \ + bool grf_or_acc; \ + } args = { ._ = false, __VA_ARGS__ }; \ + uint64_t value; \ + if (devinfo->ver < 12) { \ + value = brw_inst_bits(inst, hi9, lo9); \ + if (devinfo->ver == 11 && args.grf_or_imm) \ + return value ? BRW_IMMEDIATE_VALUE : BRW_GENERAL_REGISTER_FILE; \ + else if (devinfo->ver == 11 && args.grf_or_acc) \ + return value ? BRW_ARCHITECTURE_REGISTER_FILE \ + : BRW_GENERAL_REGISTER_FILE; \ + } else if (hi12 == lo12) { \ + value = brw_inst_bits(inst, hi12, lo12); \ } else { \ - return brw_inst_bits(inst, hi9, lo9); \ + value = (brw_inst_bits(inst, hi12, hi12) << 1) | \ + (brw_inst_bits(inst, hi12, hi12) == 0 ? \ + brw_inst_bits(inst, lo12, lo12) : 1); \ } \ + return (enum brw_reg_file)value; \ } +#define FF(name, hi9, lo9, hi12, lo12, ...) FFC(name, hi9, lo9, hi12, lo12, true, __VA_ARGS__) + /* Macro for fields that become a constant in Gfx12+ not actually represented * in the instruction. */ @@ -274,7 +312,7 @@ FD20(src1_da1_subreg_nr, /* 9+ */ 100, 96, /* 12+ */ 103, 99, /* 20+ */ 103, 99, F(src1_da16_swiz_y, /* 9+ */ 99, 98, /* 12+ */ -1, -1) F(src1_da16_swiz_x, /* 9+ */ 97, 96, /* 12+ */ -1, -1) F(src1_reg_hw_type, /* 9+ */ 94, 91, /* 12+ */ 91, 88) -FI(src1_reg_file, /* 9+ */ 90, 89, /* 12+ */ 47, 98) +FF(src1_reg_file, /* 9+ */ 90, 89, /* 12+ */ 47, 98) F(src1_is_imm, /* 9+ */ -1, -1, /* 12+ */ 47, 47) FV20(src0_vstride, /* 9+ */ 88, 85, /* 12+ */ 87, 84, /* 20+ */ 86, 84) F(src0_width, /* 9+ */ 84, 82, /* 12+ */ 83, 81) @@ -300,10 +338,10 @@ F(dst_da16_subreg_nr, /* 9+ */ 52, 52, /* 12+ */ -1, -1) FD20(dst_da1_subreg_nr, /* 9+ */ 52, 48, /* 12+ */ 55, 51, /* 20+ */ 55, 51, 33) F(da16_writemask, /* 9+ */ 51, 48, /* 12+ */ -1, -1) /* Dst.ChanEn */ F(src0_reg_hw_type, /* 9+ */ 46, 43, /* 12+ */ 43, 40) -FI(src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 46, 66) +FF(src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 46, 66) F(src0_is_imm, /* 9+ */ -1, -1, /* 12+ */ 46, 46) F(dst_reg_hw_type, /* 9+ */ 40, 37, /* 12+ */ 39, 36) -F(dst_reg_file, /* 9+ */ 36, 35, /* 12+ */ 50, 50) +FF(dst_reg_file, /* 9+ */ 36, 35, /* 12+ */ 50, 50) F(mask_control, /* 9+ */ 34, 34, /* 12+ */ 31, 31) F20(flag_reg_nr, /* 9+ */ 33, 33, /* 12+ */ 23, 23, /* 20+ */ 23, 22) F20(flag_subreg_nr, /* 9+ */ 32, 32, /* 12+ */ 22, 22, /* 20+ */ 21, 21) @@ -429,15 +467,15 @@ FC(3src_a1_special_acc, /* 9+ */ 55, 52, /* 12+ */ 54, 51, devinfo->ver /* Reserved 51:50 */ FC(3src_a1_dst_hstride, /* 9+ */ 49, 49, /* 12+ */ 48, 48, devinfo->ver >= 10) FC(3src_a1_dst_hw_type, /* 9+ */ 48, 46, /* 12+ */ 38, 36, devinfo->ver >= 10) -FI(3src_a1_src2_reg_file, /* 9+ */ 45, 45, /* 12+ */ 47, 114) -FC(3src_a1_src1_reg_file, /* 9+ */ 44, 44, /* 12+ */ 98, 98, devinfo->ver >= 10) -FI(3src_a1_src0_reg_file, /* 9+ */ 43, 43, /* 12+ */ 46, 66) +FF(3src_a1_src2_reg_file, /* 9+ */ 45, 45, /* 12+ */ 47, 114, .grf_or_imm = true) +FFC(3src_a1_src1_reg_file, /* 9+ */ 44, 44, /* 12+ */ 98, 98, devinfo->ver >= 10, .grf_or_acc = true) +FF(3src_a1_src0_reg_file, /* 9+ */ 43, 43, /* 12+ */ 46, 66, .grf_or_imm = true) F(3src_a1_src2_is_imm, /* 9+ */ -1, -1, /* 12+ */ 47, 47) F(3src_a1_src0_is_imm, /* 9+ */ -1, -1, /* 12+ */ 46, 46) /* Source Modifier fields same in align16 */ -FC(3src_a1_dst_reg_file, /* 9+ */ 36, 36, /* 12+ */ 50, 50, devinfo->ver >= 10) +FFC(3src_a1_dst_reg_file, /* 9+ */ 36, 36, /* 12+ */ 50, 50, devinfo->ver >= 10, .grf_or_acc = true) FC(3src_a1_exec_type, /* 9+ */ 35, 35, /* 12+ */ 39, 39, devinfo->ver >= 10) /* Fields below this same in align16 */ /** @} */ @@ -531,20 +569,20 @@ brw_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo, */ F(dpas_3src_src2_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 127, 120) F(dpas_3src_src2_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 119, 115) -F(dpas_3src_src2_reg_file, /* 9+ */ -1, -1, /* 12+ */ 114, 114) +FF(dpas_3src_src2_reg_file, /* 9+ */ -1, -1, /* 12+ */ 114, 114) F(dpas_3src_src1_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 111, 104) F(dpas_3src_src1_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 103, 99) -F(dpas_3src_src1_reg_file, /* 9+ */ -1, -1, /* 12+ */ 98, 98) +FF(dpas_3src_src1_reg_file, /* 9+ */ -1, -1, /* 12+ */ 98, 98) F(dpas_3src_src1_hw_type, /* 9+ */ -1, -1, /* 12+ */ 90, 88) F(dpas_3src_src1_subbyte, /* 9+ */ -1, -1, /* 12+ */ 87, 86) F(dpas_3src_src2_subbyte, /* 9+ */ -1, -1, /* 12+ */ 85, 84) F(dpas_3src_src2_hw_type, /* 9+ */ -1, -1, /* 12+ */ 82, 80) F(dpas_3src_src0_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 79, 72) F(dpas_3src_src0_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 71, 67) -F(dpas_3src_src0_reg_file, /* 9+ */ -1, -1, /* 12+ */ 66, 66) +FF(dpas_3src_src0_reg_file, /* 9+ */ -1, -1, /* 12+ */ 66, 66) F(dpas_3src_dst_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 63, 56) F(dpas_3src_dst_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 55, 51) -F(dpas_3src_dst_reg_file, /* 9+ */ -1, -1, /* 12+ */ 50, 50) +FF(dpas_3src_dst_reg_file, /* 9+ */ -1, -1, /* 12+ */ 50, 50) F(dpas_3src_sdepth, /* 9+ */ -1, -1, /* 12+ */ 49, 48) F(dpas_3src_rcount, /* 9+ */ -1, -1, /* 12+ */ 45, 43) F(dpas_3src_src0_hw_type, /* 9+ */ -1, -1, /* 12+ */ 42, 40) @@ -631,11 +669,11 @@ F(send_ex_desc_ia_subreg_nr, /* 9+ */ 82, 80, /* 12+ */ 42, 40) F(send_src0_address_mode, /* 9+ */ 79, 79, /* 12+ */ -1, -1) F(send_sel_reg32_desc, /* 9+ */ 77, 77, /* 12+ */ 48, 48) F(send_sel_reg32_ex_desc, /* 9+ */ 61, 61, /* 12+ */ 49, 49) -F(send_src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 66, 66) +FF(send_src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 66, 66) F(send_src1_reg_nr, /* 9+ */ 51, 44, /* 12+ */ 111, 104) FC(send_src1_len, /* 9+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 125) -F(send_src1_reg_file, /* 9+ */ 36, 36, /* 12+ */ 98, 98) -F(send_dst_reg_file, /* 9+ */ 35, 35, /* 12+ */ 50, 50) +FF(send_src1_reg_file, /* 9+ */ 36, 36, /* 12+ */ 98, 98) +FF(send_dst_reg_file, /* 9+ */ 35, 35, /* 12+ */ 50, 50) FC(send_ex_bso, /* 9+ */ -1, -1, /* 12+ */ 39, 39, devinfo->verx10 >= 125) /** @} */