From 24569b80792e8942a7961ee4efea5a54c07494e5 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 9 Feb 2024 21:06:58 -0800 Subject: [PATCH] intel/elk: Remove DPAS opcode Reviewed-by: Ian Romanick Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/elk/elk_compiler.c | 8 - src/intel/compiler/elk/elk_compiler.h | 9 -- src/intel/compiler/elk/elk_disasm.c | 115 +------------ src/intel/compiler/elk/elk_eu.c | 1 - src/intel/compiler/elk/elk_eu.h | 4 - src/intel/compiler/elk/elk_eu_compact.c | 105 +++--------- src/intel/compiler/elk/elk_eu_emit.c | 64 -------- src/intel/compiler/elk/elk_eu_opcodes.h | 1 - src/intel/compiler/elk/elk_eu_validate.c | 152 +----------------- src/intel/compiler/elk/elk_fs.cpp | 38 ----- src/intel/compiler/elk/elk_fs.h | 2 - src/intel/compiler/elk/elk_fs_builder.h | 21 --- src/intel/compiler/elk/elk_fs_generator.cpp | 19 --- .../compiler/elk/elk_fs_lower_regioning.cpp | 3 +- src/intel/compiler/elk/elk_fs_nir.cpp | 60 ------- src/intel/compiler/elk/elk_gram.y | 2 +- src/intel/compiler/elk/elk_inst.h | 61 ------- src/intel/compiler/elk/elk_ir.h | 10 -- src/intel/compiler/elk/elk_ir_fs.h | 1 - src/intel/compiler/elk/elk_ir_performance.cpp | 32 +--- .../elk/elk_schedule_instructions.cpp | 15 -- src/intel/compiler/elk/elk_shader.cpp | 8 - 22 files changed, 28 insertions(+), 703 deletions(-) diff --git a/src/intel/compiler/elk/elk_compiler.c b/src/intel/compiler/elk/elk_compiler.c index d0e93efe5db..fcd035e6e87 100644 --- a/src/intel/compiler/elk/elk_compiler.c +++ b/src/intel/compiler/elk/elk_compiler.c @@ -52,12 +52,6 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) /* Default to the sampler since that's what we've done since forever */ compiler->indirect_ubos_use_sampler = true; - compiler->lower_dpas = devinfo->verx10 < 125 || - intel_device_info_is_mtl(devinfo) || - (intel_device_info_is_arl(devinfo) && - devinfo->platform != INTEL_PLATFORM_ARL_H) || - debug_get_bool_option("INTEL_LOWER_DPAS", false); - /* There is no vec4 mode on Gfx10+, and we don't use it at all on Gfx8+. */ for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) { compiler->scalar_stage[i] = devinfo->ver >= 8 || @@ -175,8 +169,6 @@ elk_get_compiler_config_value(const struct elk_compiler *compiler) insert_u64_bit(&config, compiler->precise_trig); bits++; - insert_u64_bit(&config, compiler->lower_dpas); - bits++; uint64_t mask = DEBUG_DISK_CACHE_MASK; bits += util_bitcount64(mask); diff --git a/src/intel/compiler/elk/elk_compiler.h b/src/intel/compiler/elk/elk_compiler.h index fd45dc3a7d8..a3f26a847dc 100644 --- a/src/intel/compiler/elk/elk_compiler.h +++ b/src/intel/compiler/elk/elk_compiler.h @@ -131,14 +131,6 @@ struct elk_compiler { */ bool use_bindless_sampler_offset; - /** - * Should DPAS instructions be lowered? - * - * This will be set for all platforms before Gfx12.5. It may also be set - * platforms that support DPAS for testing purposes. - */ - bool lower_dpas; - /** * Calling the ra_allocate function after each register spill can take * several minutes. This option speeds up shader compilation by spilling @@ -1218,7 +1210,6 @@ struct elk_cs_prog_data { bool uses_num_work_groups; bool uses_inline_data; bool uses_btd_stack_ids; - bool uses_systolic; uint8_t generate_local_id; enum intel_compute_walk_order walk_order; diff --git a/src/intel/compiler/elk/elk_disasm.c b/src/intel/compiler/elk/elk_disasm.c index e6403a473e0..15fdb42f783 100644 --- a/src/intel/compiler/elk/elk_disasm.c +++ b/src/intel/compiler/elk/elk_disasm.c @@ -810,13 +810,6 @@ static const char* const xe2_lsc_cache_store[] = { [XE2_LSC_CACHE_STORE_L1WB_L3WB] = "L1WB_L3WB", }; -static const char* const dpas_systolic_depth[4] = { - [0] = "16", - [1] = "2", - [2] = "4", - [3] = "8" -}; - static int column; static int @@ -1057,27 +1050,6 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo, return 0; } -static int -dest_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const elk_inst *inst) -{ - uint32_t reg_file = elk_inst_dpas_3src_dst_reg_file(devinfo, inst); - - if (reg(file, reg_file, elk_inst_dpas_3src_dst_reg_nr(devinfo, inst)) == -1) - return 0; - - enum elk_reg_type type = elk_inst_dpas_3src_dst_type(devinfo, inst); - unsigned subreg_nr = elk_inst_dpas_3src_dst_subreg_nr(devinfo, inst); - - if (subreg_nr) - format(file, ".%u", subreg_nr); - string(file, "<1>"); - - string(file, elk_reg_type_to_letters(type)); - - return 0; -} - static int src_align1_region(FILE *file, unsigned _vert_stride, unsigned _width, @@ -1552,69 +1524,6 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo, return err; } -static int -src0_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const elk_inst *inst) -{ - uint32_t reg_file = elk_inst_dpas_3src_src0_reg_file(devinfo, inst); - - if (reg(file, reg_file, elk_inst_dpas_3src_src0_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = elk_inst_dpas_3src_src0_subreg_nr(devinfo, inst); - enum elk_reg_type type = elk_inst_dpas_3src_src0_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, 1, 1, 0); - - string(file, elk_reg_type_to_letters(type)); - - return 0; -} - -static int -src1_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const elk_inst *inst) -{ - uint32_t reg_file = elk_inst_dpas_3src_src1_reg_file(devinfo, inst); - - if (reg(file, reg_file, elk_inst_dpas_3src_src1_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = elk_inst_dpas_3src_src1_subreg_nr(devinfo, inst); - enum elk_reg_type type = elk_inst_dpas_3src_src1_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, 1, 1, 0); - - string(file, elk_reg_type_to_letters(type)); - - return 0; -} - -static int -src2_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const elk_inst *inst) -{ - uint32_t reg_file = elk_inst_dpas_3src_src2_reg_file(devinfo, inst); - - if (reg(file, reg_file, elk_inst_dpas_3src_src2_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = elk_inst_dpas_3src_src2_subreg_nr(devinfo, inst); - enum elk_reg_type type = elk_inst_dpas_3src_src2_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, 1, 1, 0); - - string(file, elk_reg_type_to_letters(type)); - - return 0; -} - static int imm(FILE *file, const struct elk_isa_info *isa, enum elk_reg_type type, const elk_inst *inst) @@ -1885,7 +1794,7 @@ swsb(FILE *file, const struct elk_isa_info *isa, const elk_inst *inst) const uint32_t x = elk_inst_swsb(devinfo, inst); const bool is_unordered = opcode == ELK_OPCODE_SEND || opcode == ELK_OPCODE_SENDC || - opcode == ELK_OPCODE_MATH || opcode == ELK_OPCODE_DPAS || + opcode == ELK_OPCODE_MATH || (devinfo->has_64bit_float_via_math_pipe && inst_has_type(isa, inst, ELK_REGISTER_TYPE_DF)); const struct tgl_swsb swsb = tgl_swsb_decode(devinfo, is_unordered, x); @@ -2026,15 +1935,6 @@ elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa, err |= control(file, "function", sync_function, elk_inst_cond_modifier(devinfo, inst), NULL); - } else if (opcode == ELK_OPCODE_DPAS) { - string(file, "."); - - err |= control(file, "systolic depth", dpas_systolic_depth, - elk_inst_dpas_3src_sdepth(devinfo, inst), NULL); - - const unsigned rcount = elk_inst_dpas_3src_rcount(devinfo, inst) + 1; - - format(file, "x%d", rcount); } else if (!is_send(opcode) && (devinfo->ver < 12 || elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE || @@ -2106,19 +2006,6 @@ elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa, } else if (opcode == ELK_OPCODE_JMPI) { pad(file, 16); err |= src1(file, isa, inst); - } else if (opcode == ELK_OPCODE_DPAS) { - pad(file, 16); - err |= dest_dpas_3src(file, devinfo, inst); - - pad(file, 32); - err |= src0_dpas_3src(file, devinfo, inst); - - pad(file, 48); - err |= src1_dpas_3src(file, devinfo, inst); - - pad(file, 64); - err |= src2_dpas_3src(file, devinfo, inst); - } else if (desc && desc->nsrc == 3) { pad(file, 16); err |= dest_3src(file, devinfo, inst); diff --git a/src/intel/compiler/elk/elk_eu.c b/src/intel/compiler/elk/elk_eu.c index 35c86aa6933..028b0263486 100644 --- a/src/intel/compiler/elk/elk_eu.c +++ b/src/intel/compiler/elk/elk_eu.c @@ -744,7 +744,6 @@ static const struct elk_opcode_desc opcode_descs[] = { { ELK_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) }, { ELK_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) }, { ELK_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) }, - { ELK_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) }, { ELK_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) }, { ELK_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) }, { ELK_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) }, diff --git a/src/intel/compiler/elk/elk_eu.h b/src/intel/compiler/elk/elk_eu.h index 85331de72a3..5c86775fe79 100644 --- a/src/intel/compiler/elk/elk_eu.h +++ b/src/intel/compiler/elk/elk_eu.h @@ -1910,10 +1910,6 @@ void elk_CMPN(struct elk_codegen *p, struct elk_reg src0, struct elk_reg src1); -elk_inst *elk_DPAS(struct elk_codegen *p, enum elk_gfx12_systolic_depth sdepth, - unsigned rcount, struct elk_reg dest, struct elk_reg src0, - struct elk_reg src1, struct elk_reg src2); - void elk_untyped_atomic(struct elk_codegen *p, struct elk_reg dst, diff --git a/src/intel/compiler/elk/elk_eu_compact.c b/src/intel/compiler/elk/elk_eu_compact.c index 57b5f6e3bd3..6f0e218f6ac 100644 --- a/src/intel/compiler/elk/elk_eu_compact.c +++ b/src/intel/compiler/elk/elk_eu_compact.c @@ -1095,25 +1095,6 @@ static const uint64_t xe2_3src_control_index_table[16] = { 0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df */ }; -static const uint64_t xe2_3src_dpas_control_index_table[16] = { - 0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */ - 0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */ - 0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */ - 0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */ - 0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */ - 0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */ - 0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */ - 0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic */ - 0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */ - 0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */ - 0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */ - 0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */ - 0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */ - 0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */ - 0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */ - 0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */ -}; - static const uint32_t gfx12_3src_source_index_table[32] = { 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */ 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */ @@ -1206,28 +1187,6 @@ static const uint32_t xe2_3src_source_index_table[16] = { 0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */ }; -static const uint32_t xe2_3src_dpas_source_index_table[16] = { - 0b100100000000100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[ub,b] - * dpas.*x1 grf:[f,bf] grf:bf grf:bf - * dpas.*x1 grf:[f,hf] grf:hf grf:hf - */ - 0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */ - 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */ - 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */ - 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */ - 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */ - 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */ - 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */ - 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */ - 0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[ub,b] */ - 0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[u4,s4] */ - 0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */ - 0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */ - 0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */ - 0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b] grf:[ub,b] */ - 0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */ -}; - static const uint32_t gfx12_3src_subreg_table[32] = { 0b00000000000000000000, /* .0 .0 .0 .0 */ 0b00100000000000000000, /* .0 .0 .0 .4 */ @@ -1530,13 +1489,12 @@ set_src1_index(const struct compaction_state *c, elk_compact_inst *dst, static bool set_3src_control_index(const struct intel_device_info *devinfo, - elk_compact_inst *dst, const elk_inst *src, - bool is_dpas) + elk_compact_inst *dst, const elk_inst *src) { assert(devinfo->ver >= 8); if (devinfo->ver >= 20) { - assert(is_dpas || !elk_inst_bits(src, 49, 49)); + assert(!elk_inst_bits(src, 49, 49)); const uint64_t uncompacted = /* 34b/Xe2+ */ (elk_inst_bits(src, 95, 92) << 30) | /* 4b */ @@ -1556,13 +1514,8 @@ set_3src_control_index(const struct intel_device_info *devinfo, (elk_inst_bits(src, 23, 21) << 3) | /* 3b */ (elk_inst_bits(src, 20, 18)); /* 3b */ - /* The bits used to index the tables for 3src and 3src-dpas - * are the same, so just need to pick the right one. - */ - const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table : - xe2_3src_control_index_table; - const unsigned size = is_dpas ? ARRAY_SIZE(xe2_3src_dpas_control_index_table) : - ARRAY_SIZE(xe2_3src_control_index_table); + const uint64_t *table = xe2_3src_control_index_table; + const unsigned size = ARRAY_SIZE(xe2_3src_control_index_table); for (unsigned i = 0; i < size; i++) { if (table[i] == uncompacted) { elk_compact_inst_set_3src_control_index(devinfo, dst, i); @@ -1646,8 +1599,7 @@ set_3src_control_index(const struct intel_device_info *devinfo, static bool set_3src_source_index(const struct intel_device_info *devinfo, - elk_compact_inst *dst, const elk_inst *src, - bool is_dpas) + elk_compact_inst *dst, const elk_inst *src) { assert(devinfo->ver >= 8); @@ -1669,17 +1621,12 @@ set_3src_source_index(const struct intel_device_info *devinfo, (elk_inst_bits(src, 43, 43) << 1) | /* 1b */ (elk_inst_bits(src, 35, 35)); /* 1b */ - /* In Xe2, the bits used to index the tables for 3src and 3src-dpas - * are the same, so just need to pick the right one. - */ const uint32_t *three_src_source_index_table = - devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table : - xe2_3src_source_index_table) : + devinfo->ver >= 20 ? xe2_3src_source_index_table : devinfo->verx10 >= 125 ? xehp_3src_source_index_table : gfx12_3src_source_index_table; const uint32_t three_src_source_index_table_len = - devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) : - ARRAY_SIZE(xe2_3src_source_index_table)) : + devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_source_index_table) : devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) : ARRAY_SIZE(gfx12_3src_source_index_table); @@ -1785,18 +1732,18 @@ has_unmapped_bits(const struct elk_isa_info *isa, const elk_inst *src) static bool has_3src_unmapped_bits(const struct intel_device_info *devinfo, - const elk_inst *src, bool is_dpas) + const elk_inst *src) { /* Check for three-source instruction bits that don't map to any of the * fields of the compacted instruction. All of them seem to be reserved * bits currently. */ if (devinfo->ver >= 20) { - assert(is_dpas || !elk_inst_bits(src, 49, 49)); + assert(!elk_inst_bits(src, 49, 49)); assert(!elk_inst_bits(src, 33, 33)); assert(!elk_inst_bits(src, 7, 7)); } else if (devinfo->ver >= 12) { - assert(is_dpas || !elk_inst_bits(src, 49, 49)); + assert(!elk_inst_bits(src, 49, 49)); assert(!elk_inst_bits(src, 7, 7)); } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) { assert(!elk_inst_bits(src, 127, 127) && @@ -1823,8 +1770,7 @@ elk_try_compact_3src_instruction(const struct elk_isa_info *isa, const struct intel_device_info *devinfo = isa->devinfo; assert(devinfo->ver >= 8); - bool is_dpas = elk_inst_opcode(isa, src) == ELK_OPCODE_DPAS; - if (has_3src_unmapped_bits(devinfo, src, is_dpas)) + if (has_3src_unmapped_bits(devinfo, src)) return false; #define compact(field) \ @@ -1834,10 +1780,10 @@ elk_try_compact_3src_instruction(const struct elk_isa_info *isa, compact(hw_opcode); - if (!set_3src_control_index(devinfo, dst, src, is_dpas)) + if (!set_3src_control_index(devinfo, dst, src)) return false; - if (!set_3src_source_index(devinfo, dst, src, is_dpas)) + if (!set_3src_source_index(devinfo, dst, src)) return false; if (devinfo->ver >= 12) { @@ -2395,16 +2341,14 @@ set_uncompacted_src1(const struct compaction_state *c, elk_inst *dst, static void set_uncompacted_3src_control_index(const struct compaction_state *c, - elk_inst *dst, elk_compact_inst *src, - bool is_dpas) + elk_inst *dst, elk_compact_inst *src) { const struct intel_device_info *devinfo = c->isa->devinfo; assert(devinfo->ver >= 8); if (devinfo->ver >= 20) { uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src); - uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] : - xe2_3src_control_index_table[compacted]; + uint64_t uncompacted = xe2_3src_control_index_table[compacted]; elk_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf); elk_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7); @@ -2482,8 +2426,7 @@ set_uncompacted_3src_control_index(const struct compaction_state *c, static void set_uncompacted_3src_source_index(const struct intel_device_info *devinfo, - elk_inst *dst, elk_compact_inst *src, - bool is_dpas) + elk_inst *dst, elk_compact_inst *src) { assert(devinfo->ver >= 8); @@ -2491,8 +2434,7 @@ set_uncompacted_3src_source_index(const struct intel_device_info *devinfo, if (devinfo->ver >= 12) { const uint32_t *three_src_source_index_table = - devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table : - xe2_3src_source_index_table) : + devinfo->ver >= 20 ? xe2_3src_source_index_table : devinfo->verx10 >= 125 ? xehp_3src_source_index_table : gfx12_3src_source_index_table; uint32_t uncompacted = three_src_source_index_table[compacted]; @@ -2550,7 +2492,7 @@ set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo, static void elk_uncompact_3src_instruction(const struct compaction_state *c, - elk_inst *dst, elk_compact_inst *src, bool is_dpas) + elk_inst *dst, elk_compact_inst *src) { const struct intel_device_info *devinfo = c->isa->devinfo; assert(devinfo->ver >= 8); @@ -2563,8 +2505,8 @@ elk_uncompact_3src_instruction(const struct compaction_state *c, uncompact(hw_opcode); if (devinfo->ver >= 12) { - set_uncompacted_3src_control_index(c, dst, src, is_dpas); - set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas); + set_uncompacted_3src_control_index(c, dst, src); + set_uncompacted_3src_source_index(devinfo, dst, src); set_uncompacted_3src_subreg_index(devinfo, dst, src); uncompact(debug_control); @@ -2574,8 +2516,8 @@ elk_uncompact_3src_instruction(const struct compaction_state *c, uncompact(src1_reg_nr); uncompact(src2_reg_nr); } else { - set_uncompacted_3src_control_index(c, dst, src, is_dpas); - set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas); + set_uncompacted_3src_control_index(c, dst, src); + set_uncompacted_3src_source_index(devinfo, dst, src); uncompact(dst_reg_nr); uncompact_a16(src0_rep_ctrl); @@ -2607,8 +2549,7 @@ uncompact_instruction(const struct compaction_state *c, elk_inst *dst, const enum elk_opcode opcode = elk_opcode_decode(c->isa, elk_compact_inst_3src_hw_opcode(devinfo, src)); if (elk_is_3src(c->isa, opcode)) { - const bool is_dpas = opcode == ELK_OPCODE_DPAS; - elk_uncompact_3src_instruction(c, dst, src, is_dpas); + elk_uncompact_3src_instruction(c, dst, src); return; } } diff --git a/src/intel/compiler/elk/elk_eu_emit.c b/src/intel/compiler/elk/elk_eu_emit.c index 86f7d08f6b2..c9df9ec175e 100644 --- a/src/intel/compiler/elk/elk_eu_emit.c +++ b/src/intel/compiler/elk/elk_eu_emit.c @@ -975,61 +975,6 @@ elk_alu3(struct elk_codegen *p, unsigned opcode, struct elk_reg dest, return inst; } -static elk_inst * -elk_dpas_three_src(struct elk_codegen *p, enum elk_gfx12_systolic_depth opcode, - unsigned sdepth, unsigned rcount, struct elk_reg dest, - struct elk_reg src0, struct elk_reg src1, struct elk_reg src2) -{ - const struct intel_device_info *devinfo = p->devinfo; - elk_inst *inst = next_insn(p, opcode); - - assert(dest.file == ELK_GENERAL_REGISTER_FILE); - elk_inst_set_dpas_3src_dst_reg_file(devinfo, inst, - ELK_GENERAL_REGISTER_FILE); - elk_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, dest.nr); - elk_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, dest.subnr); - - if (elk_reg_type_is_floating_point(dest.type)) { - elk_inst_set_dpas_3src_exec_type(devinfo, inst, - ELK_ALIGN1_3SRC_EXEC_TYPE_FLOAT); - } else { - elk_inst_set_dpas_3src_exec_type(devinfo, inst, - ELK_ALIGN1_3SRC_EXEC_TYPE_INT); - } - - elk_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth); - elk_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1); - - elk_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type); - elk_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type); - elk_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type); - elk_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type); - - assert(src0.file == ELK_GENERAL_REGISTER_FILE || - (src0.file == ELK_ARCHITECTURE_REGISTER_FILE && - src0.nr == ELK_ARF_NULL)); - - elk_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file); - elk_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, src0.nr); - elk_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, src0.subnr); - - assert(src1.file == ELK_GENERAL_REGISTER_FILE); - - elk_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file); - elk_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, src1.nr); - elk_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, src1.subnr); - elk_inst_set_dpas_3src_src1_subbyte(devinfo, inst, ELK_SUB_BYTE_PRECISION_NONE); - - assert(src2.file == ELK_GENERAL_REGISTER_FILE); - - elk_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file); - elk_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, src2.nr); - elk_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, src2.subnr); - elk_inst_set_dpas_3src_src2_subbyte(devinfo, inst, ELK_SUB_BYTE_PRECISION_NONE); - - return inst; -} - /*********************************************************************** * Convenience routines. */ @@ -1261,15 +1206,6 @@ elk_PLN(struct elk_codegen *p, struct elk_reg dest, return elk_alu2(p, ELK_OPCODE_PLN, dest, src0, src1); } -elk_inst * -elk_DPAS(struct elk_codegen *p, enum elk_gfx12_systolic_depth sdepth, - unsigned rcount, struct elk_reg dest, struct elk_reg src0, - struct elk_reg src1, struct elk_reg src2) -{ - return elk_dpas_three_src(p, ELK_OPCODE_DPAS, sdepth, rcount, dest, src0, - src1, src2); -} - elk_inst * elk_F32TO16(struct elk_codegen *p, struct elk_reg dst, struct elk_reg src) { diff --git a/src/intel/compiler/elk/elk_eu_opcodes.h b/src/intel/compiler/elk/elk_eu_opcodes.h index 0d79a72eb16..4993ddb6cd7 100644 --- a/src/intel/compiler/elk/elk_eu_opcodes.h +++ b/src/intel/compiler/elk/elk_eu_opcodes.h @@ -88,7 +88,6 @@ enum elk_opcode { ELK_OPCODE_DP2, ELK_OPCODE_DP4A, /**< Gfx12+ */ ELK_OPCODE_LINE, - ELK_OPCODE_DPAS, /**< Gfx12.5+ */ ELK_OPCODE_PLN, /**< G45+ */ ELK_OPCODE_MAD, /**< Gfx6+ */ ELK_OPCODE_LRP, /**< Gfx6+ */ diff --git a/src/intel/compiler/elk/elk_eu_validate.c b/src/intel/compiler/elk/elk_eu_validate.c index 76a8e3b96a3..828a2a35615 100644 --- a/src/intel/compiler/elk/elk_eu_validate.c +++ b/src/intel/compiler/elk/elk_eu_validate.c @@ -639,10 +639,7 @@ general_restrictions_based_on_operand_types(const struct elk_isa_info *isa, return error_msg; if (devinfo->ver >= 11) { - /* A register type of B or UB for DPAS actually means 4 bytes packed into - * a D or UD, so it is allowed. - */ - if (num_sources == 3 && elk_inst_opcode(isa, inst) != ELK_OPCODE_DPAS) { + if (num_sources == 3) { ERROR_IF(elk_reg_type_to_size(elk_inst_3src_a1_src1_type(devinfo, inst)) == 1 || elk_reg_type_to_size(elk_inst_3src_a1_src2_type(devinfo, inst)) == 1, "Byte data type is not supported for src1/2 register regioning. This includes " @@ -2416,153 +2413,6 @@ instruction_restrictions(const struct elk_isa_info *isa, } } - if (elk_inst_opcode(isa, inst) == ELK_OPCODE_DPAS) { - ERROR_IF(elk_inst_dpas_3src_sdepth(devinfo, inst) != ELK_SYSTOLIC_DEPTH_8, - "Systolic depth must be 8."); - - const unsigned sdepth = 8; - - const enum elk_reg_type dst_type = - elk_inst_dpas_3src_dst_type(devinfo, inst); - const enum elk_reg_type src0_type = - elk_inst_dpas_3src_src0_type(devinfo, inst); - const enum elk_reg_type src1_type = - elk_inst_dpas_3src_src1_type(devinfo, inst); - const enum elk_reg_type src2_type = - elk_inst_dpas_3src_src2_type(devinfo, inst); - - const enum gfx12_sub_byte_precision src1_sub_byte = - elk_inst_dpas_3src_src1_subbyte(devinfo, inst); - - if (src1_type != ELK_REGISTER_TYPE_B && src1_type != ELK_REGISTER_TYPE_UB) { - ERROR_IF(src1_sub_byte != ELK_SUB_BYTE_PRECISION_NONE, - "Sub-byte precision must be None for source type larger than Byte."); - } else { - ERROR_IF(src1_sub_byte != ELK_SUB_BYTE_PRECISION_NONE && - src1_sub_byte != ELK_SUB_BYTE_PRECISION_4BIT && - src1_sub_byte != ELK_SUB_BYTE_PRECISION_2BIT, - "Invalid sub-byte precision."); - } - - const enum gfx12_sub_byte_precision src2_sub_byte = - elk_inst_dpas_3src_src2_subbyte(devinfo, inst); - - if (src2_type != ELK_REGISTER_TYPE_B && src2_type != ELK_REGISTER_TYPE_UB) { - ERROR_IF(src2_sub_byte != ELK_SUB_BYTE_PRECISION_NONE, - "Sub-byte precision must be None."); - } else { - ERROR_IF(src2_sub_byte != ELK_SUB_BYTE_PRECISION_NONE && - src2_sub_byte != ELK_SUB_BYTE_PRECISION_4BIT && - src2_sub_byte != ELK_SUB_BYTE_PRECISION_2BIT, - "Invalid sub-byte precision."); - } - - const unsigned src1_bits_per_element = - (8 * elk_reg_type_to_size(src1_type)) >> - elk_inst_dpas_3src_src1_subbyte(devinfo, inst); - - const unsigned src2_bits_per_element = - (8 * elk_reg_type_to_size(src2_type)) >> - elk_inst_dpas_3src_src2_subbyte(devinfo, inst); - - /* The MAX2(1, ...) is just to prevent possible division by 0 later. */ - const unsigned ops_per_chan = - MAX2(1, 32 / MAX2(src1_bits_per_element, src2_bits_per_element)); - - ERROR_IF(elk_inst_exec_size(devinfo, inst) != ELK_EXECUTE_8, - "DPAS execution size must be 8."); - - const unsigned exec_size = 8; - - const unsigned dst_subnr = elk_inst_dpas_3src_dst_subreg_nr(devinfo, inst); - const unsigned src0_subnr = elk_inst_dpas_3src_src0_subreg_nr(devinfo, inst); - const unsigned src1_subnr = elk_inst_dpas_3src_src1_subreg_nr(devinfo, inst); - const unsigned src2_subnr = elk_inst_dpas_3src_src2_subreg_nr(devinfo, inst); - - /* Until HF is supported as dst type, this is effectively subnr == 0. */ - ERROR_IF(dst_subnr % exec_size != 0, - "Destination subregister offset must be a multiple of ExecSize."); - - /* Until HF is supported as src0 type, this is effectively subnr == 0. */ - ERROR_IF(src0_subnr % exec_size != 0, - "Src0 subregister offset must be a multiple of ExecSize."); - - ERROR_IF(src1_subnr != 0, - "Src1 subregister offsets must be 0."); - - /* In nearly all cases, this effectively requires that src2.subnr be - * 0. It is only when src1 is 8 bits and src2 is 2 or 4 bits that the - * ops_per_chan value can allow non-zero src2.subnr. - */ - ERROR_IF(src2_subnr % (sdepth * ops_per_chan) != 0, - "Src2 subregister offset must be a multiple of SystolicDepth " - "times OPS_PER_CHAN."); - - ERROR_IF(dst_subnr * type_sz(dst_type) >= REG_SIZE, - "Destination subregister specifies next register."); - - ERROR_IF(src0_subnr * type_sz(src0_type) >= REG_SIZE, - "Src0 subregister specifies next register."); - - ERROR_IF((src1_subnr * type_sz(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE, - "Src1 subregister specifies next register."); - - ERROR_IF((src2_subnr * type_sz(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE, - "Src2 subregister specifies next register."); - - if (elk_inst_3src_atomic_control(devinfo, inst)) { - /* FINISHME: When we start emitting DPAS with Atomic set, figure out - * a way to validate it. Also add a test in test_eu_validate.cpp. - */ - ERROR_IF(true, - "When instruction option Atomic is used it must be follwed by a " - "DPAS instruction."); - } - - if (elk_inst_dpas_3src_exec_type(devinfo, inst) == - ELK_ALIGN1_3SRC_EXEC_TYPE_FLOAT) { - ERROR_IF(dst_type != ELK_REGISTER_TYPE_F, - "DPAS destination type must be F."); - ERROR_IF(src0_type != ELK_REGISTER_TYPE_F, - "DPAS src0 type must be F."); - ERROR_IF(src1_type != ELK_REGISTER_TYPE_HF, - "DPAS src1 type must be HF."); - ERROR_IF(src2_type != ELK_REGISTER_TYPE_HF, - "DPAS src2 type must be HF."); - } else { - ERROR_IF(dst_type != ELK_REGISTER_TYPE_D && - dst_type != ELK_REGISTER_TYPE_UD, - "DPAS destination type must be D or UD."); - ERROR_IF(src0_type != ELK_REGISTER_TYPE_D && - src0_type != ELK_REGISTER_TYPE_UD, - "DPAS src0 type must be D or UD."); - ERROR_IF(src1_type != ELK_REGISTER_TYPE_B && - src1_type != ELK_REGISTER_TYPE_UB, - "DPAS src1 base type must be B or UB."); - ERROR_IF(src2_type != ELK_REGISTER_TYPE_B && - src2_type != ELK_REGISTER_TYPE_UB, - "DPAS src2 base type must be B or UB."); - - if (elk_reg_type_is_unsigned_integer(dst_type)) { - ERROR_IF(!elk_reg_type_is_unsigned_integer(src0_type) || - !elk_reg_type_is_unsigned_integer(src1_type) || - !elk_reg_type_is_unsigned_integer(src2_type), - "If any source datatype is signed, destination datatype " - "must be signed."); - } - } - - /* FINISHME: Additional restrictions mentioned in the Bspec that are not - * yet enforced here: - * - * - General Accumulator registers access is not supported. This is - * currently enforced in elk_dpas_three_src (elk_eu_emit.c). - * - * - Given any combination of datatypes in the sources of a DPAS - * instructions, the boundaries of a register should not be crossed. - */ - } - return error_msg; } diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 309b356e6b3..968540f46be 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -391,21 +391,6 @@ elk_fs_inst::has_source_and_destination_hazard() const default: return !is_uniform(src[0]); } - case ELK_OPCODE_DPAS: - /* This is overly conservative. The actual hazard is more complicated to - * describe. When the repeat count is N, the single instruction behaves - * like N instructions with a repeat count of one, but the destination - * and source registers are incremented (in somewhat complex ways) for - * each instruction. - * - * This means the source and destination register is actually a range of - * registers. The hazard exists of an earlier iteration would write a - * register that should be read by a later iteration. - * - * There may be some advantage to properly modeling this, but for now, - * be overly conservative. - */ - return rcount > 1; default: /* The SIMD16 compressed instruction * @@ -855,9 +840,6 @@ elk_fs_inst::components_read(unsigned i) const else return 1; - case ELK_OPCODE_DPAS: - unreachable("Do not use components_read() for DPAS."); - default: return 1; } @@ -918,26 +900,6 @@ elk_fs_inst::size_read(int arg) const } break; - case ELK_OPCODE_DPAS: - switch (arg) { - case 0: - if (src[0].type == ELK_REGISTER_TYPE_HF) { - return rcount * REG_SIZE / 2; - } else { - return rcount * REG_SIZE; - } - case 1: - return sdepth * REG_SIZE; - case 2: - /* This is simpler than the formula described in the Bspec, but it - * covers all of the cases that we support on DG2. - */ - return rcount * REG_SIZE; - default: - unreachable("Invalid source number."); - } - break; - case ELK_SHADER_OPCODE_TEX: case ELK_FS_OPCODE_TXB: case ELK_SHADER_OPCODE_TXD: diff --git a/src/intel/compiler/elk/elk_fs.h b/src/intel/compiler/elk/elk_fs.h index 188b53a3c0c..571b3086103 100644 --- a/src/intel/compiler/elk/elk_fs.h +++ b/src/intel/compiler/elk/elk_fs.h @@ -592,8 +592,6 @@ void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst * int elk_get_subgroup_id_param_index(const intel_device_info *devinfo, const elk_stage_prog_data *prog_data); -bool elk_lower_dpas(elk_fs_visitor &v); - void nir_to_elk(elk_fs_visitor *s); #endif /* ELK_FS_H */ diff --git a/src/intel/compiler/elk/elk_fs_builder.h b/src/intel/compiler/elk/elk_fs_builder.h index a457d6d6a63..ae2108e5926 100644 --- a/src/intel/compiler/elk/elk_fs_builder.h +++ b/src/intel/compiler/elk/elk_fs_builder.h @@ -834,27 +834,6 @@ namespace elk { return inst; } - instruction * - DPAS(const dst_reg &dst, const src_reg &src0, const src_reg &src1, const src_reg &src2, - unsigned sdepth, unsigned rcount) const - { - assert(_dispatch_width == 8); - assert(sdepth == 8); - assert(rcount == 1 || rcount == 2 || rcount == 4 || rcount == 8); - - instruction *inst = emit(ELK_OPCODE_DPAS, dst, src0, src1, src2); - inst->sdepth = sdepth; - inst->rcount = rcount; - - if (dst.type == ELK_REGISTER_TYPE_HF) { - inst->size_written = rcount * REG_SIZE / 2; - } else { - inst->size_written = rcount * REG_SIZE; - } - - return inst; - } - elk_fs_visitor *shader; elk_fs_inst *BREAK() { return emit(ELK_OPCODE_BREAK); } diff --git a/src/intel/compiler/elk/elk_fs_generator.cpp b/src/intel/compiler/elk/elk_fs_generator.cpp index 82dadd7cfd4..67c112775ac 100644 --- a/src/intel/compiler/elk/elk_fs_generator.cpp +++ b/src/intel/compiler/elk/elk_fs_generator.cpp @@ -1599,19 +1599,6 @@ elk_fs_generator::enable_debug(const char *shader_name) this->shader_name = shader_name; } -static elk_gfx12_systolic_depth -translate_systolic_depth(unsigned d) -{ - /* Could also return (ffs(d) - 1) & 3. */ - switch (d) { - case 2: return ELK_SYSTOLIC_DEPTH_2; - case 4: return ELK_SYSTOLIC_DEPTH_4; - case 8: return ELK_SYSTOLIC_DEPTH_8; - case 16: return ELK_SYSTOLIC_DEPTH_16; - default: unreachable("Invalid systolic depth."); - } -} - int elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, struct shader_stats shader_stats, @@ -1820,12 +1807,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, elk_LINE(p, dst, src[0], src[1]); break; - case ELK_OPCODE_DPAS: - assert(devinfo->verx10 >= 125); - elk_DPAS(p, translate_systolic_depth(inst->sdepth), inst->rcount, - dst, src[0], src[1], src[2]); - break; - case ELK_OPCODE_MAD: assert(devinfo->ver >= 6); if (devinfo->ver < 10) diff --git a/src/intel/compiler/elk/elk_fs_lower_regioning.cpp b/src/intel/compiler/elk/elk_fs_lower_regioning.cpp index 5b01ec2c10d..e1431795837 100644 --- a/src/intel/compiler/elk/elk_fs_lower_regioning.cpp +++ b/src/intel/compiler/elk/elk_fs_lower_regioning.cpp @@ -253,8 +253,7 @@ namespace { has_invalid_src_region(const intel_device_info *devinfo, const elk_fs_inst *inst, unsigned i) { - if (is_send(inst) || inst->is_math() || inst->is_control_source(i) || - inst->opcode == ELK_OPCODE_DPAS) { + if (is_send(inst) || inst->is_math() || inst->is_control_source(i)) { return false; } diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index 4e56903f0a8..f17e57a7d64 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -4547,66 +4547,6 @@ fs_nir_emit_cs_intrinsic(nir_to_elk_state &ntb, break; } - case nir_intrinsic_dpas_intel: { - const unsigned sdepth = nir_intrinsic_systolic_depth(instr); - const unsigned rcount = nir_intrinsic_repeat_count(instr); - - const elk_reg_type dest_type = - elk_type_for_nir_type(devinfo, nir_intrinsic_dest_type(instr)); - const elk_reg_type src_type = - elk_type_for_nir_type(devinfo, nir_intrinsic_src_type(instr)); - - dest = retype(dest, dest_type); - elk_fs_reg src2 = retype(get_nir_src(ntb, instr->src[2]), dest_type); - const elk_fs_reg dest_hf = dest; - - fs_builder bld8 = bld.exec_all().group(8, 0); - fs_builder bld16 = bld.exec_all().group(16, 0); - - /* DG2 cannot have the destination or source 0 of DPAS be float16. It is - * still advantageous to support these formats for memory and bandwidth - * savings. - * - * The float16 source must be expanded to float32. - */ - if (devinfo->verx10 == 125 && dest_type == ELK_REGISTER_TYPE_HF && - !s.compiler->lower_dpas) { - dest = bld8.vgrf(ELK_REGISTER_TYPE_F, rcount); - - if (src2.file != ARF) { - const elk_fs_reg src2_hf = src2; - - src2 = bld8.vgrf(ELK_REGISTER_TYPE_F, rcount); - - for (unsigned i = 0; i < 4; i++) { - bld16.MOV(byte_offset(src2, REG_SIZE * i * 2), - byte_offset(src2_hf, REG_SIZE * i)); - } - } else { - src2 = retype(src2, ELK_REGISTER_TYPE_F); - } - } - - bld8.DPAS(dest, - src2, - retype(get_nir_src(ntb, instr->src[1]), src_type), - retype(get_nir_src(ntb, instr->src[0]), src_type), - sdepth, - rcount) - ->saturate = nir_intrinsic_saturate(instr); - - /* Compact the destination to float16 (from float32). */ - if (!dest.equals(dest_hf)) { - for (unsigned i = 0; i < 4; i++) { - bld16.MOV(byte_offset(dest_hf, REG_SIZE * i), - byte_offset(dest, REG_SIZE * i * 2)); - } - } - - cs_prog_data->uses_systolic = true; - break; - } - default: fs_nir_emit_intrinsic(ntb, bld, instr); break; diff --git a/src/intel/compiler/elk/elk_gram.y b/src/intel/compiler/elk/elk_gram.y index 32864d5f55d..b54d148f2a2 100644 --- a/src/intel/compiler/elk/elk_gram.y +++ b/src/intel/compiler/elk/elk_gram.y @@ -400,7 +400,7 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t %token ADD ADD3 ADDC AND ASR AVG %token BFE BFI1 BFI2 BFB BFREV BRC BRD BREAK %token CALL CALLA CASE CBIT CMP CMPN CONT CSEL -%token DIM DO DPAS DPASW DP2 DP3 DP4 DP4A DPH +%token DIM DO DP2 DP3 DP4 DP4A DPH %token ELSE ENDIF F16TO32 F32TO16 FBH FBL FORK FRC %token GOTO %token HALT diff --git a/src/intel/compiler/elk/elk_inst.h b/src/intel/compiler/elk/elk_inst.h index c546d54c691..cd557f8ee3f 100644 --- a/src/intel/compiler/elk/elk_inst.h +++ b/src/intel/compiler/elk/elk_inst.h @@ -616,67 +616,6 @@ elk_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo, } /** @} */ -/** - * Three-source systolic instructions: - * @{ - */ -F(dpas_3src_src2_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 127, 120) -F(dpas_3src_src2_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 119, 115) -F(dpas_3src_src2_reg_file, /* 4+ */ -1, -1, /* 12+ */ 114, 114) -F(dpas_3src_src1_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 111, 104) -F(dpas_3src_src1_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 103, 99) -F(dpas_3src_src1_reg_file, /* 4+ */ -1, -1, /* 12+ */ 98, 98) -F(dpas_3src_src1_hw_type, /* 4+ */ -1, -1, /* 12+ */ 90, 88) -F(dpas_3src_src1_subbyte, /* 4+ */ -1, -1, /* 12+ */ 87, 86) -F(dpas_3src_src2_subbyte, /* 4+ */ -1, -1, /* 12+ */ 85, 84) -F(dpas_3src_src2_hw_type, /* 4+ */ -1, -1, /* 12+ */ 82, 80) -F(dpas_3src_src0_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 79, 72) -F(dpas_3src_src0_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 71, 67) -F(dpas_3src_src0_reg_file, /* 4+ */ -1, -1, /* 12+ */ 66, 66) -F(dpas_3src_dst_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 63, 56) -F(dpas_3src_dst_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 55, 51) -F(dpas_3src_dst_reg_file, /* 4+ */ -1, -1, /* 12+ */ 50, 50) -F(dpas_3src_sdepth, /* 4+ */ -1, -1, /* 12+ */ 49, 48) -F(dpas_3src_rcount, /* 4+ */ -1, -1, /* 12+ */ 45, 43) -F(dpas_3src_src0_hw_type, /* 4+ */ -1, -1, /* 12+ */ 42, 40) -F(dpas_3src_exec_type, /* 4+ */ -1, -1, /* 12+ */ 39, 39) -F(dpas_3src_dst_hw_type, /* 4+ */ -1, -1, /* 12+ */ 38, 36) -/** @} */ - -#define REG_TYPE(reg) \ -static inline void \ -elk_inst_set_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \ - elk_inst *inst, enum elk_reg_type type) \ -{ \ - UNUSED enum gfx10_align1_3src_exec_type exec_type = \ - (enum gfx10_align1_3src_exec_type) elk_inst_dpas_3src_exec_type(devinfo,\ - inst); \ - if (elk_reg_type_is_floating_point(type)) { \ - assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \ - } else { \ - assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_INT); \ - } \ - unsigned hw_type = elk_reg_type_to_a1_hw_3src_type(devinfo, type); \ - elk_inst_set_dpas_3src_##reg##_hw_type(devinfo, inst, hw_type); \ -} \ - \ -static inline enum elk_reg_type \ -elk_inst_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \ - const elk_inst *inst) \ -{ \ - enum gfx10_align1_3src_exec_type exec_type = \ - (enum gfx10_align1_3src_exec_type) elk_inst_dpas_3src_exec_type(devinfo,\ - inst); \ - unsigned hw_type = elk_inst_dpas_3src_##reg##_hw_type(devinfo, inst); \ - return elk_a1_hw_3src_type_to_reg_type(devinfo, hw_type, exec_type); \ -} - -REG_TYPE(dst) -REG_TYPE(src0) -REG_TYPE(src1) -REG_TYPE(src2) -#undef REG_TYPE - /** * Flow control instruction bits: * @{ diff --git a/src/intel/compiler/elk/elk_ir.h b/src/intel/compiler/elk/elk_ir.h index 8e4e42dfcb7..22cf9c18c07 100644 --- a/src/intel/compiler/elk/elk_ir.h +++ b/src/intel/compiler/elk/elk_ir.h @@ -199,16 +199,6 @@ struct elk_backend_instruction { */ unsigned flag_subreg:3; - /** - * Systolic depth used by DPAS instruction. - */ - unsigned sdepth:4; - - /** - * Repeat count used by DPAS instruction. - */ - unsigned rcount:4; - /** The number of hardware registers used for a message header. */ uint8_t header_size; }; diff --git a/src/intel/compiler/elk/elk_ir_fs.h b/src/intel/compiler/elk/elk_ir_fs.h index 45917252b93..96e22f9f089 100644 --- a/src/intel/compiler/elk/elk_ir_fs.h +++ b/src/intel/compiler/elk/elk_ir_fs.h @@ -576,7 +576,6 @@ static inline bool is_unordered(const intel_device_info *devinfo, const elk_fs_inst *inst) { return is_send(inst) || (devinfo->ver < 20 && inst->is_math()) || - inst->opcode == ELK_OPCODE_DPAS || (devinfo->has_64bit_float_via_math_pipe && (get_exec_type(inst) == ELK_REGISTER_TYPE_DF || inst->dst.type == ELK_REGISTER_TYPE_DF)); diff --git a/src/intel/compiler/elk/elk_ir_performance.cpp b/src/intel/compiler/elk/elk_ir_performance.cpp index 05b2dfd3744..23f4d181901 100644 --- a/src/intel/compiler/elk/elk_ir_performance.cpp +++ b/src/intel/compiler/elk/elk_ir_performance.cpp @@ -148,8 +148,6 @@ namespace { !elk_reg_type_is_floating_point(tx) && type_sz(tx) == 4 && type_sz(inst->src[0].type) == type_sz(inst->src[1].type)) tx = elk_int_type(8, tx == ELK_REGISTER_TYPE_D); - - rcount = inst->opcode == ELK_OPCODE_DPAS ? inst->rcount : 0; } instruction_info(const struct elk_isa_info *isa, @@ -157,7 +155,7 @@ namespace { isa(isa), devinfo(isa->devinfo), op(inst->opcode), td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)), tx(get_exec_type(inst)), sx(0), ss(0), sc(0), - desc(inst->desc), sfid(inst->sfid), rcount(0) + desc(inst->desc), sfid(inst->sfid) { /* Compute the maximum source size. */ for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) @@ -197,8 +195,6 @@ namespace { uint32_t desc; /** Send message shared function ID. */ uint8_t sfid; - /** Repeat count for DPAS instructions. */ - uint8_t rcount; }; /** @@ -509,32 +505,6 @@ namespace { else abort(); - case ELK_OPCODE_DPAS: { - unsigned ld; - - switch (info.rcount) { - case 1: - ld = 21; - break; - case 2: - ld = 22; - break; - case 8: - default: - ld = 32; - break; - } - - /* DPAS cannot write the accumulator or the flags, so pass UINT_MAX - * for la and lf. - */ - if (devinfo->verx10 >= 125) - return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, - 0, ld, UINT_MAX, UINT_MAX, 0, 0); - else - abort(); - } - case ELK_SHADER_OPCODE_RCP: case ELK_SHADER_OPCODE_RSQ: case ELK_SHADER_OPCODE_SQRT: diff --git a/src/intel/compiler/elk/elk_schedule_instructions.cpp b/src/intel/compiler/elk/elk_schedule_instructions.cpp index 2183d5418cd..175010c257b 100644 --- a/src/intel/compiler/elk/elk_schedule_instructions.cpp +++ b/src/intel/compiler/elk/elk_schedule_instructions.cpp @@ -617,21 +617,6 @@ elk_schedule_node::set_latency_gfx7(const struct elk_isa_info *isa) } break; - case ELK_OPCODE_DPAS: - switch (inst->rcount) { - case 1: - latency = 21; - break; - case 2: - latency = 22; - break; - case 8: - default: - latency = 32; - break; - } - break; - default: /* 2 cycles: * mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q }; diff --git a/src/intel/compiler/elk/elk_shader.cpp b/src/intel/compiler/elk/elk_shader.cpp index 12a64620159..9cd16bccc91 100644 --- a/src/intel/compiler/elk/elk_shader.cpp +++ b/src/intel/compiler/elk/elk_shader.cpp @@ -165,13 +165,6 @@ elk_instruction_name(const struct elk_isa_info *isa, enum elk_opcode op) if (devinfo->ver > 7 && op == ELK_OPCODE_F16TO32) return "f16to32"; - /* DPAS instructions may transiently exist on platforms that do not - * support DPAS. They will eventually be lowered, but in the meantime it - * must be possible to query the instruction name. - */ - if (devinfo->verx10 < 125 && op == ELK_OPCODE_DPAS) - return "dpas"; - assert(elk_opcode_desc(isa, op)->name); return elk_opcode_desc(isa, op)->name; case ELK_FS_OPCODE_FB_WRITE: @@ -944,7 +937,6 @@ elk_backend_instruction::can_do_source_mods() const case ELK_OPCODE_ROR: case ELK_OPCODE_SUBB: case ELK_OPCODE_DP4A: - case ELK_OPCODE_DPAS: case ELK_SHADER_OPCODE_BROADCAST: case ELK_SHADER_OPCODE_CLUSTER_BROADCAST: case ELK_SHADER_OPCODE_MOV_INDIRECT: