diff --git a/src/intel/compiler/elk/elk_asm.h b/src/intel/compiler/elk/elk_asm.h index 47b9d309dbd..9ad5942797a 100644 --- a/src/intel/compiler/elk/elk_asm.h +++ b/src/intel/compiler/elk/elk_asm.h @@ -64,17 +64,8 @@ struct predicate { unsigned flag_subreg_nr:1; }; -enum instoption_type { - INSTOPTION_FLAG, - INSTOPTION_DEP_INFO, -}; - struct instoption { - enum instoption_type type; - union { - unsigned uint_value; - struct tgl_swsb depinfo_value; - }; + unsigned uint_value; }; struct options { @@ -91,7 +82,6 @@ struct options { unsigned qtr_ctrl:2; unsigned nib_ctrl:1; unsigned is_compr:1; - struct tgl_swsb depinfo; }; struct msgdesc { diff --git a/src/intel/compiler/elk/elk_disasm.c b/src/intel/compiler/elk/elk_disasm.c index b6391379fc3..df8aff2e2c4 100644 --- a/src/intel/compiler/elk/elk_disasm.c +++ b/src/intel/compiler/elk/elk_disasm.c @@ -516,15 +516,6 @@ static const char *const math_function[16] = { [GFX8_MATH_FUNCTION_RSQRTM] = "rsqrtm", }; -static const char *const sync_function[16] = { - [TGL_SYNC_NOP] = "nop", - [TGL_SYNC_ALLRD] = "allrd", - [TGL_SYNC_ALLWR] = "allwr", - [TGL_SYNC_FENCE] = "fence", - [TGL_SYNC_BAR] = "bar", - [TGL_SYNC_HOST] = "host", -}; - static const char *const math_saturate[2] = { [0] = "", [1] = "sat" @@ -1803,11 +1794,6 @@ elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa, err |= control(file, "function", math_function, elk_inst_math_function(devinfo, inst), NULL); - } else if (opcode == ELK_OPCODE_SYNC) { - string(file, " "); - err |= control(file, "function", sync_function, - elk_inst_cond_modifier(devinfo, inst), NULL); - } else if (!is_send(opcode) && (devinfo->ver < 12 || elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE || diff --git a/src/intel/compiler/elk/elk_eu.c b/src/intel/compiler/elk/elk_eu.c index 98e69e74379..3676091f5bb 100644 --- a/src/intel/compiler/elk/elk_eu.c +++ b/src/intel/compiler/elk/elk_eu.c @@ -149,12 +149,6 @@ elk_get_default_access_mode(struct elk_codegen *p) return p->current->access_mode; } -struct tgl_swsb -elk_get_default_swsb(struct elk_codegen *p) -{ - return p->current->swsb; -} - void elk_set_default_exec_size(struct elk_codegen *p, unsigned value) { @@ -301,11 +295,6 @@ void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value) p->current->acc_wr_control = value; } -void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value) -{ - p->current->swsb = value; -} - void elk_push_insn_state( struct elk_codegen *p ) { assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]); @@ -647,7 +636,6 @@ elk_disassemble(const struct elk_isa_info *isa, static const struct elk_opcode_desc opcode_descs[] = { /* IR, HW, name, nsrc, ndst, gfx_vers */ { ELK_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL }, - { ELK_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) }, { ELK_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) }, { ELK_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) }, { ELK_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) }, diff --git a/src/intel/compiler/elk/elk_eu.h b/src/intel/compiler/elk/elk_eu.h index b92cf525222..a19a274e56f 100644 --- a/src/intel/compiler/elk/elk_eu.h +++ b/src/intel/compiler/elk/elk_eu.h @@ -64,9 +64,6 @@ struct elk_insn_state { /* One of ELK_MASK_* */ unsigned mask_control:1; - /* Scheduling info for Gfx12+ */ - struct tgl_swsb swsb; - bool saturate:1; /* One of ELK_ALIGN_* */ @@ -158,7 +155,6 @@ void elk_push_insn_state( struct elk_codegen *p ); unsigned elk_get_default_exec_size(struct elk_codegen *p); unsigned elk_get_default_group(struct elk_codegen *p); unsigned elk_get_default_access_mode(struct elk_codegen *p); -struct tgl_swsb elk_get_default_swsb(struct elk_codegen *p); void elk_set_default_exec_size(struct elk_codegen *p, unsigned value); void elk_set_default_mask_control( struct elk_codegen *p, unsigned value ); void elk_set_default_saturate( struct elk_codegen *p, bool enable ); @@ -174,7 +170,6 @@ void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse); void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg); void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value); -void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value); void elk_init_codegen(const struct elk_isa_info *isa, struct elk_codegen *p, void *mem_ctx); @@ -1879,8 +1874,6 @@ void elk_NOP(struct elk_codegen *p); void elk_WAIT(struct elk_codegen *p); -void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func); - /* Special case: there is never a destination, execution size will be * taken from src0: */ diff --git a/src/intel/compiler/elk/elk_eu_defines.h b/src/intel/compiler/elk/elk_eu_defines.h index 904886af2a8..b5671db07c8 100644 --- a/src/intel/compiler/elk/elk_eu_defines.h +++ b/src/intel/compiler/elk/elk_eu_defines.h @@ -502,285 +502,6 @@ enum ENUM_PACKED elk_width { ELK_WIDTH_16 = 4, }; -/** - * Gfx12+ SWSB SBID synchronization mode. - * - * This is represented as a bitmask including any required SBID token - * synchronization modes, used to synchronize out-of-order instructions. Only - * the strongest mode of the mask will be provided to the hardware in the SWSB - * field of an actual hardware instruction, but virtual instructions may be - * able to take into account multiple of them. - */ -enum tgl_sbid_mode { - TGL_SBID_NULL = 0, - TGL_SBID_SRC = 1, - TGL_SBID_DST = 2, - TGL_SBID_SET = 4 -}; - - -enum gfx12_sub_byte_precision { - ELK_SUB_BYTE_PRECISION_NONE = 0, - - /** 4 bits. Signedness determined by base type */ - ELK_SUB_BYTE_PRECISION_4BIT = 1, - - /** 2 bits. Signedness determined by base type */ - ELK_SUB_BYTE_PRECISION_2BIT = 2, -}; - -enum elk_gfx12_systolic_depth { - ELK_SYSTOLIC_DEPTH_16 = 0, - ELK_SYSTOLIC_DEPTH_2 = 1, - ELK_SYSTOLIC_DEPTH_4 = 2, - ELK_SYSTOLIC_DEPTH_8 = 3, -}; - -#ifdef __cplusplus -/** - * Allow bitwise arithmetic of tgl_sbid_mode enums. - */ -inline tgl_sbid_mode -operator|(tgl_sbid_mode x, tgl_sbid_mode y) -{ - return tgl_sbid_mode(unsigned(x) | unsigned(y)); -} - -inline tgl_sbid_mode -operator&(tgl_sbid_mode x, tgl_sbid_mode y) -{ - return tgl_sbid_mode(unsigned(x) & unsigned(y)); -} - -inline tgl_sbid_mode & -operator|=(tgl_sbid_mode &x, tgl_sbid_mode y) -{ - return x = x | y; -} - -#endif - -/** - * TGL+ SWSB RegDist synchronization pipeline. - * - * On TGL all instructions that use the RegDist synchronization mechanism are - * considered to be executed as a single in-order pipeline, therefore only the - * TGL_PIPE_FLOAT pipeline is applicable. On XeHP+ platforms there are two - * additional asynchronous ALU pipelines (which still execute instructions - * in-order and use the RegDist synchronization mechanism). TGL_PIPE_NONE - * doesn't provide any RegDist pipeline synchronization information and allows - * the hardware to infer the pipeline based on the source types of the - * instruction. TGL_PIPE_ALL can be used when synchronization with all ALU - * pipelines is intended. - */ -enum tgl_pipe { - TGL_PIPE_NONE = 0, - TGL_PIPE_FLOAT, - TGL_PIPE_INT, - TGL_PIPE_LONG, - TGL_PIPE_MATH, - TGL_PIPE_ALL -}; - -/** - * Logical representation of the SWSB scheduling information of a hardware - * instruction. The binary representation is slightly more compact. - */ -struct tgl_swsb { - unsigned regdist : 3; - enum tgl_pipe pipe : 3; - unsigned sbid : 5; - enum tgl_sbid_mode mode : 3; -}; - -/** - * Construct a scheduling annotation with a single RegDist dependency. This - * synchronizes with the completion of the d-th previous in-order instruction. - * The index is one-based, zero causes a no-op tgl_swsb to be constructed. - */ -static inline struct tgl_swsb -tgl_swsb_regdist(unsigned d) -{ - const struct tgl_swsb swsb = { d, d ? TGL_PIPE_ALL : TGL_PIPE_NONE }; - assert(swsb.regdist == d); - return swsb; -} - -/** - * Construct a scheduling annotation that synchronizes with the specified SBID - * token. - */ -static inline struct tgl_swsb -tgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid) -{ - const struct tgl_swsb swsb = { 0, TGL_PIPE_NONE, sbid, mode }; - assert(swsb.sbid == sbid); - return swsb; -} - -/** - * Construct a no-op scheduling annotation. - */ -static inline struct tgl_swsb -tgl_swsb_null(void) -{ - return tgl_swsb_regdist(0); -} - -/** - * Return a scheduling annotation that allocates the same SBID synchronization - * token as \p swsb. In addition it will synchronize against a previous - * in-order instruction if \p regdist is non-zero. - */ -static inline struct tgl_swsb -tgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist) -{ - swsb.regdist = regdist; - swsb.mode = swsb.mode & TGL_SBID_SET; - swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE); - return swsb; -} - -/** - * Return a scheduling annotation that synchronizes against the same SBID and - * RegDist dependencies as \p swsb, but doesn't allocate any SBID token. - */ -static inline struct tgl_swsb -tgl_swsb_src_dep(struct tgl_swsb swsb) -{ - swsb.mode = swsb.mode & (TGL_SBID_SRC | TGL_SBID_DST); - return swsb; -} - -/** - * Convert the provided tgl_swsb to the hardware's binary representation of an - * SWSB annotation. - */ -static inline uint32_t -tgl_swsb_encode(const struct intel_device_info *devinfo, struct tgl_swsb swsb) -{ - if (!swsb.mode) { - const unsigned pipe = devinfo->verx10 < 125 ? 0 : - swsb.pipe == TGL_PIPE_FLOAT ? 0x10 : - swsb.pipe == TGL_PIPE_INT ? 0x18 : - swsb.pipe == TGL_PIPE_LONG ? 0x20 : - swsb.pipe == TGL_PIPE_MATH ? 0x28 : - swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0; - return pipe | swsb.regdist; - - } else if (swsb.regdist) { - if (devinfo->ver >= 20) { - if ((swsb.mode & TGL_SBID_SET)) { - assert(swsb.pipe == TGL_PIPE_ALL || - swsb.pipe == TGL_PIPE_INT || swsb.pipe == TGL_PIPE_FLOAT); - return (swsb.pipe == TGL_PIPE_INT ? 0x300 : - swsb.pipe == TGL_PIPE_FLOAT ? 0x200 : 0x100) | - swsb.regdist << 5 | swsb.sbid; - } else { - assert(!(swsb.mode & ~(TGL_SBID_DST | TGL_SBID_SRC))); - return (swsb.pipe == TGL_PIPE_ALL ? 0x300 : - swsb.mode == TGL_SBID_SRC ? 0x200 : 0x100) | - swsb.regdist << 5 | swsb.sbid; - } - } else { - assert(!(swsb.sbid & ~0xfu)); - return 0x80 | swsb.regdist << 4 | swsb.sbid; - } - - } else { - if (devinfo->ver >= 20) { - return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0xc0 : - swsb.mode & TGL_SBID_DST ? 0x80 : 0xa0); - } else { - assert(!(swsb.sbid & ~0xfu)); - return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0x40 : - swsb.mode & TGL_SBID_DST ? 0x20 : 0x30); - } - } -} - -/** - * Convert the provided binary representation of an SWSB annotation to a - * tgl_swsb. - */ -static inline struct tgl_swsb -tgl_swsb_decode(const struct intel_device_info *devinfo, - const bool is_unordered, const uint32_t x) -{ - if (devinfo->ver >= 20) { - if (x & 0x300) { - if (is_unordered) { - const struct tgl_swsb swsb = { - (x & 0xe0u) >> 5, - ((x & 0x300) == 0x300 ? TGL_PIPE_INT : - (x & 0x300) == 0x200 ? TGL_PIPE_FLOAT : - TGL_PIPE_ALL), - x & 0x1fu, - TGL_SBID_SET - }; - return swsb; - } else { - const struct tgl_swsb swsb = { - (x & 0xe0u) >> 5, - ((x & 0x300) == 0x300 ? TGL_PIPE_ALL : TGL_PIPE_NONE), - x & 0x1fu, - ((x & 0x300) == 0x200 ? TGL_SBID_SRC : TGL_SBID_DST) - }; - return swsb; - } - - } else if ((x & 0xe0) == 0x80) { - return tgl_swsb_sbid(TGL_SBID_DST, x & 0x1f); - } else if ((x & 0xe0) == 0xa0) { - return tgl_swsb_sbid(TGL_SBID_SRC, x & 0x1fu); - } else if ((x & 0xe0) == 0xc0) { - return tgl_swsb_sbid(TGL_SBID_SET, x & 0x1fu); - } else { - const struct tgl_swsb swsb = { x & 0x7u, - ((x & 0x38) == 0x10 ? TGL_PIPE_FLOAT : - (x & 0x38) == 0x18 ? TGL_PIPE_INT : - (x & 0x38) == 0x20 ? TGL_PIPE_LONG : - (x & 0x38) == 0x28 ? TGL_PIPE_MATH : - (x & 0x38) == 0x8 ? TGL_PIPE_ALL : - TGL_PIPE_NONE) }; - return swsb; - } - - } else { - if (x & 0x80) { - const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE, - x & 0xfu, - is_unordered ? - TGL_SBID_SET : TGL_SBID_DST }; - return swsb; - } else if ((x & 0x70) == 0x20) { - return tgl_swsb_sbid(TGL_SBID_DST, x & 0xfu); - } else if ((x & 0x70) == 0x30) { - return tgl_swsb_sbid(TGL_SBID_SRC, x & 0xfu); - } else if ((x & 0x70) == 0x40) { - return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu); - } else { - const struct tgl_swsb swsb = { x & 0x7u, - ((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT : - (x & 0x78) == 0x18 ? TGL_PIPE_INT : - (x & 0x78) == 0x50 ? TGL_PIPE_LONG : - (x & 0x78) == 0x8 ? TGL_PIPE_ALL : - TGL_PIPE_NONE) }; - assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE); - return swsb; - } - } -} - -enum tgl_sync_function { - TGL_SYNC_NOP = 0x0, - TGL_SYNC_ALLRD = 0x2, - TGL_SYNC_ALLWR = 0x3, - TGL_SYNC_FENCE = 0xd, - TGL_SYNC_BAR = 0xe, - TGL_SYNC_HOST = 0xf -}; - /** * Message target: Shared Function ID for where to SEND a message. * diff --git a/src/intel/compiler/elk/elk_eu_emit.c b/src/intel/compiler/elk/elk_eu_emit.c index 9590d1f916f..89ef83b7f9d 100644 --- a/src/intel/compiler/elk/elk_eu_emit.c +++ b/src/intel/compiler/elk/elk_eu_emit.c @@ -1127,12 +1127,6 @@ void elk_NOP(struct elk_codegen *p) elk_inst_set_opcode(p->isa, insn, ELK_OPCODE_NOP); } -void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func) -{ - elk_inst *insn = next_insn(p, ELK_OPCODE_SYNC); - elk_inst_set_cond_modifier(p->devinfo, insn, func); -} - /*********************************************************************** * Comparisons, if/else/endif */ @@ -1990,7 +1984,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p, (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE : devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : ELK_SFID_DATAPORT_WRITE); - const struct tgl_swsb swsb = elk_get_default_swsb(p); uint32_t msg_type; if (devinfo->ver >= 6) @@ -2010,13 +2003,11 @@ void elk_oword_block_write_scratch(struct elk_codegen *p, elk_set_default_exec_size(p, ELK_EXECUTE_8); elk_set_default_mask_control(p, ELK_MASK_DISABLE); elk_set_default_compression_control(p, ELK_COMPRESSION_NONE); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ elk_set_default_exec_size(p, ELK_EXECUTE_1); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2024,7 +2015,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p, elk_imm_ud(offset)); elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -2098,7 +2088,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p, unsigned offset) { const struct intel_device_info *devinfo = p->devinfo; - const struct tgl_swsb swsb = elk_get_default_swsb(p); if (devinfo->ver >= 6) offset /= 16; @@ -2125,7 +2114,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p, { elk_push_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); elk_set_default_exec_size(p, ELK_EXECUTE_8); elk_set_default_compression_control(p, ELK_COMPRESSION_NONE); elk_set_default_mask_control(p, ELK_MASK_DISABLE); @@ -2134,11 +2122,9 @@ elk_oword_block_read_scratch(struct elk_codegen *p, /* set message header global offset field (reg 0, element 2) */ elk_set_default_exec_size(p, ELK_EXECUTE_1); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, get_element_ud(mrf, 2), elk_imm_ud(offset)); elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -2215,7 +2201,6 @@ void elk_oword_block_read(struct elk_codegen *p, (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE : ELK_SFID_DATAPORT_READ); const unsigned exec_size = 1 << elk_get_default_exec_size(p); - const struct tgl_swsb swsb = elk_get_default_swsb(p); /* On newer hardware, offset is in units of owords. */ if (devinfo->ver >= 6) @@ -2231,12 +2216,10 @@ void elk_oword_block_read(struct elk_codegen *p, elk_push_insn_state(p); elk_set_default_exec_size(p, ELK_EXECUTE_8); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ elk_set_default_exec_size(p, ELK_EXECUTE_1); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2244,8 +2227,6 @@ void elk_oword_block_read(struct elk_codegen *p, elk_imm_ud(offset)); elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - elk_inst *insn = next_insn(p, ELK_OPCODE_SEND); elk_inst_set_sfid(devinfo, insn, target_cache); @@ -2444,7 +2425,6 @@ void elk_adjust_sampler_state_pointer(struct elk_codegen *p, elk_push_insn_state(p); elk_AND(p, temp, get_element_ud(sampler_index, 0), elk_imm_ud(0x0f0)); - elk_set_default_swsb(p, tgl_swsb_regdist(1)); elk_SHL(p, temp, temp, elk_imm_ud(4)); elk_ADD(p, get_element_ud(header, 3), @@ -2527,7 +2507,6 @@ elk_send_indirect_message(struct elk_codegen *p, elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD)); elk_set_desc(p, send, desc.ud | desc_imm); } else { - const struct tgl_swsb swsb = elk_get_default_swsb(p); struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD); elk_push_insn_state(p); @@ -2536,7 +2515,6 @@ elk_send_indirect_message(struct elk_codegen *p, elk_set_default_exec_size(p, ELK_EXECUTE_1); elk_set_default_predicate_control(p, ELK_PREDICATE_NONE); elk_set_default_flag_reg(p, 0, 0); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Load the indirect descriptor to an address register using OR so the * caller can specify additional descriptor bits with the desc_imm @@ -2546,7 +2524,6 @@ elk_send_indirect_message(struct elk_codegen *p, elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); send = next_insn(p, ELK_OPCODE_SEND); elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD)); elk_set_src1(p, send, addr); @@ -2566,7 +2543,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p, unsigned desc_imm) { if (surface.file != ELK_IMMEDIATE_VALUE) { - const struct tgl_swsb swsb = elk_get_default_swsb(p); struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD); elk_push_insn_state(p); @@ -2575,7 +2551,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p, elk_set_default_exec_size(p, ELK_EXECUTE_1); elk_set_default_predicate_control(p, ELK_PREDICATE_NONE); elk_set_default_flag_reg(p, 0, 0); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Mask out invalid bits from the surface index to avoid hangs e.g. when * some surface array is accessed out of bounds. @@ -2588,7 +2563,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p, elk_pop_insn_state(p); surface = addr; - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } elk_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); @@ -3161,7 +3135,6 @@ elk_broadcast(struct elk_codegen *p, if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) { elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0), subscript(src, ELK_REGISTER_TYPE_D, 0)); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1), subscript(src, ELK_REGISTER_TYPE_D, 1)); } else { @@ -3205,15 +3178,12 @@ elk_broadcast(struct elk_codegen *p, * register is above this limit. */ if (offset >= limit) { - elk_set_default_swsb(p, tgl_swsb_regdist(1)); elk_ADD(p, addr, addr, elk_imm_ud(offset - offset % limit)); offset = offset % limit; } elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - /* Use indirect addressing to fetch the specified component. */ if (type_sz(src.type) > 4 && (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) || @@ -3233,7 +3203,6 @@ elk_broadcast(struct elk_codegen *p, elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0), retype(elk_vec1_indirect(addr.subnr, offset), ELK_REGISTER_TYPE_D)); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1), retype(elk_vec1_indirect(addr.subnr, offset + 4), ELK_REGISTER_TYPE_D)); @@ -3326,27 +3295,18 @@ elk_float_controls_mode(struct elk_codegen *p, * does not ensure execution pipeline coherency. Software must set the * thread control field to ‘switch’ for an instruction that uses * control register as an explicit operand." - * - * On Gfx12+ this is implemented in terms of SWSB annotations instead. */ - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - elk_inst *inst = elk_AND(p, elk_cr0_reg(0), elk_cr0_reg(0), elk_imm_ud(~mask)); elk_inst_set_exec_size(p->devinfo, inst, ELK_EXECUTE_1); - if (p->devinfo->ver < 12) - elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH); + elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH); if (mode) { elk_inst *inst_or = elk_OR(p, elk_cr0_reg(0), elk_cr0_reg(0), elk_imm_ud(mode)); elk_inst_set_exec_size(p->devinfo, inst_or, ELK_EXECUTE_1); - if (p->devinfo->ver < 12) - elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH); + elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH); } - - if (p->devinfo->ver >= 12) - elk_SYNC(p, TGL_SYNC_NOP); } void diff --git a/src/intel/compiler/elk/elk_eu_opcodes.h b/src/intel/compiler/elk/elk_eu_opcodes.h index c1b7b32158f..3f6c2e71ed8 100644 --- a/src/intel/compiler/elk/elk_eu_opcodes.h +++ b/src/intel/compiler/elk/elk_eu_opcodes.h @@ -13,7 +13,6 @@ extern "C" { enum elk_opcode { /* These are the actual hardware instructions. */ ELK_OPCODE_ILLEGAL, - ELK_OPCODE_SYNC, ELK_OPCODE_MOV, ELK_OPCODE_SEL, ELK_OPCODE_MOVI, /**< G45+ */ diff --git a/src/intel/compiler/elk/elk_eu_validate.c b/src/intel/compiler/elk/elk_eu_validate.c index 2c5e654f35d..92192762274 100644 --- a/src/intel/compiler/elk/elk_eu_validate.c +++ b/src/intel/compiler/elk/elk_eu_validate.c @@ -293,7 +293,7 @@ sources_not_null(const struct elk_isa_info *isa, if (num_sources == 3) return (struct string){}; - if (num_sources >= 1 && elk_inst_opcode(isa, inst) != ELK_OPCODE_SYNC) + if (num_sources >= 1) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); if (num_sources == 2) diff --git a/src/intel/compiler/elk/elk_fs_generator.cpp b/src/intel/compiler/elk/elk_fs_generator.cpp index b9b78d37ee8..000466de4bf 100644 --- a/src/intel/compiler/elk/elk_fs_generator.cpp +++ b/src/intel/compiler/elk/elk_fs_generator.cpp @@ -480,7 +480,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst, if (type_sz(reg.type) > 4 && !devinfo->has_64bit_float) { elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0), subscript(reg, ELK_REGISTER_TYPE_D, 0)); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1), subscript(reg, ELK_REGISTER_TYPE_D, 1)); } else { @@ -544,16 +543,11 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst, insn = elk_MOV(p, addr, elk_imm_uw(imm_byte_offset)); elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE); elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE); - if (devinfo->ver >= 12) - elk_set_default_swsb(p, tgl_swsb_null()); - else - elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); + elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); } insn = elk_ADD(p, addr, indirect_byte_offset, elk_imm_uw(imm_byte_offset)); - if (devinfo->ver >= 12) - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - else if (devinfo->ver >= 7) + if (devinfo->ver >= 7) elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); if (type_sz(reg.type) > 4 && @@ -577,7 +571,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst, */ elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0), retype(elk_VxH_indirect(0, 0), ELK_REGISTER_TYPE_D)); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1), retype(elk_VxH_indirect(0, 4), ELK_REGISTER_TYPE_D)); } else { @@ -708,28 +701,20 @@ elk_fs_generator::generate_shuffle(elk_fs_inst *inst, insn = elk_MOV(p, addr, elk_imm_uw(src_start_offset)); elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE); elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE); - if (devinfo->ver >= 12) - elk_set_default_swsb(p, tgl_swsb_null()); - else - elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); + elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); insn = elk_SHL(p, addr, group_idx, elk_imm_uw(util_logbase2(type_sz(src.type)) + src.hstride - 1)); - if (devinfo->ver >= 12) - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - else - elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); + elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); /* Add on the register start offset */ elk_ADD(p, addr, addr, elk_imm_uw(src_start_offset)); elk_MOV(p, suboffset(dst, group << (dst.hstride - 1)), retype(elk_VxH_indirect(0, 0), src.type)); } - - elk_set_default_swsb(p, tgl_swsb_null()); } } @@ -790,12 +775,8 @@ elk_fs_generator::generate_quad_swizzle(const elk_fs_inst *inst, 4 * inst->dst.stride, 1, 4 * inst->dst.stride), stride(suboffset(src, ELK_GET_SWZ(swiz, c)), 4, 1, 0)); - if (devinfo->ver < 12) { - elk_inst_set_no_dd_clear(devinfo, insn, c < 3); - elk_inst_set_no_dd_check(devinfo, insn, c > 0); - } - - elk_set_default_swsb(p, tgl_swsb_null()); + elk_inst_set_no_dd_clear(devinfo, insn, c < 3); + elk_inst_set_no_dd_check(devinfo, insn, c > 0); } break; @@ -847,12 +828,7 @@ void elk_fs_generator::generate_barrier(elk_fs_inst *, struct elk_reg src) { elk_barrier(p, src); - if (devinfo->ver >= 12) { - elk_set_default_swsb(p, tgl_swsb_null()); - elk_SYNC(p, TGL_SYNC_BAR); - } else { - elk_WAIT(p); - } + elk_WAIT(p); } bool @@ -1144,18 +1120,15 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst, /* Set up an implied move from g0 to the MRF. */ src = retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UW); } else { - const tgl_swsb swsb = elk_get_default_swsb(p); assert(inst->base_mrf != -1); struct elk_reg header_reg = elk_message_reg(inst->base_mrf); elk_push_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); elk_set_default_exec_size(p, ELK_EXECUTE_8); elk_set_default_mask_control(p, ELK_MASK_DISABLE); elk_set_default_compression_control(p, ELK_COMPRESSION_NONE); /* Explicitly set up the message header by copying g0 to the MRF. */ elk_MOV(p, header_reg, elk_vec8_grf(0, 0)); - elk_set_default_swsb(p, tgl_swsb_regdist(1)); elk_set_default_exec_size(p, ELK_EXECUTE_1); if (inst->offset) { @@ -1165,7 +1138,6 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst, } elk_pop_insn_state(p); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } } @@ -1302,7 +1274,6 @@ elk_fs_generator::generate_ddy(const elk_fs_inst *inst, elk_ADD(p, byte_offset(dst, g * type_size), negate(byte_offset(src, g * type_size)), byte_offset(src, (g + 2) * type_size)); - elk_set_default_swsb(p, tgl_swsb_null()); } elk_pop_insn_state(p); } else { @@ -1365,7 +1336,6 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src) const unsigned lower_size = inst->force_writemask_all ? inst->exec_size : MIN2(16, inst->exec_size); const unsigned block_size = 4 * lower_size / REG_SIZE; - const tgl_swsb swsb = elk_get_default_swsb(p); assert(inst->mlen != 0); elk_push_insn_state(p); @@ -1375,17 +1345,9 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src) for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { elk_set_default_group(p, inst->group + lower_size * i); - if (i > 0) { - assert(swsb.mode & TGL_SBID_SET); - elk_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_SRC, swsb.sbid)); - } else { - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - } - elk_MOV(p, elk_uvec_mrf(lower_size, inst->base_mrf + 1, 0), retype(offset(src, block_size * i), ELK_REGISTER_TYPE_UD)); - elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); elk_oword_block_write_scratch(p, elk_message_reg(inst->base_mrf), block_size, inst->offset + block_size * REG_SIZE * i); @@ -1459,10 +1421,7 @@ elk_fs_generator::generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst) dst.type = ELK_REGISTER_TYPE_UD; elk_inst *insn = elk_MOV(p, dst, elk_imm_ud(0)); - if (devinfo->ver >= 12) - elk_set_default_swsb(p, tgl_swsb_null()); - else - elk_inst_set_no_dd_clear(p->devinfo, insn, true); + elk_inst_set_no_dd_clear(p->devinfo, insn, true); /* Copy the per-thread scratch space size from g0.3[3:0] */ elk_set_default_exec_size(p, ELK_EXECUTE_1); @@ -1588,7 +1547,6 @@ elk_fs_generator::generate_set_sample_id(elk_fs_inst *inst, elk_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1); elk_inst_set_group(devinfo, insn, inst->group + lower_size * i); elk_inst_set_compression(devinfo, insn, lower_size > 8); - elk_set_default_swsb(p, tgl_swsb_null()); } } @@ -1625,7 +1583,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, struct elk_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; - tgl_swsb swsb = inst->sched; /* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the * "Register Region Restrictions" section: for BDW, SKL: @@ -1663,10 +1620,8 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, elk_set_default_mask_control(p, ELK_MASK_DISABLE); elk_set_default_predicate_control(p, ELK_PREDICATE_NONE); elk_set_default_flag_reg(p, 0, 0); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); elk_MOV(p, elk_acc_reg(8), elk_imm_f(0.0f)); last_insn_offset = p->next_insn_offset; - swsb = tgl_swsb_dst_dep(swsb, 1); } if (!is_accum_used && !inst->eot) { @@ -1674,24 +1629,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, inst->dst.is_accumulator(); } - /* Wa_14013672992: - * - * Always use @1 SWSB for EOT. - */ - if (inst->eot && intel_needs_workaround(devinfo, 14013672992)) { - if (tgl_swsb_src_dep(swsb).mode) { - elk_set_default_exec_size(p, ELK_EXECUTE_1); - elk_set_default_mask_control(p, ELK_MASK_DISABLE); - elk_set_default_predicate_control(p, ELK_PREDICATE_NONE); - elk_set_default_flag_reg(p, 0, 0); - elk_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - elk_SYNC(p, TGL_SYNC_NOP); - last_insn_offset = p->next_insn_offset; - } - - swsb = tgl_swsb_dst_dep(swsb, 1); - } - if (unlikely(debug_flag)) elk_disasm_annotate(elk_disasm_info, inst, p->next_insn_offset); @@ -1759,7 +1696,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, } else { elk_set_default_acc_write_control(p, inst->writes_accumulator); } - elk_set_default_swsb(p, swsb); unsigned exec_size = inst->exec_size; if (devinfo->verx10 == 70 && @@ -1775,13 +1711,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, assert(inst->mlen <= ELK_MAX_MSG_LENGTH * reg_unit(devinfo)); switch (inst->opcode) { - case ELK_OPCODE_SYNC: - assert(src[0].file == ELK_IMMEDIATE_VALUE); - elk_SYNC(p, tgl_sync_function(src[0].ud)); - - if (tgl_sync_function(src[0].ud) == TGL_SYNC_NOP) - ++sync_nop_count; - break; case ELK_OPCODE_MOV: elk_MOV(p, dst, src[0]); break; @@ -2148,33 +2077,23 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, } case ELK_FS_OPCODE_SCHEDULING_FENCE: - if (inst->sources == 0 && swsb.regdist == 0 && - swsb.mode == TGL_SBID_NULL) { + if (inst->sources == 0) { if (unlikely(debug_flag)) elk_disasm_info->use_tail = true; break; } - if (devinfo->ver >= 12) { - /* Use the available SWSB information to stall. A single SYNC is - * sufficient since if there were multiple dependencies, the - * scoreboard algorithm already injected other SYNCs before this - * instruction. + for (unsigned i = 0; i < inst->sources; i++) { + /* Emit a MOV to force a stall until the instruction producing the + * registers finishes. */ - elk_SYNC(p, TGL_SYNC_NOP); - } else { - for (unsigned i = 0; i < inst->sources; i++) { - /* Emit a MOV to force a stall until the instruction producing the - * registers finishes. - */ - elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW), - retype(src[i], ELK_REGISTER_TYPE_UW)); - } - - if (inst->sources > 1) - multiple_instructions_emitted = true; + elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW), + retype(src[i], ELK_REGISTER_TYPE_UW)); } + if (inst->sources > 1) + multiple_instructions_emitted = true; + break; case ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL: @@ -2209,7 +2128,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, elk_set_default_mask_control(p, ELK_MASK_DISABLE); elk_MOV(p, dst, src[1]); elk_set_default_mask_control(p, ELK_MASK_ENABLE); - elk_set_default_swsb(p, tgl_swsb_null()); elk_MOV(p, dst, src[0]); break; @@ -2298,21 +2216,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, break; case ELK_SHADER_OPCODE_READ_SR_REG: - if (devinfo->ver >= 12) { - /* There is a SWSB restriction that requires that any time sr0 is - * accessed both the instruction doing the access and the next one - * have SWSB set to RegDist(1). - */ - if (elk_get_default_swsb(p).mode != TGL_SBID_NULL) - elk_SYNC(p, TGL_SYNC_NOP); - assert(src[0].file == ELK_IMMEDIATE_VALUE); - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - elk_MOV(p, dst, elk_sr0_reg(src[0].ud)); - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - elk_AND(p, dst, dst, elk_imm_ud(0xffffffff)); - } else { - elk_MOV(p, dst, elk_sr0_reg(src[0].ud)); - } + elk_MOV(p, dst, elk_sr0_reg(src[0].ud)); break; default: @@ -2339,14 +2243,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, elk_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check); } } - - /* When enabled, insert sync NOP after every instruction and make sure - * that current instruction depends on the previous instruction. - */ - if (INTEL_DEBUG(DEBUG_SWSB_STALL) && devinfo->ver >= 12) { - elk_set_default_swsb(p, tgl_swsb_regdist(1)); - elk_SYNC(p, TGL_SYNC_NOP); - } } elk_set_uip_jip(p, start_offset); diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index b2e2e604b03..e61b048acc8 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -5244,50 +5244,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb, const fs_builder ubld = bld.group(8, 0); - /* A memory barrier with acquire semantics requires us to - * guarantee that memory operations of the specified storage - * class sequenced-after the barrier aren't reordered before the - * barrier, nor before any previous atomic operation - * sequenced-before the barrier which may be synchronizing this - * acquire barrier with a prior release sequence. - * - * In order to guarantee the latter we must make sure that any - * such previous operation has completed execution before - * invalidating the relevant caches, since otherwise some cache - * could be polluted by a concurrent thread after its - * invalidation but before the previous atomic completes, which - * could lead to a violation of the expected memory ordering if - * a subsequent memory read hits the polluted cacheline, which - * would return a stale value read from memory before the - * completion of the atomic sequenced-before the barrier. - * - * This ordering inversion can be avoided trivially if the - * operations we need to order are all handled by a single - * in-order cache, since the flush implied by the memory fence - * occurs after any pending operations have completed, however - * that doesn't help us when dealing with multiple caches - * processing requests out of order, in which case we need to - * explicitly stall the EU until any pending memory operations - * have executed. - * - * Note that that might be somewhat heavy handed in some cases. - * In particular when this memory fence was inserted by - * spirv_to_nir() lowering an atomic with acquire semantics into - * an atomic+barrier sequence we could do a better job by - * synchronizing with respect to that one atomic *only*, but - * that would require additional information not currently - * available to the backend. - * - * XXX - Use an alternative workaround on IVB and ICL, since - * SYNC.ALLWR is only available on Gfx12+. - */ - if (devinfo->ver >= 12 && - (!nir_intrinsic_has_memory_scope(instr) || - (nir_intrinsic_memory_semantics(instr) & NIR_MEMORY_ACQUIRE))) { - ubld.exec_all().group(1, 0).emit( - ELK_OPCODE_SYNC, ubld.null_reg_ud(), elk_imm_ud(TGL_SYNC_ALLWR)); - } - if (devinfo->has_lsc) { assert(devinfo->verx10 >= 125); uint32_t desc = @@ -5308,16 +5264,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb, if (slm_fence) { assert(opcode == ELK_SHADER_OPCODE_MEMORY_FENCE); - if (intel_needs_workaround(devinfo, 14014063774)) { - /* Wa_14014063774 - * - * Before SLM fence compiler needs to insert SYNC.ALLWR in order - * to avoid the SLM data race. - */ - ubld.exec_all().group(1, 0).emit( - ELK_OPCODE_SYNC, ubld.null_reg_ud(), - elk_imm_ud(TGL_SYNC_ALLWR)); - } fence_regs[fence_regs_count++] = emit_fence(ubld, opcode, GFX12_SFID_SLM, desc, true /* commit_enable */, diff --git a/src/intel/compiler/elk/elk_gram.y b/src/intel/compiler/elk/elk_gram.y index 573a3b6607d..50b5c701132 100644 --- a/src/intel/compiler/elk/elk_gram.y +++ b/src/intel/compiler/elk/elk_gram.y @@ -349,7 +349,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t struct options options; struct instoption instoption; struct msgdesc msgdesc; - struct tgl_swsb depinfo; elk_inst *instruction; } @@ -395,7 +394,7 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t %token OR %token PLN POP PUSH %token RET RNDD RNDE RNDU RNDZ -%token SAD2 SADA2 SEL SHL SHR SMOV SUBB SYNC +%token SAD2 SADA2 SEL SHL SHR SMOV SUBB %token SEND SENDC %token WAIT WHILE %token XOR @@ -404,11 +403,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t %token COS EXP FDIV INV INVM INTDIV INTDIVMOD INTMOD LOG POW RSQ %token RSQRTM SIN SINCOS SQRT -/* sync instruction */ -%token ALLRD ALLWR FENCE BAR HOST -%type sync_function -%type sync_arg - /* shared functions for send */ %token CONST CRE DATA DP_DATA_1 GATEWAY MATH PIXEL_INTERP READ RENDER SAMPLER %token THREAD_SPAWNER URB VME WRITE DP_SAMPLER RT_ACCEL SLM TGM UGM @@ -544,33 +538,11 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t %type jumplabeltarget %type jumplabel -/* SWSB */ -%token REG_DIST_CURRENT -%token REG_DIST_FLOAT -%token REG_DIST_INT -%token REG_DIST_LONG -%token REG_DIST_ALL -%token SBID_ALLOC -%token SBID_WAIT_SRC -%token SBID_WAIT_DST - -%type depinfo - %code { static void add_instruction_option(struct options *options, struct instoption opt) { - if (opt.type == INSTOPTION_DEP_INFO) { - if (opt.depinfo_value.regdist) { - options->depinfo.regdist = opt.depinfo_value.regdist; - options->depinfo.pipe = opt.depinfo_value.pipe; - } else { - options->depinfo.sbid = opt.depinfo_value.sbid; - options->depinfo.mode = opt.depinfo_value.mode; - } - return; - } switch (opt.uint_value) { case ALIGN1: options->access_mode = ELK_ALIGN_1; @@ -687,7 +659,6 @@ instruction: | ternaryinstruction | sendinstruction | illegalinstruction - | syncinstruction ; relocatableinstruction: @@ -1443,54 +1414,6 @@ loopinstruction: } ; -/* sync instruction */ -syncinstruction: - predicate SYNC sync_function execsize sync_arg instoptions - { - if (p->devinfo->ver < 12) { - error(&@2, "sync instruction is supported only on gfx12+\n"); - } - - if ($5.file == ELK_IMMEDIATE_VALUE && - $3 != TGL_SYNC_ALLRD && - $3 != TGL_SYNC_ALLWR) { - error(&@2, "Only allrd and allwr support immediate argument\n"); - } - - elk_set_default_access_mode(p, $6.access_mode); - elk_SYNC(p, $3); - i965_asm_set_instruction_options(p, $6); - elk_inst_set_exec_size(p->devinfo, elk_last_inst, $4); - elk_set_src0(p, elk_last_inst, $5); - elk_inst_set_eot(p->devinfo, elk_last_inst, $6.end_of_thread); - elk_inst_set_qtr_control(p->devinfo, elk_last_inst, $6.qtr_ctrl); - elk_inst_set_nib_control(p->devinfo, elk_last_inst, $6.nib_ctrl); - - elk_pop_insn_state(p); - } - ; - -sync_function: - NOP { $$ = TGL_SYNC_NOP; } - | ALLRD - | ALLWR - | FENCE - | BAR - | HOST - ; - -sync_arg: - nullreg region reg_type - { - $$ = $1; - $$.vstride = $2.vstride; - $$.width = $2.width; - $$.hstride = $2.hstride; - $$.type = $3; - } - | immreg - ; - /* Relative location */ relativelocation2: immreg @@ -2367,84 +2290,33 @@ instoption_list: } ; -depinfo: - REG_DIST_CURRENT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = TGL_PIPE_NONE; - } - | REG_DIST_FLOAT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = TGL_PIPE_FLOAT; - } - | REG_DIST_INT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = TGL_PIPE_INT; - } - | REG_DIST_LONG - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = TGL_PIPE_LONG; - } - | REG_DIST_ALL - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = TGL_PIPE_ALL; - } - | SBID_ALLOC - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = TGL_SBID_SET; - } - | SBID_WAIT_SRC - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = TGL_SBID_SRC; - } - | SBID_WAIT_DST - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = TGL_SBID_DST; - } - instoption: - ALIGN1 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN1;} - | ALIGN16 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN16; } - | ACCWREN { $$.type = INSTOPTION_FLAG; $$.uint_value = ACCWREN; } - | SECHALF { $$.type = INSTOPTION_FLAG; $$.uint_value = SECHALF; } - | COMPR { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR; } - | COMPR4 { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR4; } - | BREAKPOINT { $$.type = INSTOPTION_FLAG; $$.uint_value = BREAKPOINT; } - | NODDCLR { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCLR; } - | NODDCHK { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCHK; } - | MASK_DISABLE { $$.type = INSTOPTION_FLAG; $$.uint_value = MASK_DISABLE; } - | EOT { $$.type = INSTOPTION_FLAG; $$.uint_value = EOT; } - | SWITCH { $$.type = INSTOPTION_FLAG; $$.uint_value = SWITCH; } - | ATOMIC { $$.type = INSTOPTION_FLAG; $$.uint_value = ATOMIC; } - | CMPTCTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = CMPTCTRL; } - | WECTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = WECTRL; } - | QTR_2Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2Q; } - | QTR_3Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3Q; } - | QTR_4Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4Q; } - | QTR_2H { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2H; } - | QTR_2N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2N; } - | QTR_3N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3N; } - | QTR_4N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4N; } - | QTR_5N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_5N; } - | QTR_6N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_6N; } - | QTR_7N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_7N; } - | QTR_8N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_8N; } - | depinfo { $$.type = INSTOPTION_DEP_INFO; $$.depinfo_value = $1; } + ALIGN1 { $$.uint_value = ALIGN1;} + | ALIGN16 { $$.uint_value = ALIGN16; } + | ACCWREN { $$.uint_value = ACCWREN; } + | SECHALF { $$.uint_value = SECHALF; } + | COMPR { $$.uint_value = COMPR; } + | COMPR4 { $$.uint_value = COMPR4; } + | BREAKPOINT { $$.uint_value = BREAKPOINT; } + | NODDCLR { $$.uint_value = NODDCLR; } + | NODDCHK { $$.uint_value = NODDCHK; } + | MASK_DISABLE { $$.uint_value = MASK_DISABLE; } + | EOT { $$.uint_value = EOT; } + | SWITCH { $$.uint_value = SWITCH; } + | ATOMIC { $$.uint_value = ATOMIC; } + | CMPTCTRL { $$.uint_value = CMPTCTRL; } + | WECTRL { $$.uint_value = WECTRL; } + | QTR_2Q { $$.uint_value = QTR_2Q; } + | QTR_3Q { $$.uint_value = QTR_3Q; } + | QTR_4Q { $$.uint_value = QTR_4Q; } + | QTR_2H { $$.uint_value = QTR_2H; } + | QTR_2N { $$.uint_value = QTR_2N; } + | QTR_3N { $$.uint_value = QTR_3N; } + | QTR_4N { $$.uint_value = QTR_4N; } + | QTR_5N { $$.uint_value = QTR_5N; } + | QTR_6N { $$.uint_value = QTR_6N; } + | QTR_7N { $$.uint_value = QTR_7N; } + | QTR_8N { $$.uint_value = QTR_8N; } ; %% diff --git a/src/intel/compiler/elk/elk_ir_fs.h b/src/intel/compiler/elk/elk_ir_fs.h index 96e22f9f089..c2af4ef447e 100644 --- a/src/intel/compiler/elk/elk_ir_fs.h +++ b/src/intel/compiler/elk/elk_ir_fs.h @@ -427,8 +427,6 @@ public: bool last_rt:1; bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ bool keep_payload_trailing_zeros; - - tgl_swsb sched; /**< Scheduling info. */ }; /** diff --git a/src/intel/compiler/elk/elk_ir_performance.cpp b/src/intel/compiler/elk/elk_ir_performance.cpp index 4fb6b7b3455..43a651899f6 100644 --- a/src/intel/compiler/elk/elk_ir_performance.cpp +++ b/src/intel/compiler/elk/elk_ir_performance.cpp @@ -295,7 +295,6 @@ namespace { const struct intel_device_info *devinfo = info.devinfo; switch (info.op) { - case ELK_OPCODE_SYNC: case ELK_OPCODE_SEL: case ELK_OPCODE_NOT: case ELK_OPCODE_AND: @@ -1285,38 +1284,6 @@ namespace { return intel_eu_dependency_id(EU_DEPENDENCY_ID_FLAG0 + i); } - /** - * Return the dependency ID corresponding to the SBID read completion - * condition of a Gfx12+ SWSB. - */ - enum intel_eu_dependency_id - tgl_swsb_rd_dependency_id(tgl_swsb swsb) - { - if (swsb.mode) { - assert(swsb.sbid < - EU_NUM_DEPENDENCY_IDS - EU_DEPENDENCY_ID_SBID_RD0); - return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_RD0 + swsb.sbid); - } else { - return EU_NUM_DEPENDENCY_IDS; - } - } - - /** - * Return the dependency ID corresponding to the SBID write completion - * condition of a Gfx12+ SWSB. - */ - enum intel_eu_dependency_id - tgl_swsb_wr_dependency_id(tgl_swsb swsb) - { - if (swsb.mode) { - assert(swsb.sbid < - EU_DEPENDENCY_ID_SBID_RD0 - EU_DEPENDENCY_ID_SBID_WR0); - return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_WR0 + swsb.sbid); - } else { - return EU_NUM_DEPENDENCY_IDS; - } - } - /** * Return the implicit accumulator register accessed by channel \p i of the * instruction. @@ -1398,12 +1365,6 @@ namespace { } } - /* Stall on any SBID dependencies. */ - if (inst->sched.mode & (TGL_SBID_SET | TGL_SBID_DST)) - stall_on_dependency(st, tgl_swsb_wr_dependency_id(inst->sched)); - else if (inst->sched.mode & TGL_SBID_SRC) - stall_on_dependency(st, tgl_swsb_rd_dependency_id(inst->sched)); - /* Execute the instruction. */ execute_instruction(st, perf); @@ -1446,12 +1407,6 @@ namespace { mark_write_dependency(st, perf, flag_dependency_id(i)); } } - - /* Mark any SBID dependencies. */ - if (inst->sched.mode & TGL_SBID_SET) { - mark_read_dependency(st, perf, tgl_swsb_rd_dependency_id(inst->sched)); - mark_write_dependency(st, perf, tgl_swsb_wr_dependency_id(inst->sched)); - } } /** diff --git a/src/intel/compiler/elk/elk_lex.l b/src/intel/compiler/elk/elk_lex.l index a83e4bd9457..e7b702b1f2a 100644 --- a/src/intel/compiler/elk/elk_lex.l +++ b/src/intel/compiler/elk/elk_lex.l @@ -127,7 +127,6 @@ subb { yylval.integer = ELK_OPCODE_SUBB; return SUBB; } wait { yylval.integer = ELK_OPCODE_WAIT; return WAIT; } while { yylval.integer = ELK_OPCODE_WHILE; return WHILE; } xor { yylval.integer = ELK_OPCODE_XOR; return XOR; } -sync { yylval.integer = ELK_OPCODE_SYNC; return SYNC; } /* extended math functions */ cos { yylval.integer = ELK_MATH_FUNCTION_COS; return COS; } @@ -156,13 +155,6 @@ sin { yylval.integer = ELK_MATH_FUNCTION_SIN; return SIN; } sqrt { yylval.integer = ELK_MATH_FUNCTION_SQRT; return SQRT; } sincos { yylval.integer = ELK_MATH_FUNCTION_SINCOS; return SINCOS; } - /* sync instruction */ -allrd { yylval.integer = TGL_SYNC_ALLRD; return ALLRD; } -allwr { yylval.integer = TGL_SYNC_ALLWR; return ALLWR; } -fence { yylval.integer = TGL_SYNC_FENCE; return FENCE; } -bar { yylval.integer = TGL_SYNC_BAR; return BAR; } -host { yylval.integer = TGL_SYNC_HOST; return HOST; } - /* shared functions for send instruction */ sampler { return SAMPLER; } dp_sampler { return DP_SAMPLER; } @@ -419,17 +411,6 @@ sr[0-9]+ { yylval.integer = atoi(yytext + 2); return STATEREG; } return JUMP_LABEL; } - /* SWSB */ -"@"[1-7] { yylval.integer = atoi(yytext + 1); return REG_DIST_CURRENT; } -"F@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_FLOAT; } -"I@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_INT; } -"L@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_LONG; } -"A@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_ALL; } - -"$"[0-9]* { yylval.integer = atoi(yytext + 1); return SBID_ALLOC; } -"$"[0-9]*".src" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_SRC; } -"$"[0-9]*".dst" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_DST; } - \n { yycolumn = 1; } . { diff --git a/src/intel/compiler/elk/elk_shader.cpp b/src/intel/compiler/elk/elk_shader.cpp index 38b84362ef0..f2c597b3a4c 100644 --- a/src/intel/compiler/elk/elk_shader.cpp +++ b/src/intel/compiler/elk/elk_shader.cpp @@ -1061,7 +1061,6 @@ elk_backend_instruction::has_side_effects() const case ELK_SHADER_OPCODE_SEND: return send_has_side_effects; - case ELK_OPCODE_SYNC: case ELK_VEC4_OPCODE_UNTYPED_ATOMIC: case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE: