intel/elk: Remove SYNC opcode and SWSB annotations

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
Caio Oliveira 2024-02-10 08:16:19 -08:00 committed by Marge Bot
parent 37cd18e30e
commit 43b2261ab5
15 changed files with 49 additions and 765 deletions

View file

@ -64,17 +64,8 @@ struct predicate {
unsigned flag_subreg_nr:1;
};
enum instoption_type {
INSTOPTION_FLAG,
INSTOPTION_DEP_INFO,
};
struct instoption {
enum instoption_type type;
union {
unsigned uint_value;
struct tgl_swsb depinfo_value;
};
unsigned uint_value;
};
struct options {
@ -91,7 +82,6 @@ struct options {
unsigned qtr_ctrl:2;
unsigned nib_ctrl:1;
unsigned is_compr:1;
struct tgl_swsb depinfo;
};
struct msgdesc {

View file

@ -516,15 +516,6 @@ static const char *const math_function[16] = {
[GFX8_MATH_FUNCTION_RSQRTM] = "rsqrtm",
};
/* Mnemonics for the SYNC instruction's function control (Gfx12+), indexed by
 * the value stored in the instruction's condition-modifier field — see the
 * TGL_SYNC_* enum and the disassembler's ELK_OPCODE_SYNC case. */
static const char *const sync_function[16] = {
[TGL_SYNC_NOP] = "nop",
[TGL_SYNC_ALLRD] = "allrd",
[TGL_SYNC_ALLWR] = "allwr",
[TGL_SYNC_FENCE] = "fence",
[TGL_SYNC_BAR] = "bar",
[TGL_SYNC_HOST] = "host",
};
static const char *const math_saturate[2] = {
[0] = "",
[1] = "sat"
@ -1803,11 +1794,6 @@ elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa,
err |= control(file, "function", math_function,
elk_inst_math_function(devinfo, inst), NULL);
} else if (opcode == ELK_OPCODE_SYNC) {
string(file, " ");
err |= control(file, "function", sync_function,
elk_inst_cond_modifier(devinfo, inst), NULL);
} else if (!is_send(opcode) &&
(devinfo->ver < 12 ||
elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE ||

View file

@ -149,12 +149,6 @@ elk_get_default_access_mode(struct elk_codegen *p)
return p->current->access_mode;
}
struct tgl_swsb
elk_get_default_swsb(struct elk_codegen *p)
{
return p->current->swsb;
}
void
elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
{
@ -301,11 +295,6 @@ void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
p->current->acc_wr_control = value;
}
/* Record \p value as the default SWSB scheduling annotation in the current
 * instruction state; subsequently emitted instructions pick it up. */
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value)
{
p->current->swsb = value;
}
void elk_push_insn_state( struct elk_codegen *p )
{
assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
@ -647,7 +636,6 @@ elk_disassemble(const struct elk_isa_info *isa,
static const struct elk_opcode_desc opcode_descs[] = {
/* IR, HW, name, nsrc, ndst, gfx_vers */
{ ELK_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
{ ELK_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
{ ELK_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
{ ELK_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
{ ELK_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },

View file

@ -64,9 +64,6 @@ struct elk_insn_state {
/* One of ELK_MASK_* */
unsigned mask_control:1;
/* Scheduling info for Gfx12+ */
struct tgl_swsb swsb;
bool saturate:1;
/* One of ELK_ALIGN_* */
@ -158,7 +155,6 @@ void elk_push_insn_state( struct elk_codegen *p );
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
struct tgl_swsb elk_get_default_swsb(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
@ -174,7 +170,6 @@ void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value);
void elk_init_codegen(const struct elk_isa_info *isa,
struct elk_codegen *p, void *mem_ctx);
@ -1879,8 +1874,6 @@ void elk_NOP(struct elk_codegen *p);
void elk_WAIT(struct elk_codegen *p);
void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func);
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/

View file

@ -502,285 +502,6 @@ enum ENUM_PACKED elk_width {
ELK_WIDTH_16 = 4,
};
/**
 * Gfx12+ SWSB SBID synchronization mode.
 *
 * This is represented as a bitmask including any required SBID token
 * synchronization modes, used to synchronize out-of-order instructions. Only
 * the strongest mode of the mask will be provided to the hardware in the SWSB
 * field of an actual hardware instruction, but virtual instructions may be
 * able to take into account multiple of them.
 */
enum tgl_sbid_mode {
TGL_SBID_NULL = 0, /* No SBID synchronization. */
TGL_SBID_SRC = 1,  /* Wait until the token's sources have been read. */
TGL_SBID_DST = 2,  /* Wait until the token's destination has been written. */
TGL_SBID_SET = 4   /* Allocate the token for this instruction. */
};
/* Gfx12+ sub-byte operand precision; NONE means the full base type is used.
 * NOTE(review): presumably consumed by systolic/dpas-style instruction
 * encoding — confirm against the instruction encoder. */
enum gfx12_sub_byte_precision {
ELK_SUB_BYTE_PRECISION_NONE = 0,
/** 4 bits. Signedness determined by base type */
ELK_SUB_BYTE_PRECISION_4BIT = 1,
/** 2 bits. Signedness determined by base type */
ELK_SUB_BYTE_PRECISION_2BIT = 2,
};
/* Gfx12+ systolic depth field encodings; the enumerator names give the
 * logical depth (16/2/4/8) each hardware value selects. */
enum elk_gfx12_systolic_depth {
ELK_SYSTOLIC_DEPTH_16 = 0,
ELK_SYSTOLIC_DEPTH_2 = 1,
ELK_SYSTOLIC_DEPTH_4 = 2,
ELK_SYSTOLIC_DEPTH_8 = 3,
};
#ifdef __cplusplus
/**
 * Allow bitwise arithmetic of tgl_sbid_mode enums.
 */
inline tgl_sbid_mode
operator|(tgl_sbid_mode x, tgl_sbid_mode y)
{
return tgl_sbid_mode(unsigned(x) | unsigned(y));
}
/* Intersection of two SBID mode masks. */
inline tgl_sbid_mode
operator&(tgl_sbid_mode x, tgl_sbid_mode y)
{
return tgl_sbid_mode(unsigned(x) & unsigned(y));
}
/* Compound OR-assignment, defined in terms of operator| above. */
inline tgl_sbid_mode &
operator|=(tgl_sbid_mode &x, tgl_sbid_mode y)
{
return x = x | y;
}
#endif
/**
 * TGL+ SWSB RegDist synchronization pipeline.
 *
 * On TGL all instructions that use the RegDist synchronization mechanism are
 * considered to be executed as a single in-order pipeline, therefore only the
 * TGL_PIPE_FLOAT pipeline is applicable. On XeHP+ platforms there are two
 * additional asynchronous ALU pipelines (which still execute instructions
 * in-order and use the RegDist synchronization mechanism). TGL_PIPE_NONE
 * doesn't provide any RegDist pipeline synchronization information and allows
 * the hardware to infer the pipeline based on the source types of the
 * instruction. TGL_PIPE_ALL can be used when synchronization with all ALU
 * pipelines is intended.
 */
enum tgl_pipe {
TGL_PIPE_NONE = 0, /* Pipeline inferred by the hardware from source types. */
TGL_PIPE_FLOAT,    /* Floating-point ALU pipeline. */
TGL_PIPE_INT,      /* Integer ALU pipeline (XeHP+). */
TGL_PIPE_LONG,     /* Long (64-bit) ALU pipeline (XeHP+). */
TGL_PIPE_MATH,     /* Extended math pipeline. */
TGL_PIPE_ALL       /* Synchronize against every in-order pipeline. */
};
/**
 * Logical representation of the SWSB scheduling information of a hardware
 * instruction. The binary representation is slightly more compact.
 */
struct tgl_swsb {
unsigned regdist : 3;        /* In-order dependency distance, 0 = none. */
enum tgl_pipe pipe : 3;      /* Pipeline the RegDist value counts in. */
unsigned sbid : 5;           /* SBID token number. */
enum tgl_sbid_mode mode : 3; /* SBID synchronization mode bitmask. */
};
/**
 * Build a tgl_swsb carrying only a RegDist dependency on the d-th previous
 * in-order instruction. The index is one-based; d == 0 produces a no-op
 * annotation.
 */
static inline struct tgl_swsb
tgl_swsb_regdist(unsigned d)
{
   struct tgl_swsb swsb = { 0 };
   swsb.regdist = d;
   swsb.pipe = d ? TGL_PIPE_ALL : TGL_PIPE_NONE;
   /* Make sure the distance fits in the regdist bitfield. */
   assert(swsb.regdist == d);
   return swsb;
}
/**
 * Build a tgl_swsb that synchronizes against the given SBID token using the
 * given synchronization mode, with no RegDist dependency.
 */
static inline struct tgl_swsb
tgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid)
{
   struct tgl_swsb swsb = { 0 };
   swsb.sbid = sbid;
   swsb.mode = mode;
   /* Make sure the token number fits in the sbid bitfield. */
   assert(swsb.sbid == sbid);
   return swsb;
}
/**
* Construct a no-op scheduling annotation.
*/
static inline struct tgl_swsb
tgl_swsb_null(void)
{
return tgl_swsb_regdist(0);
}
/**
* Return a scheduling annotation that allocates the same SBID synchronization
* token as \p swsb. In addition it will synchronize against a previous
* in-order instruction if \p regdist is non-zero.
*/
static inline struct tgl_swsb
tgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist)
{
swsb.regdist = regdist;
swsb.mode = swsb.mode & TGL_SBID_SET;
swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE);
return swsb;
}
/**
* Return a scheduling annotation that synchronizes against the same SBID and
* RegDist dependencies as \p swsb, but doesn't allocate any SBID token.
*/
static inline struct tgl_swsb
tgl_swsb_src_dep(struct tgl_swsb swsb)
{
swsb.mode = swsb.mode & (TGL_SBID_SRC | TGL_SBID_DST);
return swsb;
}
/**
 * Convert the provided tgl_swsb to the hardware's binary representation of an
 * SWSB annotation.
 */
static inline uint32_t
tgl_swsb_encode(const struct intel_device_info *devinfo, struct tgl_swsb swsb)
{
/* Pure RegDist (or no-op) annotation: a pipeline selector (XeHP+ only,
 * verx10 >= 125) combined with the 3-bit distance. */
if (!swsb.mode) {
const unsigned pipe = devinfo->verx10 < 125 ? 0 :
swsb.pipe == TGL_PIPE_FLOAT ? 0x10 :
swsb.pipe == TGL_PIPE_INT ? 0x18 :
swsb.pipe == TGL_PIPE_LONG ? 0x20 :
swsb.pipe == TGL_PIPE_MATH ? 0x28 :
swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0;
return pipe | swsb.regdist;
} else if (swsb.regdist) {
/* Combined RegDist + SBID annotation. */
if (devinfo->ver >= 20) {
/* Gfx20+: 5-bit sbid, regdist at bit 5, mode/pipe selector in
 * bits 8-9. */
if ((swsb.mode & TGL_SBID_SET)) {
assert(swsb.pipe == TGL_PIPE_ALL ||
swsb.pipe == TGL_PIPE_INT || swsb.pipe == TGL_PIPE_FLOAT);
return (swsb.pipe == TGL_PIPE_INT ? 0x300 :
swsb.pipe == TGL_PIPE_FLOAT ? 0x200 : 0x100) |
swsb.regdist << 5 | swsb.sbid;
} else {
assert(!(swsb.mode & ~(TGL_SBID_DST | TGL_SBID_SRC)));
return (swsb.pipe == TGL_PIPE_ALL ? 0x300 :
swsb.mode == TGL_SBID_SRC ? 0x200 : 0x100) |
swsb.regdist << 5 | swsb.sbid;
}
} else {
/* Pre-Gfx20: 4-bit sbid, regdist at bit 4, flag bit 7. */
assert(!(swsb.sbid & ~0xfu));
return 0x80 | swsb.regdist << 4 | swsb.sbid;
}
} else {
/* SBID-only annotation; only the strongest mode bit is encoded
 * (SET > DST > SRC). */
if (devinfo->ver >= 20) {
return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0xc0 :
swsb.mode & TGL_SBID_DST ? 0x80 : 0xa0);
} else {
assert(!(swsb.sbid & ~0xfu));
return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0x40 :
swsb.mode & TGL_SBID_DST ? 0x20 : 0x30);
}
}
}
/**
 * Convert the provided binary representation of an SWSB annotation to a
 * tgl_swsb.
 *
 * \p is_unordered disambiguates encodings that are shared between ordered
 * and unordered (e.g. send/math) instructions.
 */
static inline struct tgl_swsb
tgl_swsb_decode(const struct intel_device_info *devinfo,
const bool is_unordered, const uint32_t x)
{
if (devinfo->ver >= 20) {
/* Gfx20+ layout: bits 8-9 select combined RegDist+SBID forms. */
if (x & 0x300) {
if (is_unordered) {
/* Unordered instruction: the token is being SET; bits 8-9
 * encode the RegDist pipeline. */
const struct tgl_swsb swsb = {
(x & 0xe0u) >> 5,
((x & 0x300) == 0x300 ? TGL_PIPE_INT :
(x & 0x300) == 0x200 ? TGL_PIPE_FLOAT :
TGL_PIPE_ALL),
x & 0x1fu,
TGL_SBID_SET
};
return swsb;
} else {
/* Ordered instruction: bits 8-9 encode SRC/DST wait mode. */
const struct tgl_swsb swsb = {
(x & 0xe0u) >> 5,
((x & 0x300) == 0x300 ? TGL_PIPE_ALL : TGL_PIPE_NONE),
x & 0x1fu,
((x & 0x300) == 0x200 ? TGL_SBID_SRC : TGL_SBID_DST)
};
return swsb;
}
} else if ((x & 0xe0) == 0x80) {
return tgl_swsb_sbid(TGL_SBID_DST, x & 0x1f);
} else if ((x & 0xe0) == 0xa0) {
return tgl_swsb_sbid(TGL_SBID_SRC, x & 0x1fu);
} else if ((x & 0xe0) == 0xc0) {
return tgl_swsb_sbid(TGL_SBID_SET, x & 0x1fu);
} else {
/* Pure RegDist annotation; bits 3-5 select the pipeline. */
const struct tgl_swsb swsb = { x & 0x7u,
((x & 0x38) == 0x10 ? TGL_PIPE_FLOAT :
(x & 0x38) == 0x18 ? TGL_PIPE_INT :
(x & 0x38) == 0x20 ? TGL_PIPE_LONG :
(x & 0x38) == 0x28 ? TGL_PIPE_MATH :
(x & 0x38) == 0x8 ? TGL_PIPE_ALL :
TGL_PIPE_NONE) };
return swsb;
}
} else {
/* Pre-Gfx20 layout: bit 7 flags a combined RegDist+SBID form. */
if (x & 0x80) {
const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE,
x & 0xfu,
is_unordered ?
TGL_SBID_SET : TGL_SBID_DST };
return swsb;
} else if ((x & 0x70) == 0x20) {
return tgl_swsb_sbid(TGL_SBID_DST, x & 0xfu);
} else if ((x & 0x70) == 0x30) {
return tgl_swsb_sbid(TGL_SBID_SRC, x & 0xfu);
} else if ((x & 0x70) == 0x40) {
return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu);
} else {
/* Pure RegDist annotation; pipeline selectors only exist on
 * XeHP+ (verx10 >= 125), as the assert below checks. */
const struct tgl_swsb swsb = { x & 0x7u,
((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT :
(x & 0x78) == 0x18 ? TGL_PIPE_INT :
(x & 0x78) == 0x50 ? TGL_PIPE_LONG :
(x & 0x78) == 0x8 ? TGL_PIPE_ALL :
TGL_PIPE_NONE) };
assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE);
return swsb;
}
}
}
/* Gfx12+ SYNC instruction function control; stored in the instruction's
 * condition-modifier field (see elk_SYNC() and the disassembler's
 * sync_function[] table). */
enum tgl_sync_function {
TGL_SYNC_NOP = 0x0,
TGL_SYNC_ALLRD = 0x2,
TGL_SYNC_ALLWR = 0x3,
TGL_SYNC_FENCE = 0xd,
TGL_SYNC_BAR = 0xe,
TGL_SYNC_HOST = 0xf
};
/**
* Message target: Shared Function ID for where to SEND a message.
*

View file

@ -1127,12 +1127,6 @@ void elk_NOP(struct elk_codegen *p)
elk_inst_set_opcode(p->isa, insn, ELK_OPCODE_NOP);
}
/* Emit a Gfx12+ SYNC instruction; the sync function is carried in the
 * instruction's condition-modifier field. */
void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func)
{
elk_inst *insn = next_insn(p, ELK_OPCODE_SYNC);
elk_inst_set_cond_modifier(p->devinfo, insn, func);
}
/***********************************************************************
* Comparisons, if/else/endif
*/
@ -1990,7 +1984,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
(devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
ELK_SFID_DATAPORT_WRITE);
const struct tgl_swsb swsb = elk_get_default_swsb(p);
uint32_t msg_type;
if (devinfo->ver >= 6)
@ -2010,13 +2003,11 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
elk_set_default_exec_size(p, ELK_EXECUTE_8);
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p,
retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE,
mrf.nr,
@ -2024,7 +2015,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
elk_imm_ud(offset));
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
{
@ -2098,7 +2088,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
unsigned offset)
{
const struct intel_device_info *devinfo = p->devinfo;
const struct tgl_swsb swsb = elk_get_default_swsb(p);
if (devinfo->ver >= 6)
offset /= 16;
@ -2125,7 +2114,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
{
elk_push_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_set_default_exec_size(p, ELK_EXECUTE_8);
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
@ -2134,11 +2122,9 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
/* set message header global offset field (reg 0, element 2) */
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, get_element_ud(mrf, 2), elk_imm_ud(offset));
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
{
@ -2215,7 +2201,6 @@ void elk_oword_block_read(struct elk_codegen *p,
(devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE :
ELK_SFID_DATAPORT_READ);
const unsigned exec_size = 1 << elk_get_default_exec_size(p);
const struct tgl_swsb swsb = elk_get_default_swsb(p);
/* On newer hardware, offset is in units of owords. */
if (devinfo->ver >= 6)
@ -2231,12 +2216,10 @@ void elk_oword_block_read(struct elk_codegen *p,
elk_push_insn_state(p);
elk_set_default_exec_size(p, ELK_EXECUTE_8);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p,
retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE,
mrf.nr,
@ -2244,8 +2227,6 @@ void elk_oword_block_read(struct elk_codegen *p,
elk_imm_ud(offset));
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
elk_inst *insn = next_insn(p, ELK_OPCODE_SEND);
elk_inst_set_sfid(devinfo, insn, target_cache);
@ -2444,7 +2425,6 @@ void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
elk_push_insn_state(p);
elk_AND(p, temp, get_element_ud(sampler_index, 0), elk_imm_ud(0x0f0));
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_SHL(p, temp, temp, elk_imm_ud(4));
elk_ADD(p,
get_element_ud(header, 3),
@ -2527,7 +2507,6 @@ elk_send_indirect_message(struct elk_codegen *p,
elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD));
elk_set_desc(p, send, desc.ud | desc_imm);
} else {
const struct tgl_swsb swsb = elk_get_default_swsb(p);
struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD);
elk_push_insn_state(p);
@ -2536,7 +2515,6 @@ elk_send_indirect_message(struct elk_codegen *p,
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
elk_set_default_flag_reg(p, 0, 0);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
@ -2546,7 +2524,6 @@ elk_send_indirect_message(struct elk_codegen *p,
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
send = next_insn(p, ELK_OPCODE_SEND);
elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD));
elk_set_src1(p, send, addr);
@ -2566,7 +2543,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
unsigned desc_imm)
{
if (surface.file != ELK_IMMEDIATE_VALUE) {
const struct tgl_swsb swsb = elk_get_default_swsb(p);
struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD);
elk_push_insn_state(p);
@ -2575,7 +2551,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
elk_set_default_flag_reg(p, 0, 0);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
* some surface array is accessed out of bounds.
@ -2588,7 +2563,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
elk_pop_insn_state(p);
surface = addr;
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
elk_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
@ -3161,7 +3135,6 @@ elk_broadcast(struct elk_codegen *p,
if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
subscript(src, ELK_REGISTER_TYPE_D, 0));
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
subscript(src, ELK_REGISTER_TYPE_D, 1));
} else {
@ -3205,15 +3178,12 @@ elk_broadcast(struct elk_codegen *p,
* register is above this limit.
*/
if (offset >= limit) {
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_ADD(p, addr, addr, elk_imm_ud(offset - offset % limit));
offset = offset % limit;
}
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_regdist(1));
/* Use indirect addressing to fetch the specified component. */
if (type_sz(src.type) > 4 &&
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) ||
@ -3233,7 +3203,6 @@ elk_broadcast(struct elk_codegen *p,
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
retype(elk_vec1_indirect(addr.subnr, offset),
ELK_REGISTER_TYPE_D));
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
retype(elk_vec1_indirect(addr.subnr, offset + 4),
ELK_REGISTER_TYPE_D));
@ -3326,27 +3295,18 @@ elk_float_controls_mode(struct elk_codegen *p,
* does not ensure execution pipeline coherency. Software must set the
* thread control field to switch for an instruction that uses
* control register as an explicit operand."
*
* On Gfx12+ this is implemented in terms of SWSB annotations instead.
*/
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_inst *inst = elk_AND(p, elk_cr0_reg(0), elk_cr0_reg(0),
elk_imm_ud(~mask));
elk_inst_set_exec_size(p->devinfo, inst, ELK_EXECUTE_1);
if (p->devinfo->ver < 12)
elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH);
elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH);
if (mode) {
elk_inst *inst_or = elk_OR(p, elk_cr0_reg(0), elk_cr0_reg(0),
elk_imm_ud(mode));
elk_inst_set_exec_size(p->devinfo, inst_or, ELK_EXECUTE_1);
if (p->devinfo->ver < 12)
elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH);
elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH);
}
if (p->devinfo->ver >= 12)
elk_SYNC(p, TGL_SYNC_NOP);
}
void

View file

@ -13,7 +13,6 @@ extern "C" {
enum elk_opcode {
/* These are the actual hardware instructions. */
ELK_OPCODE_ILLEGAL,
ELK_OPCODE_SYNC,
ELK_OPCODE_MOV,
ELK_OPCODE_SEL,
ELK_OPCODE_MOVI, /**< G45+ */

View file

@ -293,7 +293,7 @@ sources_not_null(const struct elk_isa_info *isa,
if (num_sources == 3)
return (struct string){};
if (num_sources >= 1 && elk_inst_opcode(isa, inst) != ELK_OPCODE_SYNC)
if (num_sources >= 1)
ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
if (num_sources == 2)

View file

@ -480,7 +480,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
if (type_sz(reg.type) > 4 && !devinfo->has_64bit_float) {
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
subscript(reg, ELK_REGISTER_TYPE_D, 0));
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
subscript(reg, ELK_REGISTER_TYPE_D, 1));
} else {
@ -544,16 +543,11 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
insn = elk_MOV(p, addr, elk_imm_uw(imm_byte_offset));
elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE);
elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE);
if (devinfo->ver >= 12)
elk_set_default_swsb(p, tgl_swsb_null());
else
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
}
insn = elk_ADD(p, addr, indirect_byte_offset, elk_imm_uw(imm_byte_offset));
if (devinfo->ver >= 12)
elk_set_default_swsb(p, tgl_swsb_regdist(1));
else if (devinfo->ver >= 7)
if (devinfo->ver >= 7)
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
if (type_sz(reg.type) > 4 &&
@ -577,7 +571,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
*/
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
retype(elk_VxH_indirect(0, 0), ELK_REGISTER_TYPE_D));
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
retype(elk_VxH_indirect(0, 4), ELK_REGISTER_TYPE_D));
} else {
@ -708,28 +701,20 @@ elk_fs_generator::generate_shuffle(elk_fs_inst *inst,
insn = elk_MOV(p, addr, elk_imm_uw(src_start_offset));
elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE);
elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE);
if (devinfo->ver >= 12)
elk_set_default_swsb(p, tgl_swsb_null());
else
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
/* Take into account the component size and horizontal stride. */
assert(src.vstride == src.hstride + src.width);
insn = elk_SHL(p, addr, group_idx,
elk_imm_uw(util_logbase2(type_sz(src.type)) +
src.hstride - 1));
if (devinfo->ver >= 12)
elk_set_default_swsb(p, tgl_swsb_regdist(1));
else
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
/* Add on the register start offset */
elk_ADD(p, addr, addr, elk_imm_uw(src_start_offset));
elk_MOV(p, suboffset(dst, group << (dst.hstride - 1)),
retype(elk_VxH_indirect(0, 0), src.type));
}
elk_set_default_swsb(p, tgl_swsb_null());
}
}
@ -790,12 +775,8 @@ elk_fs_generator::generate_quad_swizzle(const elk_fs_inst *inst,
4 * inst->dst.stride, 1, 4 * inst->dst.stride),
stride(suboffset(src, ELK_GET_SWZ(swiz, c)), 4, 1, 0));
if (devinfo->ver < 12) {
elk_inst_set_no_dd_clear(devinfo, insn, c < 3);
elk_inst_set_no_dd_check(devinfo, insn, c > 0);
}
elk_set_default_swsb(p, tgl_swsb_null());
elk_inst_set_no_dd_clear(devinfo, insn, c < 3);
elk_inst_set_no_dd_check(devinfo, insn, c > 0);
}
break;
@ -847,12 +828,7 @@ void
elk_fs_generator::generate_barrier(elk_fs_inst *, struct elk_reg src)
{
elk_barrier(p, src);
if (devinfo->ver >= 12) {
elk_set_default_swsb(p, tgl_swsb_null());
elk_SYNC(p, TGL_SYNC_BAR);
} else {
elk_WAIT(p);
}
elk_WAIT(p);
}
bool
@ -1144,18 +1120,15 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst,
/* Set up an implied move from g0 to the MRF. */
src = retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UW);
} else {
const tgl_swsb swsb = elk_get_default_swsb(p);
assert(inst->base_mrf != -1);
struct elk_reg header_reg = elk_message_reg(inst->base_mrf);
elk_push_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_set_default_exec_size(p, ELK_EXECUTE_8);
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
elk_MOV(p, header_reg, elk_vec8_grf(0, 0));
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_set_default_exec_size(p, ELK_EXECUTE_1);
if (inst->offset) {
@ -1165,7 +1138,6 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst,
}
elk_pop_insn_state(p);
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
}
@ -1302,7 +1274,6 @@ elk_fs_generator::generate_ddy(const elk_fs_inst *inst,
elk_ADD(p, byte_offset(dst, g * type_size),
negate(byte_offset(src, g * type_size)),
byte_offset(src, (g + 2) * type_size));
elk_set_default_swsb(p, tgl_swsb_null());
}
elk_pop_insn_state(p);
} else {
@ -1365,7 +1336,6 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src)
const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
MIN2(16, inst->exec_size);
const unsigned block_size = 4 * lower_size / REG_SIZE;
const tgl_swsb swsb = elk_get_default_swsb(p);
assert(inst->mlen != 0);
elk_push_insn_state(p);
@ -1375,17 +1345,9 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src)
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
elk_set_default_group(p, inst->group + lower_size * i);
if (i > 0) {
assert(swsb.mode & TGL_SBID_SET);
elk_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_SRC, swsb.sbid));
} else {
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
}
elk_MOV(p, elk_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
retype(offset(src, block_size * i), ELK_REGISTER_TYPE_UD));
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
elk_oword_block_write_scratch(p, elk_message_reg(inst->base_mrf),
block_size,
inst->offset + block_size * REG_SIZE * i);
@ -1459,10 +1421,7 @@ elk_fs_generator::generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst)
dst.type = ELK_REGISTER_TYPE_UD;
elk_inst *insn = elk_MOV(p, dst, elk_imm_ud(0));
if (devinfo->ver >= 12)
elk_set_default_swsb(p, tgl_swsb_null());
else
elk_inst_set_no_dd_clear(p->devinfo, insn, true);
elk_inst_set_no_dd_clear(p->devinfo, insn, true);
/* Copy the per-thread scratch space size from g0.3[3:0] */
elk_set_default_exec_size(p, ELK_EXECUTE_1);
@ -1588,7 +1547,6 @@ elk_fs_generator::generate_set_sample_id(elk_fs_inst *inst,
elk_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1);
elk_inst_set_group(devinfo, insn, inst->group + lower_size * i);
elk_inst_set_compression(devinfo, insn, lower_size > 8);
elk_set_default_swsb(p, tgl_swsb_null());
}
}
@ -1625,7 +1583,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
struct elk_reg src[4], dst;
unsigned int last_insn_offset = p->next_insn_offset;
bool multiple_instructions_emitted = false;
tgl_swsb swsb = inst->sched;
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
* "Register Region Restrictions" section: for BDW, SKL:
@ -1663,10 +1620,8 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
elk_set_default_flag_reg(p, 0, 0);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_MOV(p, elk_acc_reg(8), elk_imm_f(0.0f));
last_insn_offset = p->next_insn_offset;
swsb = tgl_swsb_dst_dep(swsb, 1);
}
if (!is_accum_used && !inst->eot) {
@ -1674,24 +1629,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
inst->dst.is_accumulator();
}
/* Wa_14013672992:
*
* Always use @1 SWSB for EOT.
*/
if (inst->eot && intel_needs_workaround(devinfo, 14013672992)) {
if (tgl_swsb_src_dep(swsb).mode) {
elk_set_default_exec_size(p, ELK_EXECUTE_1);
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
elk_set_default_flag_reg(p, 0, 0);
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
elk_SYNC(p, TGL_SYNC_NOP);
last_insn_offset = p->next_insn_offset;
}
swsb = tgl_swsb_dst_dep(swsb, 1);
}
if (unlikely(debug_flag))
elk_disasm_annotate(elk_disasm_info, inst, p->next_insn_offset);
@ -1759,7 +1696,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
} else {
elk_set_default_acc_write_control(p, inst->writes_accumulator);
}
elk_set_default_swsb(p, swsb);
unsigned exec_size = inst->exec_size;
if (devinfo->verx10 == 70 &&
@ -1775,13 +1711,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
assert(inst->mlen <= ELK_MAX_MSG_LENGTH * reg_unit(devinfo));
switch (inst->opcode) {
case ELK_OPCODE_SYNC:
assert(src[0].file == ELK_IMMEDIATE_VALUE);
elk_SYNC(p, tgl_sync_function(src[0].ud));
if (tgl_sync_function(src[0].ud) == TGL_SYNC_NOP)
++sync_nop_count;
break;
case ELK_OPCODE_MOV:
elk_MOV(p, dst, src[0]);
break;
@ -2148,33 +2077,23 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
}
case ELK_FS_OPCODE_SCHEDULING_FENCE:
if (inst->sources == 0 && swsb.regdist == 0 &&
swsb.mode == TGL_SBID_NULL) {
if (inst->sources == 0) {
if (unlikely(debug_flag))
elk_disasm_info->use_tail = true;
break;
}
if (devinfo->ver >= 12) {
/* Use the available SWSB information to stall. A single SYNC is
* sufficient since if there were multiple dependencies, the
* scoreboard algorithm already injected other SYNCs before this
* instruction.
for (unsigned i = 0; i < inst->sources; i++) {
/* Emit a MOV to force a stall until the instruction producing the
* registers finishes.
*/
elk_SYNC(p, TGL_SYNC_NOP);
} else {
for (unsigned i = 0; i < inst->sources; i++) {
/* Emit a MOV to force a stall until the instruction producing the
* registers finishes.
*/
elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW),
retype(src[i], ELK_REGISTER_TYPE_UW));
}
if (inst->sources > 1)
multiple_instructions_emitted = true;
elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW),
retype(src[i], ELK_REGISTER_TYPE_UW));
}
if (inst->sources > 1)
multiple_instructions_emitted = true;
break;
case ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL:
@ -2209,7 +2128,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
elk_MOV(p, dst, src[1]);
elk_set_default_mask_control(p, ELK_MASK_ENABLE);
elk_set_default_swsb(p, tgl_swsb_null());
elk_MOV(p, dst, src[0]);
break;
@ -2298,21 +2216,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
break;
case ELK_SHADER_OPCODE_READ_SR_REG:
if (devinfo->ver >= 12) {
/* There is a SWSB restriction that requires that any time sr0 is
* accessed both the instruction doing the access and the next one
* have SWSB set to RegDist(1).
*/
if (elk_get_default_swsb(p).mode != TGL_SBID_NULL)
elk_SYNC(p, TGL_SYNC_NOP);
assert(src[0].file == ELK_IMMEDIATE_VALUE);
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_AND(p, dst, dst, elk_imm_ud(0xffffffff));
} else {
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
}
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
break;
default:
@ -2339,14 +2243,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
elk_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
}
}
/* When enabled, insert sync NOP after every instruction and make sure
* that current instruction depends on the previous instruction.
*/
if (INTEL_DEBUG(DEBUG_SWSB_STALL) && devinfo->ver >= 12) {
elk_set_default_swsb(p, tgl_swsb_regdist(1));
elk_SYNC(p, TGL_SYNC_NOP);
}
}
elk_set_uip_jip(p, start_offset);

View file

@ -5244,50 +5244,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
const fs_builder ubld = bld.group(8, 0);
/* A memory barrier with acquire semantics requires us to
* guarantee that memory operations of the specified storage
* class sequenced-after the barrier aren't reordered before the
* barrier, nor before any previous atomic operation
* sequenced-before the barrier which may be synchronizing this
* acquire barrier with a prior release sequence.
*
* In order to guarantee the latter we must make sure that any
* such previous operation has completed execution before
* invalidating the relevant caches, since otherwise some cache
* could be polluted by a concurrent thread after its
* invalidation but before the previous atomic completes, which
* could lead to a violation of the expected memory ordering if
* a subsequent memory read hits the polluted cacheline, which
* would return a stale value read from memory before the
* completion of the atomic sequenced-before the barrier.
*
* This ordering inversion can be avoided trivially if the
* operations we need to order are all handled by a single
* in-order cache, since the flush implied by the memory fence
* occurs after any pending operations have completed, however
* that doesn't help us when dealing with multiple caches
* processing requests out of order, in which case we need to
* explicitly stall the EU until any pending memory operations
* have executed.
*
* Note that that might be somewhat heavy handed in some cases.
* In particular when this memory fence was inserted by
* spirv_to_nir() lowering an atomic with acquire semantics into
* an atomic+barrier sequence we could do a better job by
* synchronizing with respect to that one atomic *only*, but
* that would require additional information not currently
* available to the backend.
*
* XXX - Use an alternative workaround on IVB and ICL, since
* SYNC.ALLWR is only available on Gfx12+.
*/
if (devinfo->ver >= 12 &&
(!nir_intrinsic_has_memory_scope(instr) ||
(nir_intrinsic_memory_semantics(instr) & NIR_MEMORY_ACQUIRE))) {
ubld.exec_all().group(1, 0).emit(
ELK_OPCODE_SYNC, ubld.null_reg_ud(), elk_imm_ud(TGL_SYNC_ALLWR));
}
if (devinfo->has_lsc) {
assert(devinfo->verx10 >= 125);
uint32_t desc =
@ -5308,16 +5264,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
if (slm_fence) {
assert(opcode == ELK_SHADER_OPCODE_MEMORY_FENCE);
if (intel_needs_workaround(devinfo, 14014063774)) {
/* Wa_14014063774
*
* Before SLM fence compiler needs to insert SYNC.ALLWR in order
* to avoid the SLM data race.
*/
ubld.exec_all().group(1, 0).emit(
ELK_OPCODE_SYNC, ubld.null_reg_ud(),
elk_imm_ud(TGL_SYNC_ALLWR));
}
fence_regs[fence_regs_count++] =
emit_fence(ubld, opcode, GFX12_SFID_SLM, desc,
true /* commit_enable */,

View file

@ -349,7 +349,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
struct options options;
struct instoption instoption;
struct msgdesc msgdesc;
struct tgl_swsb depinfo;
elk_inst *instruction;
}
@ -395,7 +394,7 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
%token <integer> OR
%token <integer> PLN POP PUSH
%token <integer> RET RNDD RNDE RNDU RNDZ
%token <integer> SAD2 SADA2 SEL SHL SHR SMOV SUBB SYNC
%token <integer> SAD2 SADA2 SEL SHL SHR SMOV SUBB
%token <integer> SEND SENDC
%token <integer> WAIT WHILE
%token <integer> XOR
@ -404,11 +403,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
%token <integer> COS EXP FDIV INV INVM INTDIV INTDIVMOD INTMOD LOG POW RSQ
%token <integer> RSQRTM SIN SINCOS SQRT
/* sync instruction */
%token <integer> ALLRD ALLWR FENCE BAR HOST
%type <integer> sync_function
%type <reg> sync_arg
/* shared functions for send */
%token CONST CRE DATA DP_DATA_1 GATEWAY MATH PIXEL_INTERP READ RENDER SAMPLER
%token THREAD_SPAWNER URB VME WRITE DP_SAMPLER RT_ACCEL SLM TGM UGM
@ -544,33 +538,11 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
%type <string> jumplabeltarget
%type <string> jumplabel
/* SWSB */
%token <integer> REG_DIST_CURRENT
%token <integer> REG_DIST_FLOAT
%token <integer> REG_DIST_INT
%token <integer> REG_DIST_LONG
%token <integer> REG_DIST_ALL
%token <integer> SBID_ALLOC
%token <integer> SBID_WAIT_SRC
%token <integer> SBID_WAIT_DST
%type <depinfo> depinfo
%code {
static void
add_instruction_option(struct options *options, struct instoption opt)
{
if (opt.type == INSTOPTION_DEP_INFO) {
if (opt.depinfo_value.regdist) {
options->depinfo.regdist = opt.depinfo_value.regdist;
options->depinfo.pipe = opt.depinfo_value.pipe;
} else {
options->depinfo.sbid = opt.depinfo_value.sbid;
options->depinfo.mode = opt.depinfo_value.mode;
}
return;
}
switch (opt.uint_value) {
case ALIGN1:
options->access_mode = ELK_ALIGN_1;
@ -687,7 +659,6 @@ instruction:
| ternaryinstruction
| sendinstruction
| illegalinstruction
| syncinstruction
;
relocatableinstruction:
@ -1443,54 +1414,6 @@ loopinstruction:
}
;
/* sync instruction */
syncinstruction:
predicate SYNC sync_function execsize sync_arg instoptions
{
if (p->devinfo->ver < 12) {
error(&@2, "sync instruction is supported only on gfx12+\n");
}
if ($5.file == ELK_IMMEDIATE_VALUE &&
$3 != TGL_SYNC_ALLRD &&
$3 != TGL_SYNC_ALLWR) {
error(&@2, "Only allrd and allwr support immediate argument\n");
}
elk_set_default_access_mode(p, $6.access_mode);
elk_SYNC(p, $3);
i965_asm_set_instruction_options(p, $6);
elk_inst_set_exec_size(p->devinfo, elk_last_inst, $4);
elk_set_src0(p, elk_last_inst, $5);
elk_inst_set_eot(p->devinfo, elk_last_inst, $6.end_of_thread);
elk_inst_set_qtr_control(p->devinfo, elk_last_inst, $6.qtr_ctrl);
elk_inst_set_nib_control(p->devinfo, elk_last_inst, $6.nib_ctrl);
elk_pop_insn_state(p);
}
;
sync_function:
NOP { $$ = TGL_SYNC_NOP; }
| ALLRD
| ALLWR
| FENCE
| BAR
| HOST
;
sync_arg:
nullreg region reg_type
{
$$ = $1;
$$.vstride = $2.vstride;
$$.width = $2.width;
$$.hstride = $2.hstride;
$$.type = $3;
}
| immreg
;
/* Relative location */
relativelocation2:
immreg
@ -2367,84 +2290,33 @@ instoption_list:
}
;
depinfo:
REG_DIST_CURRENT
{
memset(&$$, 0, sizeof($$));
$$.regdist = $1;
$$.pipe = TGL_PIPE_NONE;
}
| REG_DIST_FLOAT
{
memset(&$$, 0, sizeof($$));
$$.regdist = $1;
$$.pipe = TGL_PIPE_FLOAT;
}
| REG_DIST_INT
{
memset(&$$, 0, sizeof($$));
$$.regdist = $1;
$$.pipe = TGL_PIPE_INT;
}
| REG_DIST_LONG
{
memset(&$$, 0, sizeof($$));
$$.regdist = $1;
$$.pipe = TGL_PIPE_LONG;
}
| REG_DIST_ALL
{
memset(&$$, 0, sizeof($$));
$$.regdist = $1;
$$.pipe = TGL_PIPE_ALL;
}
| SBID_ALLOC
{
memset(&$$, 0, sizeof($$));
$$.sbid = $1;
$$.mode = TGL_SBID_SET;
}
| SBID_WAIT_SRC
{
memset(&$$, 0, sizeof($$));
$$.sbid = $1;
$$.mode = TGL_SBID_SRC;
}
| SBID_WAIT_DST
{
memset(&$$, 0, sizeof($$));
$$.sbid = $1;
$$.mode = TGL_SBID_DST;
}
instoption:
ALIGN1 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN1;}
| ALIGN16 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN16; }
| ACCWREN { $$.type = INSTOPTION_FLAG; $$.uint_value = ACCWREN; }
| SECHALF { $$.type = INSTOPTION_FLAG; $$.uint_value = SECHALF; }
| COMPR { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR; }
| COMPR4 { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR4; }
| BREAKPOINT { $$.type = INSTOPTION_FLAG; $$.uint_value = BREAKPOINT; }
| NODDCLR { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCLR; }
| NODDCHK { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCHK; }
| MASK_DISABLE { $$.type = INSTOPTION_FLAG; $$.uint_value = MASK_DISABLE; }
| EOT { $$.type = INSTOPTION_FLAG; $$.uint_value = EOT; }
| SWITCH { $$.type = INSTOPTION_FLAG; $$.uint_value = SWITCH; }
| ATOMIC { $$.type = INSTOPTION_FLAG; $$.uint_value = ATOMIC; }
| CMPTCTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = CMPTCTRL; }
| WECTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = WECTRL; }
| QTR_2Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2Q; }
| QTR_3Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3Q; }
| QTR_4Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4Q; }
| QTR_2H { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2H; }
| QTR_2N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2N; }
| QTR_3N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3N; }
| QTR_4N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4N; }
| QTR_5N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_5N; }
| QTR_6N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_6N; }
| QTR_7N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_7N; }
| QTR_8N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_8N; }
| depinfo { $$.type = INSTOPTION_DEP_INFO; $$.depinfo_value = $1; }
ALIGN1 { $$.uint_value = ALIGN1;}
| ALIGN16 { $$.uint_value = ALIGN16; }
| ACCWREN { $$.uint_value = ACCWREN; }
| SECHALF { $$.uint_value = SECHALF; }
| COMPR { $$.uint_value = COMPR; }
| COMPR4 { $$.uint_value = COMPR4; }
| BREAKPOINT { $$.uint_value = BREAKPOINT; }
| NODDCLR { $$.uint_value = NODDCLR; }
| NODDCHK { $$.uint_value = NODDCHK; }
| MASK_DISABLE { $$.uint_value = MASK_DISABLE; }
| EOT { $$.uint_value = EOT; }
| SWITCH { $$.uint_value = SWITCH; }
| ATOMIC { $$.uint_value = ATOMIC; }
| CMPTCTRL { $$.uint_value = CMPTCTRL; }
| WECTRL { $$.uint_value = WECTRL; }
| QTR_2Q { $$.uint_value = QTR_2Q; }
| QTR_3Q { $$.uint_value = QTR_3Q; }
| QTR_4Q { $$.uint_value = QTR_4Q; }
| QTR_2H { $$.uint_value = QTR_2H; }
| QTR_2N { $$.uint_value = QTR_2N; }
| QTR_3N { $$.uint_value = QTR_3N; }
| QTR_4N { $$.uint_value = QTR_4N; }
| QTR_5N { $$.uint_value = QTR_5N; }
| QTR_6N { $$.uint_value = QTR_6N; }
| QTR_7N { $$.uint_value = QTR_7N; }
| QTR_8N { $$.uint_value = QTR_8N; }
;
%%

View file

@ -427,8 +427,6 @@ public:
bool last_rt:1;
bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
bool keep_payload_trailing_zeros;
tgl_swsb sched; /**< Scheduling info. */
};
/**

View file

@ -295,7 +295,6 @@ namespace {
const struct intel_device_info *devinfo = info.devinfo;
switch (info.op) {
case ELK_OPCODE_SYNC:
case ELK_OPCODE_SEL:
case ELK_OPCODE_NOT:
case ELK_OPCODE_AND:
@ -1285,38 +1284,6 @@ namespace {
return intel_eu_dependency_id(EU_DEPENDENCY_ID_FLAG0 + i);
}
/**
* Return the dependency ID corresponding to the SBID read completion
* condition of a Gfx12+ SWSB.
*/
enum intel_eu_dependency_id
tgl_swsb_rd_dependency_id(tgl_swsb swsb)
{
if (swsb.mode) {
assert(swsb.sbid <
EU_NUM_DEPENDENCY_IDS - EU_DEPENDENCY_ID_SBID_RD0);
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_RD0 + swsb.sbid);
} else {
return EU_NUM_DEPENDENCY_IDS;
}
}
/**
* Return the dependency ID corresponding to the SBID write completion
* condition of a Gfx12+ SWSB.
*/
enum intel_eu_dependency_id
tgl_swsb_wr_dependency_id(tgl_swsb swsb)
{
if (swsb.mode) {
assert(swsb.sbid <
EU_DEPENDENCY_ID_SBID_RD0 - EU_DEPENDENCY_ID_SBID_WR0);
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_WR0 + swsb.sbid);
} else {
return EU_NUM_DEPENDENCY_IDS;
}
}
/**
* Return the implicit accumulator register accessed by channel \p i of the
* instruction.
@ -1398,12 +1365,6 @@ namespace {
}
}
/* Stall on any SBID dependencies. */
if (inst->sched.mode & (TGL_SBID_SET | TGL_SBID_DST))
stall_on_dependency(st, tgl_swsb_wr_dependency_id(inst->sched));
else if (inst->sched.mode & TGL_SBID_SRC)
stall_on_dependency(st, tgl_swsb_rd_dependency_id(inst->sched));
/* Execute the instruction. */
execute_instruction(st, perf);
@ -1446,12 +1407,6 @@ namespace {
mark_write_dependency(st, perf, flag_dependency_id(i));
}
}
/* Mark any SBID dependencies. */
if (inst->sched.mode & TGL_SBID_SET) {
mark_read_dependency(st, perf, tgl_swsb_rd_dependency_id(inst->sched));
mark_write_dependency(st, perf, tgl_swsb_wr_dependency_id(inst->sched));
}
}
/**

View file

@ -127,7 +127,6 @@ subb { yylval.integer = ELK_OPCODE_SUBB; return SUBB; }
wait { yylval.integer = ELK_OPCODE_WAIT; return WAIT; }
while { yylval.integer = ELK_OPCODE_WHILE; return WHILE; }
xor { yylval.integer = ELK_OPCODE_XOR; return XOR; }
sync { yylval.integer = ELK_OPCODE_SYNC; return SYNC; }
/* extended math functions */
cos { yylval.integer = ELK_MATH_FUNCTION_COS; return COS; }
@ -156,13 +155,6 @@ sin { yylval.integer = ELK_MATH_FUNCTION_SIN; return SIN; }
sqrt { yylval.integer = ELK_MATH_FUNCTION_SQRT; return SQRT; }
sincos { yylval.integer = ELK_MATH_FUNCTION_SINCOS; return SINCOS; }
/* sync instruction */
allrd { yylval.integer = TGL_SYNC_ALLRD; return ALLRD; }
allwr { yylval.integer = TGL_SYNC_ALLWR; return ALLWR; }
fence { yylval.integer = TGL_SYNC_FENCE; return FENCE; }
bar { yylval.integer = TGL_SYNC_BAR; return BAR; }
host { yylval.integer = TGL_SYNC_HOST; return HOST; }
/* shared functions for send instruction */
sampler { return SAMPLER; }
dp_sampler { return DP_SAMPLER; }
@ -419,17 +411,6 @@ sr[0-9]+ { yylval.integer = atoi(yytext + 2); return STATEREG; }
return JUMP_LABEL;
}
/* SWSB */
"@"[1-7] { yylval.integer = atoi(yytext + 1); return REG_DIST_CURRENT; }
"F@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_FLOAT; }
"I@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_INT; }
"L@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_LONG; }
"A@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_ALL; }
"$"[0-9]* { yylval.integer = atoi(yytext + 1); return SBID_ALLOC; }
"$"[0-9]*".src" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_SRC; }
"$"[0-9]*".dst" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_DST; }
\n { yycolumn = 1; }
. {

View file

@ -1061,7 +1061,6 @@ elk_backend_instruction::has_side_effects() const
case ELK_SHADER_OPCODE_SEND:
return send_has_side_effects;
case ELK_OPCODE_SYNC:
case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE: