mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 06:10:13 +01:00
intel/elk: Remove SYNC opcode and SWSB annotations
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
parent
37cd18e30e
commit
43b2261ab5
15 changed files with 49 additions and 765 deletions
|
|
@ -64,17 +64,8 @@ struct predicate {
|
|||
unsigned flag_subreg_nr:1;
|
||||
};
|
||||
|
||||
enum instoption_type {
|
||||
INSTOPTION_FLAG,
|
||||
INSTOPTION_DEP_INFO,
|
||||
};
|
||||
|
||||
struct instoption {
|
||||
enum instoption_type type;
|
||||
union {
|
||||
unsigned uint_value;
|
||||
struct tgl_swsb depinfo_value;
|
||||
};
|
||||
unsigned uint_value;
|
||||
};
|
||||
|
||||
struct options {
|
||||
|
|
@ -91,7 +82,6 @@ struct options {
|
|||
unsigned qtr_ctrl:2;
|
||||
unsigned nib_ctrl:1;
|
||||
unsigned is_compr:1;
|
||||
struct tgl_swsb depinfo;
|
||||
};
|
||||
|
||||
struct msgdesc {
|
||||
|
|
|
|||
|
|
@ -516,15 +516,6 @@ static const char *const math_function[16] = {
|
|||
[GFX8_MATH_FUNCTION_RSQRTM] = "rsqrtm",
|
||||
};
|
||||
|
||||
static const char *const sync_function[16] = {
|
||||
[TGL_SYNC_NOP] = "nop",
|
||||
[TGL_SYNC_ALLRD] = "allrd",
|
||||
[TGL_SYNC_ALLWR] = "allwr",
|
||||
[TGL_SYNC_FENCE] = "fence",
|
||||
[TGL_SYNC_BAR] = "bar",
|
||||
[TGL_SYNC_HOST] = "host",
|
||||
};
|
||||
|
||||
static const char *const math_saturate[2] = {
|
||||
[0] = "",
|
||||
[1] = "sat"
|
||||
|
|
@ -1803,11 +1794,6 @@ elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa,
|
|||
err |= control(file, "function", math_function,
|
||||
elk_inst_math_function(devinfo, inst), NULL);
|
||||
|
||||
} else if (opcode == ELK_OPCODE_SYNC) {
|
||||
string(file, " ");
|
||||
err |= control(file, "function", sync_function,
|
||||
elk_inst_cond_modifier(devinfo, inst), NULL);
|
||||
|
||||
} else if (!is_send(opcode) &&
|
||||
(devinfo->ver < 12 ||
|
||||
elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE ||
|
||||
|
|
|
|||
|
|
@ -149,12 +149,6 @@ elk_get_default_access_mode(struct elk_codegen *p)
|
|||
return p->current->access_mode;
|
||||
}
|
||||
|
||||
struct tgl_swsb
|
||||
elk_get_default_swsb(struct elk_codegen *p)
|
||||
{
|
||||
return p->current->swsb;
|
||||
}
|
||||
|
||||
void
|
||||
elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
|
||||
{
|
||||
|
|
@ -301,11 +295,6 @@ void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
|
|||
p->current->acc_wr_control = value;
|
||||
}
|
||||
|
||||
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value)
|
||||
{
|
||||
p->current->swsb = value;
|
||||
}
|
||||
|
||||
void elk_push_insn_state( struct elk_codegen *p )
|
||||
{
|
||||
assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
|
||||
|
|
@ -647,7 +636,6 @@ elk_disassemble(const struct elk_isa_info *isa,
|
|||
static const struct elk_opcode_desc opcode_descs[] = {
|
||||
/* IR, HW, name, nsrc, ndst, gfx_vers */
|
||||
{ ELK_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
|
||||
{ ELK_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
|
||||
{ ELK_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
|
||||
{ ELK_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
|
||||
{ ELK_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },
|
||||
|
|
|
|||
|
|
@ -64,9 +64,6 @@ struct elk_insn_state {
|
|||
/* One of ELK_MASK_* */
|
||||
unsigned mask_control:1;
|
||||
|
||||
/* Scheduling info for Gfx12+ */
|
||||
struct tgl_swsb swsb;
|
||||
|
||||
bool saturate:1;
|
||||
|
||||
/* One of ELK_ALIGN_* */
|
||||
|
|
@ -158,7 +155,6 @@ void elk_push_insn_state( struct elk_codegen *p );
|
|||
unsigned elk_get_default_exec_size(struct elk_codegen *p);
|
||||
unsigned elk_get_default_group(struct elk_codegen *p);
|
||||
unsigned elk_get_default_access_mode(struct elk_codegen *p);
|
||||
struct tgl_swsb elk_get_default_swsb(struct elk_codegen *p);
|
||||
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
|
||||
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
|
||||
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
|
||||
|
|
@ -174,7 +170,6 @@ void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate
|
|||
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
|
||||
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
|
||||
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);
|
||||
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value);
|
||||
|
||||
void elk_init_codegen(const struct elk_isa_info *isa,
|
||||
struct elk_codegen *p, void *mem_ctx);
|
||||
|
|
@ -1879,8 +1874,6 @@ void elk_NOP(struct elk_codegen *p);
|
|||
|
||||
void elk_WAIT(struct elk_codegen *p);
|
||||
|
||||
void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func);
|
||||
|
||||
/* Special case: there is never a destination, execution size will be
|
||||
* taken from src0:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -502,285 +502,6 @@ enum ENUM_PACKED elk_width {
|
|||
ELK_WIDTH_16 = 4,
|
||||
};
|
||||
|
||||
/**
|
||||
* Gfx12+ SWSB SBID synchronization mode.
|
||||
*
|
||||
* This is represented as a bitmask including any required SBID token
|
||||
* synchronization modes, used to synchronize out-of-order instructions. Only
|
||||
* the strongest mode of the mask will be provided to the hardware in the SWSB
|
||||
* field of an actual hardware instruction, but virtual instructions may be
|
||||
* able to take into account multiple of them.
|
||||
*/
|
||||
enum tgl_sbid_mode {
|
||||
TGL_SBID_NULL = 0,
|
||||
TGL_SBID_SRC = 1,
|
||||
TGL_SBID_DST = 2,
|
||||
TGL_SBID_SET = 4
|
||||
};
|
||||
|
||||
|
||||
enum gfx12_sub_byte_precision {
|
||||
ELK_SUB_BYTE_PRECISION_NONE = 0,
|
||||
|
||||
/** 4 bits. Signedness determined by base type */
|
||||
ELK_SUB_BYTE_PRECISION_4BIT = 1,
|
||||
|
||||
/** 2 bits. Signedness determined by base type */
|
||||
ELK_SUB_BYTE_PRECISION_2BIT = 2,
|
||||
};
|
||||
|
||||
enum elk_gfx12_systolic_depth {
|
||||
ELK_SYSTOLIC_DEPTH_16 = 0,
|
||||
ELK_SYSTOLIC_DEPTH_2 = 1,
|
||||
ELK_SYSTOLIC_DEPTH_4 = 2,
|
||||
ELK_SYSTOLIC_DEPTH_8 = 3,
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Allow bitwise arithmetic of tgl_sbid_mode enums.
|
||||
*/
|
||||
inline tgl_sbid_mode
|
||||
operator|(tgl_sbid_mode x, tgl_sbid_mode y)
|
||||
{
|
||||
return tgl_sbid_mode(unsigned(x) | unsigned(y));
|
||||
}
|
||||
|
||||
inline tgl_sbid_mode
|
||||
operator&(tgl_sbid_mode x, tgl_sbid_mode y)
|
||||
{
|
||||
return tgl_sbid_mode(unsigned(x) & unsigned(y));
|
||||
}
|
||||
|
||||
inline tgl_sbid_mode &
|
||||
operator|=(tgl_sbid_mode &x, tgl_sbid_mode y)
|
||||
{
|
||||
return x = x | y;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* TGL+ SWSB RegDist synchronization pipeline.
|
||||
*
|
||||
* On TGL all instructions that use the RegDist synchronization mechanism are
|
||||
* considered to be executed as a single in-order pipeline, therefore only the
|
||||
* TGL_PIPE_FLOAT pipeline is applicable. On XeHP+ platforms there are two
|
||||
* additional asynchronous ALU pipelines (which still execute instructions
|
||||
* in-order and use the RegDist synchronization mechanism). TGL_PIPE_NONE
|
||||
* doesn't provide any RegDist pipeline synchronization information and allows
|
||||
* the hardware to infer the pipeline based on the source types of the
|
||||
* instruction. TGL_PIPE_ALL can be used when synchronization with all ALU
|
||||
* pipelines is intended.
|
||||
*/
|
||||
enum tgl_pipe {
|
||||
TGL_PIPE_NONE = 0,
|
||||
TGL_PIPE_FLOAT,
|
||||
TGL_PIPE_INT,
|
||||
TGL_PIPE_LONG,
|
||||
TGL_PIPE_MATH,
|
||||
TGL_PIPE_ALL
|
||||
};
|
||||
|
||||
/**
|
||||
* Logical representation of the SWSB scheduling information of a hardware
|
||||
* instruction. The binary representation is slightly more compact.
|
||||
*/
|
||||
struct tgl_swsb {
|
||||
unsigned regdist : 3;
|
||||
enum tgl_pipe pipe : 3;
|
||||
unsigned sbid : 5;
|
||||
enum tgl_sbid_mode mode : 3;
|
||||
};
|
||||
|
||||
/**
|
||||
* Construct a scheduling annotation with a single RegDist dependency. This
|
||||
* synchronizes with the completion of the d-th previous in-order instruction.
|
||||
* The index is one-based, zero causes a no-op tgl_swsb to be constructed.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_regdist(unsigned d)
|
||||
{
|
||||
const struct tgl_swsb swsb = { d, d ? TGL_PIPE_ALL : TGL_PIPE_NONE };
|
||||
assert(swsb.regdist == d);
|
||||
return swsb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a scheduling annotation that synchronizes with the specified SBID
|
||||
* token.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid)
|
||||
{
|
||||
const struct tgl_swsb swsb = { 0, TGL_PIPE_NONE, sbid, mode };
|
||||
assert(swsb.sbid == sbid);
|
||||
return swsb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a no-op scheduling annotation.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_null(void)
|
||||
{
|
||||
return tgl_swsb_regdist(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a scheduling annotation that allocates the same SBID synchronization
|
||||
* token as \p swsb. In addition it will synchronize against a previous
|
||||
* in-order instruction if \p regdist is non-zero.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist)
|
||||
{
|
||||
swsb.regdist = regdist;
|
||||
swsb.mode = swsb.mode & TGL_SBID_SET;
|
||||
swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE);
|
||||
return swsb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a scheduling annotation that synchronizes against the same SBID and
|
||||
* RegDist dependencies as \p swsb, but doesn't allocate any SBID token.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_src_dep(struct tgl_swsb swsb)
|
||||
{
|
||||
swsb.mode = swsb.mode & (TGL_SBID_SRC | TGL_SBID_DST);
|
||||
return swsb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the provided tgl_swsb to the hardware's binary representation of an
|
||||
* SWSB annotation.
|
||||
*/
|
||||
static inline uint32_t
|
||||
tgl_swsb_encode(const struct intel_device_info *devinfo, struct tgl_swsb swsb)
|
||||
{
|
||||
if (!swsb.mode) {
|
||||
const unsigned pipe = devinfo->verx10 < 125 ? 0 :
|
||||
swsb.pipe == TGL_PIPE_FLOAT ? 0x10 :
|
||||
swsb.pipe == TGL_PIPE_INT ? 0x18 :
|
||||
swsb.pipe == TGL_PIPE_LONG ? 0x20 :
|
||||
swsb.pipe == TGL_PIPE_MATH ? 0x28 :
|
||||
swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0;
|
||||
return pipe | swsb.regdist;
|
||||
|
||||
} else if (swsb.regdist) {
|
||||
if (devinfo->ver >= 20) {
|
||||
if ((swsb.mode & TGL_SBID_SET)) {
|
||||
assert(swsb.pipe == TGL_PIPE_ALL ||
|
||||
swsb.pipe == TGL_PIPE_INT || swsb.pipe == TGL_PIPE_FLOAT);
|
||||
return (swsb.pipe == TGL_PIPE_INT ? 0x300 :
|
||||
swsb.pipe == TGL_PIPE_FLOAT ? 0x200 : 0x100) |
|
||||
swsb.regdist << 5 | swsb.sbid;
|
||||
} else {
|
||||
assert(!(swsb.mode & ~(TGL_SBID_DST | TGL_SBID_SRC)));
|
||||
return (swsb.pipe == TGL_PIPE_ALL ? 0x300 :
|
||||
swsb.mode == TGL_SBID_SRC ? 0x200 : 0x100) |
|
||||
swsb.regdist << 5 | swsb.sbid;
|
||||
}
|
||||
} else {
|
||||
assert(!(swsb.sbid & ~0xfu));
|
||||
return 0x80 | swsb.regdist << 4 | swsb.sbid;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (devinfo->ver >= 20) {
|
||||
return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0xc0 :
|
||||
swsb.mode & TGL_SBID_DST ? 0x80 : 0xa0);
|
||||
} else {
|
||||
assert(!(swsb.sbid & ~0xfu));
|
||||
return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0x40 :
|
||||
swsb.mode & TGL_SBID_DST ? 0x20 : 0x30);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the provided binary representation of an SWSB annotation to a
|
||||
* tgl_swsb.
|
||||
*/
|
||||
static inline struct tgl_swsb
|
||||
tgl_swsb_decode(const struct intel_device_info *devinfo,
|
||||
const bool is_unordered, const uint32_t x)
|
||||
{
|
||||
if (devinfo->ver >= 20) {
|
||||
if (x & 0x300) {
|
||||
if (is_unordered) {
|
||||
const struct tgl_swsb swsb = {
|
||||
(x & 0xe0u) >> 5,
|
||||
((x & 0x300) == 0x300 ? TGL_PIPE_INT :
|
||||
(x & 0x300) == 0x200 ? TGL_PIPE_FLOAT :
|
||||
TGL_PIPE_ALL),
|
||||
x & 0x1fu,
|
||||
TGL_SBID_SET
|
||||
};
|
||||
return swsb;
|
||||
} else {
|
||||
const struct tgl_swsb swsb = {
|
||||
(x & 0xe0u) >> 5,
|
||||
((x & 0x300) == 0x300 ? TGL_PIPE_ALL : TGL_PIPE_NONE),
|
||||
x & 0x1fu,
|
||||
((x & 0x300) == 0x200 ? TGL_SBID_SRC : TGL_SBID_DST)
|
||||
};
|
||||
return swsb;
|
||||
}
|
||||
|
||||
} else if ((x & 0xe0) == 0x80) {
|
||||
return tgl_swsb_sbid(TGL_SBID_DST, x & 0x1f);
|
||||
} else if ((x & 0xe0) == 0xa0) {
|
||||
return tgl_swsb_sbid(TGL_SBID_SRC, x & 0x1fu);
|
||||
} else if ((x & 0xe0) == 0xc0) {
|
||||
return tgl_swsb_sbid(TGL_SBID_SET, x & 0x1fu);
|
||||
} else {
|
||||
const struct tgl_swsb swsb = { x & 0x7u,
|
||||
((x & 0x38) == 0x10 ? TGL_PIPE_FLOAT :
|
||||
(x & 0x38) == 0x18 ? TGL_PIPE_INT :
|
||||
(x & 0x38) == 0x20 ? TGL_PIPE_LONG :
|
||||
(x & 0x38) == 0x28 ? TGL_PIPE_MATH :
|
||||
(x & 0x38) == 0x8 ? TGL_PIPE_ALL :
|
||||
TGL_PIPE_NONE) };
|
||||
return swsb;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (x & 0x80) {
|
||||
const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE,
|
||||
x & 0xfu,
|
||||
is_unordered ?
|
||||
TGL_SBID_SET : TGL_SBID_DST };
|
||||
return swsb;
|
||||
} else if ((x & 0x70) == 0x20) {
|
||||
return tgl_swsb_sbid(TGL_SBID_DST, x & 0xfu);
|
||||
} else if ((x & 0x70) == 0x30) {
|
||||
return tgl_swsb_sbid(TGL_SBID_SRC, x & 0xfu);
|
||||
} else if ((x & 0x70) == 0x40) {
|
||||
return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu);
|
||||
} else {
|
||||
const struct tgl_swsb swsb = { x & 0x7u,
|
||||
((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT :
|
||||
(x & 0x78) == 0x18 ? TGL_PIPE_INT :
|
||||
(x & 0x78) == 0x50 ? TGL_PIPE_LONG :
|
||||
(x & 0x78) == 0x8 ? TGL_PIPE_ALL :
|
||||
TGL_PIPE_NONE) };
|
||||
assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE);
|
||||
return swsb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum tgl_sync_function {
|
||||
TGL_SYNC_NOP = 0x0,
|
||||
TGL_SYNC_ALLRD = 0x2,
|
||||
TGL_SYNC_ALLWR = 0x3,
|
||||
TGL_SYNC_FENCE = 0xd,
|
||||
TGL_SYNC_BAR = 0xe,
|
||||
TGL_SYNC_HOST = 0xf
|
||||
};
|
||||
|
||||
/**
|
||||
* Message target: Shared Function ID for where to SEND a message.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1127,12 +1127,6 @@ void elk_NOP(struct elk_codegen *p)
|
|||
elk_inst_set_opcode(p->isa, insn, ELK_OPCODE_NOP);
|
||||
}
|
||||
|
||||
void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func)
|
||||
{
|
||||
elk_inst *insn = next_insn(p, ELK_OPCODE_SYNC);
|
||||
elk_inst_set_cond_modifier(p->devinfo, insn, func);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* Comparisons, if/else/endif
|
||||
*/
|
||||
|
|
@ -1990,7 +1984,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
|
|||
(devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
|
||||
devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
|
||||
ELK_SFID_DATAPORT_WRITE);
|
||||
const struct tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
uint32_t msg_type;
|
||||
|
||||
if (devinfo->ver >= 6)
|
||||
|
|
@ -2010,13 +2003,11 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
|
|||
elk_set_default_exec_size(p, ELK_EXECUTE_8);
|
||||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD));
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p,
|
||||
retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE,
|
||||
mrf.nr,
|
||||
|
|
@ -2024,7 +2015,6 @@ void elk_oword_block_write_scratch(struct elk_codegen *p,
|
|||
elk_imm_ud(offset));
|
||||
|
||||
elk_pop_insn_state(p);
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
{
|
||||
|
|
@ -2098,7 +2088,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
|
|||
unsigned offset)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
const struct tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
|
||||
if (devinfo->ver >= 6)
|
||||
offset /= 16;
|
||||
|
|
@ -2125,7 +2114,6 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
|
|||
|
||||
{
|
||||
elk_push_insn_state(p);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_8);
|
||||
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
|
||||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
|
|
@ -2134,11 +2122,9 @@ elk_oword_block_read_scratch(struct elk_codegen *p,
|
|||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, get_element_ud(mrf, 2), elk_imm_ud(offset));
|
||||
|
||||
elk_pop_insn_state(p);
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
{
|
||||
|
|
@ -2215,7 +2201,6 @@ void elk_oword_block_read(struct elk_codegen *p,
|
|||
(devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE :
|
||||
ELK_SFID_DATAPORT_READ);
|
||||
const unsigned exec_size = 1 << elk_get_default_exec_size(p);
|
||||
const struct tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
|
||||
/* On newer hardware, offset is in units of owords. */
|
||||
if (devinfo->ver >= 6)
|
||||
|
|
@ -2231,12 +2216,10 @@ void elk_oword_block_read(struct elk_codegen *p,
|
|||
|
||||
elk_push_insn_state(p);
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_8);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
elk_MOV(p, mrf, retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UD));
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p,
|
||||
retype(elk_vec1_reg(ELK_MESSAGE_REGISTER_FILE,
|
||||
mrf.nr,
|
||||
|
|
@ -2244,8 +2227,6 @@ void elk_oword_block_read(struct elk_codegen *p,
|
|||
elk_imm_ud(offset));
|
||||
elk_pop_insn_state(p);
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
|
||||
elk_inst *insn = next_insn(p, ELK_OPCODE_SEND);
|
||||
|
||||
elk_inst_set_sfid(devinfo, insn, target_cache);
|
||||
|
|
@ -2444,7 +2425,6 @@ void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
|
|||
|
||||
elk_push_insn_state(p);
|
||||
elk_AND(p, temp, get_element_ud(sampler_index, 0), elk_imm_ud(0x0f0));
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
elk_SHL(p, temp, temp, elk_imm_ud(4));
|
||||
elk_ADD(p,
|
||||
get_element_ud(header, 3),
|
||||
|
|
@ -2527,7 +2507,6 @@ elk_send_indirect_message(struct elk_codegen *p,
|
|||
elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD));
|
||||
elk_set_desc(p, send, desc.ud | desc_imm);
|
||||
} else {
|
||||
const struct tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD);
|
||||
|
||||
elk_push_insn_state(p);
|
||||
|
|
@ -2536,7 +2515,6 @@ elk_send_indirect_message(struct elk_codegen *p,
|
|||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
|
||||
elk_set_default_flag_reg(p, 0, 0);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Load the indirect descriptor to an address register using OR so the
|
||||
* caller can specify additional descriptor bits with the desc_imm
|
||||
|
|
@ -2546,7 +2524,6 @@ elk_send_indirect_message(struct elk_codegen *p,
|
|||
|
||||
elk_pop_insn_state(p);
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
send = next_insn(p, ELK_OPCODE_SEND);
|
||||
elk_set_src0(p, send, retype(payload, ELK_REGISTER_TYPE_UD));
|
||||
elk_set_src1(p, send, addr);
|
||||
|
|
@ -2566,7 +2543,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
|
|||
unsigned desc_imm)
|
||||
{
|
||||
if (surface.file != ELK_IMMEDIATE_VALUE) {
|
||||
const struct tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
struct elk_reg addr = retype(elk_address_reg(0), ELK_REGISTER_TYPE_UD);
|
||||
|
||||
elk_push_insn_state(p);
|
||||
|
|
@ -2575,7 +2551,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
|
|||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
|
||||
elk_set_default_flag_reg(p, 0, 0);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
|
||||
* some surface array is accessed out of bounds.
|
||||
|
|
@ -2588,7 +2563,6 @@ elk_send_indirect_surface_message(struct elk_codegen *p,
|
|||
elk_pop_insn_state(p);
|
||||
|
||||
surface = addr;
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
elk_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
|
||||
|
|
@ -3161,7 +3135,6 @@ elk_broadcast(struct elk_codegen *p,
|
|||
if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
|
||||
subscript(src, ELK_REGISTER_TYPE_D, 0));
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
|
||||
subscript(src, ELK_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
|
|
@ -3205,15 +3178,12 @@ elk_broadcast(struct elk_codegen *p,
|
|||
* register is above this limit.
|
||||
*/
|
||||
if (offset >= limit) {
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
elk_ADD(p, addr, addr, elk_imm_ud(offset - offset % limit));
|
||||
offset = offset % limit;
|
||||
}
|
||||
|
||||
elk_pop_insn_state(p);
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
/* Use indirect addressing to fetch the specified component. */
|
||||
if (type_sz(src.type) > 4 &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) ||
|
||||
|
|
@ -3233,7 +3203,6 @@ elk_broadcast(struct elk_codegen *p,
|
|||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
|
||||
retype(elk_vec1_indirect(addr.subnr, offset),
|
||||
ELK_REGISTER_TYPE_D));
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
|
||||
retype(elk_vec1_indirect(addr.subnr, offset + 4),
|
||||
ELK_REGISTER_TYPE_D));
|
||||
|
|
@ -3326,27 +3295,18 @@ elk_float_controls_mode(struct elk_codegen *p,
|
|||
* does not ensure execution pipeline coherency. Software must set the
|
||||
* thread control field to ‘switch’ for an instruction that uses
|
||||
* control register as an explicit operand."
|
||||
*
|
||||
* On Gfx12+ this is implemented in terms of SWSB annotations instead.
|
||||
*/
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
elk_inst *inst = elk_AND(p, elk_cr0_reg(0), elk_cr0_reg(0),
|
||||
elk_imm_ud(~mask));
|
||||
elk_inst_set_exec_size(p->devinfo, inst, ELK_EXECUTE_1);
|
||||
if (p->devinfo->ver < 12)
|
||||
elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH);
|
||||
elk_inst_set_thread_control(p->devinfo, inst, ELK_THREAD_SWITCH);
|
||||
|
||||
if (mode) {
|
||||
elk_inst *inst_or = elk_OR(p, elk_cr0_reg(0), elk_cr0_reg(0),
|
||||
elk_imm_ud(mode));
|
||||
elk_inst_set_exec_size(p->devinfo, inst_or, ELK_EXECUTE_1);
|
||||
if (p->devinfo->ver < 12)
|
||||
elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH);
|
||||
elk_inst_set_thread_control(p->devinfo, inst_or, ELK_THREAD_SWITCH);
|
||||
}
|
||||
|
||||
if (p->devinfo->ver >= 12)
|
||||
elk_SYNC(p, TGL_SYNC_NOP);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ extern "C" {
|
|||
enum elk_opcode {
|
||||
/* These are the actual hardware instructions. */
|
||||
ELK_OPCODE_ILLEGAL,
|
||||
ELK_OPCODE_SYNC,
|
||||
ELK_OPCODE_MOV,
|
||||
ELK_OPCODE_SEL,
|
||||
ELK_OPCODE_MOVI, /**< G45+ */
|
||||
|
|
|
|||
|
|
@ -293,7 +293,7 @@ sources_not_null(const struct elk_isa_info *isa,
|
|||
if (num_sources == 3)
|
||||
return (struct string){};
|
||||
|
||||
if (num_sources >= 1 && elk_inst_opcode(isa, inst) != ELK_OPCODE_SYNC)
|
||||
if (num_sources >= 1)
|
||||
ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
|
||||
|
||||
if (num_sources == 2)
|
||||
|
|
|
|||
|
|
@ -480,7 +480,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
|
|||
if (type_sz(reg.type) > 4 && !devinfo->has_64bit_float) {
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
|
||||
subscript(reg, ELK_REGISTER_TYPE_D, 0));
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
|
||||
subscript(reg, ELK_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
|
|
@ -544,16 +543,11 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
|
|||
insn = elk_MOV(p, addr, elk_imm_uw(imm_byte_offset));
|
||||
elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE);
|
||||
elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE);
|
||||
if (devinfo->ver >= 12)
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
else
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
|
||||
}
|
||||
|
||||
insn = elk_ADD(p, addr, indirect_byte_offset, elk_imm_uw(imm_byte_offset));
|
||||
if (devinfo->ver >= 12)
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
else if (devinfo->ver >= 7)
|
||||
if (devinfo->ver >= 7)
|
||||
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
|
||||
|
||||
if (type_sz(reg.type) > 4 &&
|
||||
|
|
@ -577,7 +571,6 @@ elk_fs_generator::generate_mov_indirect(elk_fs_inst *inst,
|
|||
*/
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 0),
|
||||
retype(elk_VxH_indirect(0, 0), ELK_REGISTER_TYPE_D));
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, subscript(dst, ELK_REGISTER_TYPE_D, 1),
|
||||
retype(elk_VxH_indirect(0, 4), ELK_REGISTER_TYPE_D));
|
||||
} else {
|
||||
|
|
@ -708,28 +701,20 @@ elk_fs_generator::generate_shuffle(elk_fs_inst *inst,
|
|||
insn = elk_MOV(p, addr, elk_imm_uw(src_start_offset));
|
||||
elk_inst_set_mask_control(devinfo, insn, ELK_MASK_DISABLE);
|
||||
elk_inst_set_pred_control(devinfo, insn, ELK_PREDICATE_NONE);
|
||||
if (devinfo->ver >= 12)
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
else
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl);
|
||||
|
||||
/* Take into account the component size and horizontal stride. */
|
||||
assert(src.vstride == src.hstride + src.width);
|
||||
insn = elk_SHL(p, addr, group_idx,
|
||||
elk_imm_uw(util_logbase2(type_sz(src.type)) +
|
||||
src.hstride - 1));
|
||||
if (devinfo->ver >= 12)
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
else
|
||||
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
|
||||
elk_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
|
||||
|
||||
/* Add on the register start offset */
|
||||
elk_ADD(p, addr, addr, elk_imm_uw(src_start_offset));
|
||||
elk_MOV(p, suboffset(dst, group << (dst.hstride - 1)),
|
||||
retype(elk_VxH_indirect(0, 0), src.type));
|
||||
}
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -790,12 +775,8 @@ elk_fs_generator::generate_quad_swizzle(const elk_fs_inst *inst,
|
|||
4 * inst->dst.stride, 1, 4 * inst->dst.stride),
|
||||
stride(suboffset(src, ELK_GET_SWZ(swiz, c)), 4, 1, 0));
|
||||
|
||||
if (devinfo->ver < 12) {
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, c < 3);
|
||||
elk_inst_set_no_dd_check(devinfo, insn, c > 0);
|
||||
}
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_inst_set_no_dd_clear(devinfo, insn, c < 3);
|
||||
elk_inst_set_no_dd_check(devinfo, insn, c > 0);
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
@ -847,12 +828,7 @@ void
|
|||
elk_fs_generator::generate_barrier(elk_fs_inst *, struct elk_reg src)
|
||||
{
|
||||
elk_barrier(p, src);
|
||||
if (devinfo->ver >= 12) {
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_SYNC(p, TGL_SYNC_BAR);
|
||||
} else {
|
||||
elk_WAIT(p);
|
||||
}
|
||||
elk_WAIT(p);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -1144,18 +1120,15 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst,
|
|||
/* Set up an implied move from g0 to the MRF. */
|
||||
src = retype(elk_vec8_grf(0, 0), ELK_REGISTER_TYPE_UW);
|
||||
} else {
|
||||
const tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
assert(inst->base_mrf != -1);
|
||||
struct elk_reg header_reg = elk_message_reg(inst->base_mrf);
|
||||
|
||||
elk_push_insn_state(p);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_8);
|
||||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
|
||||
/* Explicitly set up the message header by copying g0 to the MRF. */
|
||||
elk_MOV(p, header_reg, elk_vec8_grf(0, 0));
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
if (inst->offset) {
|
||||
|
|
@ -1165,7 +1138,6 @@ elk_fs_generator::generate_tex(elk_fs_inst *inst, struct elk_reg dst,
|
|||
}
|
||||
|
||||
elk_pop_insn_state(p);
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1302,7 +1274,6 @@ elk_fs_generator::generate_ddy(const elk_fs_inst *inst,
|
|||
elk_ADD(p, byte_offset(dst, g * type_size),
|
||||
negate(byte_offset(src, g * type_size)),
|
||||
byte_offset(src, (g + 2) * type_size));
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
elk_pop_insn_state(p);
|
||||
} else {
|
||||
|
|
@ -1365,7 +1336,6 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src)
|
|||
const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
|
||||
MIN2(16, inst->exec_size);
|
||||
const unsigned block_size = 4 * lower_size / REG_SIZE;
|
||||
const tgl_swsb swsb = elk_get_default_swsb(p);
|
||||
assert(inst->mlen != 0);
|
||||
|
||||
elk_push_insn_state(p);
|
||||
|
|
@ -1375,17 +1345,9 @@ elk_fs_generator::generate_scratch_write(elk_fs_inst *inst, struct elk_reg src)
|
|||
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
|
||||
elk_set_default_group(p, inst->group + lower_size * i);
|
||||
|
||||
if (i > 0) {
|
||||
assert(swsb.mode & TGL_SBID_SET);
|
||||
elk_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_SRC, swsb.sbid));
|
||||
} else {
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
}
|
||||
|
||||
elk_MOV(p, elk_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
|
||||
retype(offset(src, block_size * i), ELK_REGISTER_TYPE_UD));
|
||||
|
||||
elk_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
elk_oword_block_write_scratch(p, elk_message_reg(inst->base_mrf),
|
||||
block_size,
|
||||
inst->offset + block_size * REG_SIZE * i);
|
||||
|
|
@ -1459,10 +1421,7 @@ elk_fs_generator::generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst)
|
|||
dst.type = ELK_REGISTER_TYPE_UD;
|
||||
|
||||
elk_inst *insn = elk_MOV(p, dst, elk_imm_ud(0));
|
||||
if (devinfo->ver >= 12)
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
else
|
||||
elk_inst_set_no_dd_clear(p->devinfo, insn, true);
|
||||
elk_inst_set_no_dd_clear(p->devinfo, insn, true);
|
||||
|
||||
/* Copy the per-thread scratch space size from g0.3[3:0] */
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
|
|
@ -1588,7 +1547,6 @@ elk_fs_generator::generate_set_sample_id(elk_fs_inst *inst,
|
|||
elk_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1);
|
||||
elk_inst_set_group(devinfo, insn, inst->group + lower_size * i);
|
||||
elk_inst_set_compression(devinfo, insn, lower_size > 8);
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1625,7 +1583,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
struct elk_reg src[4], dst;
|
||||
unsigned int last_insn_offset = p->next_insn_offset;
|
||||
bool multiple_instructions_emitted = false;
|
||||
tgl_swsb swsb = inst->sched;
|
||||
|
||||
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
|
||||
* "Register Region Restrictions" section: for BDW, SKL:
|
||||
|
|
@ -1663,10 +1620,8 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
|
||||
elk_set_default_flag_reg(p, 0, 0);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
elk_MOV(p, elk_acc_reg(8), elk_imm_f(0.0f));
|
||||
last_insn_offset = p->next_insn_offset;
|
||||
swsb = tgl_swsb_dst_dep(swsb, 1);
|
||||
}
|
||||
|
||||
if (!is_accum_used && !inst->eot) {
|
||||
|
|
@ -1674,24 +1629,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
inst->dst.is_accumulator();
|
||||
}
|
||||
|
||||
/* Wa_14013672992:
|
||||
*
|
||||
* Always use @1 SWSB for EOT.
|
||||
*/
|
||||
if (inst->eot && intel_needs_workaround(devinfo, 14013672992)) {
|
||||
if (tgl_swsb_src_dep(swsb).mode) {
|
||||
elk_set_default_exec_size(p, ELK_EXECUTE_1);
|
||||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
|
||||
elk_set_default_flag_reg(p, 0, 0);
|
||||
elk_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
elk_SYNC(p, TGL_SYNC_NOP);
|
||||
last_insn_offset = p->next_insn_offset;
|
||||
}
|
||||
|
||||
swsb = tgl_swsb_dst_dep(swsb, 1);
|
||||
}
|
||||
|
||||
if (unlikely(debug_flag))
|
||||
elk_disasm_annotate(elk_disasm_info, inst, p->next_insn_offset);
|
||||
|
||||
|
|
@ -1759,7 +1696,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
} else {
|
||||
elk_set_default_acc_write_control(p, inst->writes_accumulator);
|
||||
}
|
||||
elk_set_default_swsb(p, swsb);
|
||||
|
||||
unsigned exec_size = inst->exec_size;
|
||||
if (devinfo->verx10 == 70 &&
|
||||
|
|
@ -1775,13 +1711,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
assert(inst->mlen <= ELK_MAX_MSG_LENGTH * reg_unit(devinfo));
|
||||
|
||||
switch (inst->opcode) {
|
||||
case ELK_OPCODE_SYNC:
|
||||
assert(src[0].file == ELK_IMMEDIATE_VALUE);
|
||||
elk_SYNC(p, tgl_sync_function(src[0].ud));
|
||||
|
||||
if (tgl_sync_function(src[0].ud) == TGL_SYNC_NOP)
|
||||
++sync_nop_count;
|
||||
break;
|
||||
case ELK_OPCODE_MOV:
|
||||
elk_MOV(p, dst, src[0]);
|
||||
break;
|
||||
|
|
@ -2148,33 +2077,23 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
}
|
||||
|
||||
case ELK_FS_OPCODE_SCHEDULING_FENCE:
|
||||
if (inst->sources == 0 && swsb.regdist == 0 &&
|
||||
swsb.mode == TGL_SBID_NULL) {
|
||||
if (inst->sources == 0) {
|
||||
if (unlikely(debug_flag))
|
||||
elk_disasm_info->use_tail = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
/* Use the available SWSB information to stall. A single SYNC is
|
||||
* sufficient since if there were multiple dependencies, the
|
||||
* scoreboard algorithm already injected other SYNCs before this
|
||||
* instruction.
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
/* Emit a MOV to force a stall until the instruction producing the
|
||||
* registers finishes.
|
||||
*/
|
||||
elk_SYNC(p, TGL_SYNC_NOP);
|
||||
} else {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
/* Emit a MOV to force a stall until the instruction producing the
|
||||
* registers finishes.
|
||||
*/
|
||||
elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW),
|
||||
retype(src[i], ELK_REGISTER_TYPE_UW));
|
||||
}
|
||||
|
||||
if (inst->sources > 1)
|
||||
multiple_instructions_emitted = true;
|
||||
elk_MOV(p, retype(elk_null_reg(), ELK_REGISTER_TYPE_UW),
|
||||
retype(src[i], ELK_REGISTER_TYPE_UW));
|
||||
}
|
||||
|
||||
if (inst->sources > 1)
|
||||
multiple_instructions_emitted = true;
|
||||
|
||||
break;
|
||||
|
||||
case ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
|
|
@ -2209,7 +2128,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
elk_set_default_mask_control(p, ELK_MASK_DISABLE);
|
||||
elk_MOV(p, dst, src[1]);
|
||||
elk_set_default_mask_control(p, ELK_MASK_ENABLE);
|
||||
elk_set_default_swsb(p, tgl_swsb_null());
|
||||
elk_MOV(p, dst, src[0]);
|
||||
break;
|
||||
|
||||
|
|
@ -2298,21 +2216,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
|
||||
case ELK_SHADER_OPCODE_READ_SR_REG:
|
||||
if (devinfo->ver >= 12) {
|
||||
/* There is a SWSB restriction that requires that any time sr0 is
|
||||
* accessed both the instruction doing the access and the next one
|
||||
* have SWSB set to RegDist(1).
|
||||
*/
|
||||
if (elk_get_default_swsb(p).mode != TGL_SBID_NULL)
|
||||
elk_SYNC(p, TGL_SYNC_NOP);
|
||||
assert(src[0].file == ELK_IMMEDIATE_VALUE);
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
elk_AND(p, dst, dst, elk_imm_ud(0xffffffff));
|
||||
} else {
|
||||
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
|
||||
}
|
||||
elk_MOV(p, dst, elk_sr0_reg(src[0].ud));
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -2339,14 +2243,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
elk_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
|
||||
}
|
||||
}
|
||||
|
||||
/* When enabled, insert sync NOP after every instruction and make sure
|
||||
* that current instruction depends on the previous instruction.
|
||||
*/
|
||||
if (INTEL_DEBUG(DEBUG_SWSB_STALL) && devinfo->ver >= 12) {
|
||||
elk_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
elk_SYNC(p, TGL_SYNC_NOP);
|
||||
}
|
||||
}
|
||||
|
||||
elk_set_uip_jip(p, start_offset);
|
||||
|
|
|
|||
|
|
@ -5244,50 +5244,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
|
|||
|
||||
const fs_builder ubld = bld.group(8, 0);
|
||||
|
||||
/* A memory barrier with acquire semantics requires us to
|
||||
* guarantee that memory operations of the specified storage
|
||||
* class sequenced-after the barrier aren't reordered before the
|
||||
* barrier, nor before any previous atomic operation
|
||||
* sequenced-before the barrier which may be synchronizing this
|
||||
* acquire barrier with a prior release sequence.
|
||||
*
|
||||
* In order to guarantee the latter we must make sure that any
|
||||
* such previous operation has completed execution before
|
||||
* invalidating the relevant caches, since otherwise some cache
|
||||
* could be polluted by a concurrent thread after its
|
||||
* invalidation but before the previous atomic completes, which
|
||||
* could lead to a violation of the expected memory ordering if
|
||||
* a subsequent memory read hits the polluted cacheline, which
|
||||
* would return a stale value read from memory before the
|
||||
* completion of the atomic sequenced-before the barrier.
|
||||
*
|
||||
* This ordering inversion can be avoided trivially if the
|
||||
* operations we need to order are all handled by a single
|
||||
* in-order cache, since the flush implied by the memory fence
|
||||
* occurs after any pending operations have completed, however
|
||||
* that doesn't help us when dealing with multiple caches
|
||||
* processing requests out of order, in which case we need to
|
||||
* explicitly stall the EU until any pending memory operations
|
||||
* have executed.
|
||||
*
|
||||
* Note that that might be somewhat heavy handed in some cases.
|
||||
* In particular when this memory fence was inserted by
|
||||
* spirv_to_nir() lowering an atomic with acquire semantics into
|
||||
* an atomic+barrier sequence we could do a better job by
|
||||
* synchronizing with respect to that one atomic *only*, but
|
||||
* that would require additional information not currently
|
||||
* available to the backend.
|
||||
*
|
||||
* XXX - Use an alternative workaround on IVB and ICL, since
|
||||
* SYNC.ALLWR is only available on Gfx12+.
|
||||
*/
|
||||
if (devinfo->ver >= 12 &&
|
||||
(!nir_intrinsic_has_memory_scope(instr) ||
|
||||
(nir_intrinsic_memory_semantics(instr) & NIR_MEMORY_ACQUIRE))) {
|
||||
ubld.exec_all().group(1, 0).emit(
|
||||
ELK_OPCODE_SYNC, ubld.null_reg_ud(), elk_imm_ud(TGL_SYNC_ALLWR));
|
||||
}
|
||||
|
||||
if (devinfo->has_lsc) {
|
||||
assert(devinfo->verx10 >= 125);
|
||||
uint32_t desc =
|
||||
|
|
@ -5308,16 +5264,6 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
|
|||
|
||||
if (slm_fence) {
|
||||
assert(opcode == ELK_SHADER_OPCODE_MEMORY_FENCE);
|
||||
if (intel_needs_workaround(devinfo, 14014063774)) {
|
||||
/* Wa_14014063774
|
||||
*
|
||||
* Before SLM fence compiler needs to insert SYNC.ALLWR in order
|
||||
* to avoid the SLM data race.
|
||||
*/
|
||||
ubld.exec_all().group(1, 0).emit(
|
||||
ELK_OPCODE_SYNC, ubld.null_reg_ud(),
|
||||
elk_imm_ud(TGL_SYNC_ALLWR));
|
||||
}
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX12_SFID_SLM, desc,
|
||||
true /* commit_enable */,
|
||||
|
|
|
|||
|
|
@ -349,7 +349,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
|
|||
struct options options;
|
||||
struct instoption instoption;
|
||||
struct msgdesc msgdesc;
|
||||
struct tgl_swsb depinfo;
|
||||
elk_inst *instruction;
|
||||
}
|
||||
|
||||
|
|
@ -395,7 +394,7 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
|
|||
%token <integer> OR
|
||||
%token <integer> PLN POP PUSH
|
||||
%token <integer> RET RNDD RNDE RNDU RNDZ
|
||||
%token <integer> SAD2 SADA2 SEL SHL SHR SMOV SUBB SYNC
|
||||
%token <integer> SAD2 SADA2 SEL SHL SHR SMOV SUBB
|
||||
%token <integer> SEND SENDC
|
||||
%token <integer> WAIT WHILE
|
||||
%token <integer> XOR
|
||||
|
|
@ -404,11 +403,6 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
|
|||
%token <integer> COS EXP FDIV INV INVM INTDIV INTDIVMOD INTMOD LOG POW RSQ
|
||||
%token <integer> RSQRTM SIN SINCOS SQRT
|
||||
|
||||
/* sync instruction */
|
||||
%token <integer> ALLRD ALLWR FENCE BAR HOST
|
||||
%type <integer> sync_function
|
||||
%type <reg> sync_arg
|
||||
|
||||
/* shared functions for send */
|
||||
%token CONST CRE DATA DP_DATA_1 GATEWAY MATH PIXEL_INTERP READ RENDER SAMPLER
|
||||
%token THREAD_SPAWNER URB VME WRITE DP_SAMPLER RT_ACCEL SLM TGM UGM
|
||||
|
|
@ -544,33 +538,11 @@ add_label(struct elk_codegen *p, const char* label_name, enum instr_label_type t
|
|||
%type <string> jumplabeltarget
|
||||
%type <string> jumplabel
|
||||
|
||||
/* SWSB */
|
||||
%token <integer> REG_DIST_CURRENT
|
||||
%token <integer> REG_DIST_FLOAT
|
||||
%token <integer> REG_DIST_INT
|
||||
%token <integer> REG_DIST_LONG
|
||||
%token <integer> REG_DIST_ALL
|
||||
%token <integer> SBID_ALLOC
|
||||
%token <integer> SBID_WAIT_SRC
|
||||
%token <integer> SBID_WAIT_DST
|
||||
|
||||
%type <depinfo> depinfo
|
||||
|
||||
%code {
|
||||
|
||||
static void
|
||||
add_instruction_option(struct options *options, struct instoption opt)
|
||||
{
|
||||
if (opt.type == INSTOPTION_DEP_INFO) {
|
||||
if (opt.depinfo_value.regdist) {
|
||||
options->depinfo.regdist = opt.depinfo_value.regdist;
|
||||
options->depinfo.pipe = opt.depinfo_value.pipe;
|
||||
} else {
|
||||
options->depinfo.sbid = opt.depinfo_value.sbid;
|
||||
options->depinfo.mode = opt.depinfo_value.mode;
|
||||
}
|
||||
return;
|
||||
}
|
||||
switch (opt.uint_value) {
|
||||
case ALIGN1:
|
||||
options->access_mode = ELK_ALIGN_1;
|
||||
|
|
@ -687,7 +659,6 @@ instruction:
|
|||
| ternaryinstruction
|
||||
| sendinstruction
|
||||
| illegalinstruction
|
||||
| syncinstruction
|
||||
;
|
||||
|
||||
relocatableinstruction:
|
||||
|
|
@ -1443,54 +1414,6 @@ loopinstruction:
|
|||
}
|
||||
;
|
||||
|
||||
/* sync instruction */
|
||||
syncinstruction:
|
||||
predicate SYNC sync_function execsize sync_arg instoptions
|
||||
{
|
||||
if (p->devinfo->ver < 12) {
|
||||
error(&@2, "sync instruction is supported only on gfx12+\n");
|
||||
}
|
||||
|
||||
if ($5.file == ELK_IMMEDIATE_VALUE &&
|
||||
$3 != TGL_SYNC_ALLRD &&
|
||||
$3 != TGL_SYNC_ALLWR) {
|
||||
error(&@2, "Only allrd and allwr support immediate argument\n");
|
||||
}
|
||||
|
||||
elk_set_default_access_mode(p, $6.access_mode);
|
||||
elk_SYNC(p, $3);
|
||||
i965_asm_set_instruction_options(p, $6);
|
||||
elk_inst_set_exec_size(p->devinfo, elk_last_inst, $4);
|
||||
elk_set_src0(p, elk_last_inst, $5);
|
||||
elk_inst_set_eot(p->devinfo, elk_last_inst, $6.end_of_thread);
|
||||
elk_inst_set_qtr_control(p->devinfo, elk_last_inst, $6.qtr_ctrl);
|
||||
elk_inst_set_nib_control(p->devinfo, elk_last_inst, $6.nib_ctrl);
|
||||
|
||||
elk_pop_insn_state(p);
|
||||
}
|
||||
;
|
||||
|
||||
sync_function:
|
||||
NOP { $$ = TGL_SYNC_NOP; }
|
||||
| ALLRD
|
||||
| ALLWR
|
||||
| FENCE
|
||||
| BAR
|
||||
| HOST
|
||||
;
|
||||
|
||||
sync_arg:
|
||||
nullreg region reg_type
|
||||
{
|
||||
$$ = $1;
|
||||
$$.vstride = $2.vstride;
|
||||
$$.width = $2.width;
|
||||
$$.hstride = $2.hstride;
|
||||
$$.type = $3;
|
||||
}
|
||||
| immreg
|
||||
;
|
||||
|
||||
/* Relative location */
|
||||
relativelocation2:
|
||||
immreg
|
||||
|
|
@ -2367,84 +2290,33 @@ instoption_list:
|
|||
}
|
||||
;
|
||||
|
||||
depinfo:
|
||||
REG_DIST_CURRENT
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.regdist = $1;
|
||||
$$.pipe = TGL_PIPE_NONE;
|
||||
}
|
||||
| REG_DIST_FLOAT
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.regdist = $1;
|
||||
$$.pipe = TGL_PIPE_FLOAT;
|
||||
}
|
||||
| REG_DIST_INT
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.regdist = $1;
|
||||
$$.pipe = TGL_PIPE_INT;
|
||||
}
|
||||
| REG_DIST_LONG
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.regdist = $1;
|
||||
$$.pipe = TGL_PIPE_LONG;
|
||||
}
|
||||
| REG_DIST_ALL
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.regdist = $1;
|
||||
$$.pipe = TGL_PIPE_ALL;
|
||||
}
|
||||
| SBID_ALLOC
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.sbid = $1;
|
||||
$$.mode = TGL_SBID_SET;
|
||||
}
|
||||
| SBID_WAIT_SRC
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.sbid = $1;
|
||||
$$.mode = TGL_SBID_SRC;
|
||||
}
|
||||
| SBID_WAIT_DST
|
||||
{
|
||||
memset(&$$, 0, sizeof($$));
|
||||
$$.sbid = $1;
|
||||
$$.mode = TGL_SBID_DST;
|
||||
}
|
||||
|
||||
instoption:
|
||||
ALIGN1 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN1;}
|
||||
| ALIGN16 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN16; }
|
||||
| ACCWREN { $$.type = INSTOPTION_FLAG; $$.uint_value = ACCWREN; }
|
||||
| SECHALF { $$.type = INSTOPTION_FLAG; $$.uint_value = SECHALF; }
|
||||
| COMPR { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR; }
|
||||
| COMPR4 { $$.type = INSTOPTION_FLAG; $$.uint_value = COMPR4; }
|
||||
| BREAKPOINT { $$.type = INSTOPTION_FLAG; $$.uint_value = BREAKPOINT; }
|
||||
| NODDCLR { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCLR; }
|
||||
| NODDCHK { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCHK; }
|
||||
| MASK_DISABLE { $$.type = INSTOPTION_FLAG; $$.uint_value = MASK_DISABLE; }
|
||||
| EOT { $$.type = INSTOPTION_FLAG; $$.uint_value = EOT; }
|
||||
| SWITCH { $$.type = INSTOPTION_FLAG; $$.uint_value = SWITCH; }
|
||||
| ATOMIC { $$.type = INSTOPTION_FLAG; $$.uint_value = ATOMIC; }
|
||||
| CMPTCTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = CMPTCTRL; }
|
||||
| WECTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = WECTRL; }
|
||||
| QTR_2Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2Q; }
|
||||
| QTR_3Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3Q; }
|
||||
| QTR_4Q { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4Q; }
|
||||
| QTR_2H { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2H; }
|
||||
| QTR_2N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_2N; }
|
||||
| QTR_3N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_3N; }
|
||||
| QTR_4N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_4N; }
|
||||
| QTR_5N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_5N; }
|
||||
| QTR_6N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_6N; }
|
||||
| QTR_7N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_7N; }
|
||||
| QTR_8N { $$.type = INSTOPTION_FLAG; $$.uint_value = QTR_8N; }
|
||||
| depinfo { $$.type = INSTOPTION_DEP_INFO; $$.depinfo_value = $1; }
|
||||
ALIGN1 { $$.uint_value = ALIGN1;}
|
||||
| ALIGN16 { $$.uint_value = ALIGN16; }
|
||||
| ACCWREN { $$.uint_value = ACCWREN; }
|
||||
| SECHALF { $$.uint_value = SECHALF; }
|
||||
| COMPR { $$.uint_value = COMPR; }
|
||||
| COMPR4 { $$.uint_value = COMPR4; }
|
||||
| BREAKPOINT { $$.uint_value = BREAKPOINT; }
|
||||
| NODDCLR { $$.uint_value = NODDCLR; }
|
||||
| NODDCHK { $$.uint_value = NODDCHK; }
|
||||
| MASK_DISABLE { $$.uint_value = MASK_DISABLE; }
|
||||
| EOT { $$.uint_value = EOT; }
|
||||
| SWITCH { $$.uint_value = SWITCH; }
|
||||
| ATOMIC { $$.uint_value = ATOMIC; }
|
||||
| CMPTCTRL { $$.uint_value = CMPTCTRL; }
|
||||
| WECTRL { $$.uint_value = WECTRL; }
|
||||
| QTR_2Q { $$.uint_value = QTR_2Q; }
|
||||
| QTR_3Q { $$.uint_value = QTR_3Q; }
|
||||
| QTR_4Q { $$.uint_value = QTR_4Q; }
|
||||
| QTR_2H { $$.uint_value = QTR_2H; }
|
||||
| QTR_2N { $$.uint_value = QTR_2N; }
|
||||
| QTR_3N { $$.uint_value = QTR_3N; }
|
||||
| QTR_4N { $$.uint_value = QTR_4N; }
|
||||
| QTR_5N { $$.uint_value = QTR_5N; }
|
||||
| QTR_6N { $$.uint_value = QTR_6N; }
|
||||
| QTR_7N { $$.uint_value = QTR_7N; }
|
||||
| QTR_8N { $$.uint_value = QTR_8N; }
|
||||
;
|
||||
|
||||
%%
|
||||
|
|
|
|||
|
|
@ -427,8 +427,6 @@ public:
|
|||
bool last_rt:1;
|
||||
bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
|
||||
bool keep_payload_trailing_zeros;
|
||||
|
||||
tgl_swsb sched; /**< Scheduling info. */
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -295,7 +295,6 @@ namespace {
|
|||
const struct intel_device_info *devinfo = info.devinfo;
|
||||
|
||||
switch (info.op) {
|
||||
case ELK_OPCODE_SYNC:
|
||||
case ELK_OPCODE_SEL:
|
||||
case ELK_OPCODE_NOT:
|
||||
case ELK_OPCODE_AND:
|
||||
|
|
@ -1285,38 +1284,6 @@ namespace {
|
|||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_FLAG0 + i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the dependency ID corresponding to the SBID read completion
|
||||
* condition of a Gfx12+ SWSB.
|
||||
*/
|
||||
enum intel_eu_dependency_id
|
||||
tgl_swsb_rd_dependency_id(tgl_swsb swsb)
|
||||
{
|
||||
if (swsb.mode) {
|
||||
assert(swsb.sbid <
|
||||
EU_NUM_DEPENDENCY_IDS - EU_DEPENDENCY_ID_SBID_RD0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_RD0 + swsb.sbid);
|
||||
} else {
|
||||
return EU_NUM_DEPENDENCY_IDS;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the dependency ID corresponding to the SBID write completion
|
||||
* condition of a Gfx12+ SWSB.
|
||||
*/
|
||||
enum intel_eu_dependency_id
|
||||
tgl_swsb_wr_dependency_id(tgl_swsb swsb)
|
||||
{
|
||||
if (swsb.mode) {
|
||||
assert(swsb.sbid <
|
||||
EU_DEPENDENCY_ID_SBID_RD0 - EU_DEPENDENCY_ID_SBID_WR0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_WR0 + swsb.sbid);
|
||||
} else {
|
||||
return EU_NUM_DEPENDENCY_IDS;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the implicit accumulator register accessed by channel \p i of the
|
||||
* instruction.
|
||||
|
|
@ -1398,12 +1365,6 @@ namespace {
|
|||
}
|
||||
}
|
||||
|
||||
/* Stall on any SBID dependencies. */
|
||||
if (inst->sched.mode & (TGL_SBID_SET | TGL_SBID_DST))
|
||||
stall_on_dependency(st, tgl_swsb_wr_dependency_id(inst->sched));
|
||||
else if (inst->sched.mode & TGL_SBID_SRC)
|
||||
stall_on_dependency(st, tgl_swsb_rd_dependency_id(inst->sched));
|
||||
|
||||
/* Execute the instruction. */
|
||||
execute_instruction(st, perf);
|
||||
|
||||
|
|
@ -1446,12 +1407,6 @@ namespace {
|
|||
mark_write_dependency(st, perf, flag_dependency_id(i));
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark any SBID dependencies. */
|
||||
if (inst->sched.mode & TGL_SBID_SET) {
|
||||
mark_read_dependency(st, perf, tgl_swsb_rd_dependency_id(inst->sched));
|
||||
mark_write_dependency(st, perf, tgl_swsb_wr_dependency_id(inst->sched));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -127,7 +127,6 @@ subb { yylval.integer = ELK_OPCODE_SUBB; return SUBB; }
|
|||
wait { yylval.integer = ELK_OPCODE_WAIT; return WAIT; }
|
||||
while { yylval.integer = ELK_OPCODE_WHILE; return WHILE; }
|
||||
xor { yylval.integer = ELK_OPCODE_XOR; return XOR; }
|
||||
sync { yylval.integer = ELK_OPCODE_SYNC; return SYNC; }
|
||||
|
||||
/* extended math functions */
|
||||
cos { yylval.integer = ELK_MATH_FUNCTION_COS; return COS; }
|
||||
|
|
@ -156,13 +155,6 @@ sin { yylval.integer = ELK_MATH_FUNCTION_SIN; return SIN; }
|
|||
sqrt { yylval.integer = ELK_MATH_FUNCTION_SQRT; return SQRT; }
|
||||
sincos { yylval.integer = ELK_MATH_FUNCTION_SINCOS; return SINCOS; }
|
||||
|
||||
/* sync instruction */
|
||||
allrd { yylval.integer = TGL_SYNC_ALLRD; return ALLRD; }
|
||||
allwr { yylval.integer = TGL_SYNC_ALLWR; return ALLWR; }
|
||||
fence { yylval.integer = TGL_SYNC_FENCE; return FENCE; }
|
||||
bar { yylval.integer = TGL_SYNC_BAR; return BAR; }
|
||||
host { yylval.integer = TGL_SYNC_HOST; return HOST; }
|
||||
|
||||
/* shared functions for send instruction */
|
||||
sampler { return SAMPLER; }
|
||||
dp_sampler { return DP_SAMPLER; }
|
||||
|
|
@ -419,17 +411,6 @@ sr[0-9]+ { yylval.integer = atoi(yytext + 2); return STATEREG; }
|
|||
return JUMP_LABEL;
|
||||
}
|
||||
|
||||
/* SWSB */
|
||||
"@"[1-7] { yylval.integer = atoi(yytext + 1); return REG_DIST_CURRENT; }
|
||||
"F@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_FLOAT; }
|
||||
"I@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_INT; }
|
||||
"L@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_LONG; }
|
||||
"A@"[1-7] { yylval.integer = atoi(yytext + 2); return REG_DIST_ALL; }
|
||||
|
||||
"$"[0-9]* { yylval.integer = atoi(yytext + 1); return SBID_ALLOC; }
|
||||
"$"[0-9]*".src" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_SRC; }
|
||||
"$"[0-9]*".dst" { yylval.integer = atoi(yytext + 1); return SBID_WAIT_DST; }
|
||||
|
||||
\n { yycolumn = 1; }
|
||||
|
||||
. {
|
||||
|
|
|
|||
|
|
@ -1061,7 +1061,6 @@ elk_backend_instruction::has_side_effects() const
|
|||
case ELK_SHADER_OPCODE_SEND:
|
||||
return send_has_side_effects;
|
||||
|
||||
case ELK_OPCODE_SYNC:
|
||||
case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
|
||||
case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||
case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue