mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
brw: Add brw_send_inst
Move all the SEND-specific fields from brw_inst into brw_send_inst. This new instruction kind will contain all variants of SENDs plus the virtual opcodes that were already relying on those SEND fields.

Use the `as_send()` helper to go from a brw_inst into the brw_send_inst when applicable. Some of the code was changed to use the brw_send_inst type directly.

Until other kinds are added, all the instructions are allocated the same amount of space as brw_send_inst. This ensures that all brw_transform_inst() calls are still valid. This will change after a few patches so that BASE instructions can use less memory.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
parent
b27f6621ae
commit
0fcce2722f
23 changed files with 645 additions and 494 deletions
|
|
@ -137,8 +137,12 @@ namespace {
|
||||||
td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
|
td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
|
||||||
tx(get_exec_type(inst)), sx(0), ss(0),
|
tx(get_exec_type(inst)), sx(0), ss(0),
|
||||||
sc(has_bank_conflict(isa, inst) ? sd : 0),
|
sc(has_bank_conflict(isa, inst) ? sd : 0),
|
||||||
desc(inst->desc), sfid(inst->sfid)
|
desc(0), sfid(0)
|
||||||
{
|
{
|
||||||
|
const brw_send_inst *send = inst->as_send();
|
||||||
|
if (send) {
|
||||||
|
desc = send->desc;
|
||||||
|
sfid = send->sfid;
|
||||||
/* We typically want the maximum source size, except for split send
|
/* We typically want the maximum source size, except for split send
|
||||||
* messages which require the total size.
|
* messages which require the total size.
|
||||||
*/
|
*/
|
||||||
|
|
@ -146,12 +150,13 @@ namespace {
|
||||||
ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) +
|
ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) +
|
||||||
DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE);
|
DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE);
|
||||||
} else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) {
|
} else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) {
|
||||||
ss = inst->mlen;
|
ss = send->mlen;
|
||||||
/* If haven't lowered yet, count the sources. */
|
/* If haven't lowered yet, count the sources. */
|
||||||
if (!ss) {
|
if (!ss) {
|
||||||
for (int i = 3; i < inst->sources; i++)
|
for (int i = 3; i < inst->sources; i++)
|
||||||
ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE);
|
ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
for (unsigned i = 0; i < inst->sources; i++)
|
for (unsigned i = 0; i < inst->sources; i++)
|
||||||
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE));
|
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE));
|
||||||
|
|
|
||||||
|
|
@ -632,10 +632,10 @@ public:
|
||||||
#undef _ALU1
|
#undef _ALU1
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
brw_inst *
|
brw_send_inst *
|
||||||
SEND() const
|
SEND() const
|
||||||
{
|
{
|
||||||
return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS);
|
return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS)->as_send();
|
||||||
}
|
}
|
||||||
|
|
||||||
brw_inst *
|
brw_inst *
|
||||||
|
|
|
||||||
|
|
@ -620,7 +620,7 @@ static void
|
||||||
brw_emit_repclear_shader(brw_shader &s)
|
brw_emit_repclear_shader(brw_shader &s)
|
||||||
{
|
{
|
||||||
brw_wm_prog_key *key = (brw_wm_prog_key*) s.key;
|
brw_wm_prog_key *key = (brw_wm_prog_key*) s.key;
|
||||||
brw_inst *write = NULL;
|
brw_send_inst *write = NULL;
|
||||||
|
|
||||||
assert(s.devinfo->ver < 20);
|
assert(s.devinfo->ver < 20);
|
||||||
assert(s.uniforms == 0);
|
assert(s.uniforms == 0);
|
||||||
|
|
@ -666,7 +666,7 @@ brw_emit_repclear_shader(brw_shader &s)
|
||||||
write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header;
|
write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header;
|
||||||
write->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
write->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||||
write->check_tdr = true;
|
write->check_tdr = true;
|
||||||
write->send_has_side_effects = true;
|
write->has_side_effects = true;
|
||||||
|
|
||||||
/* We can use a headerless message for the first render target */
|
/* We can use a headerless message for the first render target */
|
||||||
write->header_size = i == 0 ? 0 : 2;
|
write->header_size = i == 0 ? 0 : 2;
|
||||||
|
|
|
||||||
|
|
@ -289,9 +289,9 @@ brw_emit_urb_fence(brw_shader &s)
|
||||||
{
|
{
|
||||||
const brw_builder bld1 = brw_builder(&s).uniform();
|
const brw_builder bld1 = brw_builder(&s).uniform();
|
||||||
brw_reg dst = bld1.vgrf(BRW_TYPE_UD);
|
brw_reg dst = bld1.vgrf(BRW_TYPE_UD);
|
||||||
brw_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
|
brw_send_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
|
||||||
brw_vec8_grf(0, 0),
|
brw_vec8_grf(0, 0),
|
||||||
brw_imm_ud(true));
|
brw_imm_ud(true))->as_send();
|
||||||
fence->size_written = REG_SIZE * reg_unit(s.devinfo);
|
fence->size_written = REG_SIZE * reg_unit(s.devinfo);
|
||||||
fence->sfid = BRW_SFID_URB;
|
fence->sfid = BRW_SFID_URB;
|
||||||
/* The logical thing here would likely be a THREADGROUP fence but that's
|
/* The logical thing here would likely be a THREADGROUP fence but that's
|
||||||
|
|
|
||||||
|
|
@ -4935,7 +4935,7 @@ emit_rt_lsc_fence(const brw_builder &bld,
|
||||||
const brw_builder ubld = bld.exec_all().group(8, 0);
|
const brw_builder ubld = bld.exec_all().group(8, 0);
|
||||||
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
brw_inst *send = ubld.SEND();
|
brw_send_inst *send = ubld.SEND();
|
||||||
send->dst = tmp;
|
send->dst = tmp;
|
||||||
|
|
||||||
send->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
send->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||||
|
|
@ -4949,7 +4949,7 @@ emit_rt_lsc_fence(const brw_builder &bld,
|
||||||
send->ex_mlen = 0;
|
send->ex_mlen = 0;
|
||||||
/* Temp write for scheduling */
|
/* Temp write for scheduling */
|
||||||
send->size_written = REG_SIZE * reg_unit(devinfo);
|
send->size_written = REG_SIZE * reg_unit(devinfo);
|
||||||
send->send_has_side_effects = true;
|
send->has_side_effects = true;
|
||||||
|
|
||||||
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), tmp);
|
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), tmp);
|
||||||
}
|
}
|
||||||
|
|
@ -5164,8 +5164,8 @@ emit_fence(const brw_builder &bld, enum opcode opcode,
|
||||||
opcode == SHADER_OPCODE_MEMORY_FENCE);
|
opcode == SHADER_OPCODE_MEMORY_FENCE);
|
||||||
|
|
||||||
brw_reg dst = commit_enable ? bld.vgrf(BRW_TYPE_UD) : bld.null_reg_ud();
|
brw_reg dst = commit_enable ? bld.vgrf(BRW_TYPE_UD) : bld.null_reg_ud();
|
||||||
brw_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0),
|
brw_send_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0),
|
||||||
brw_imm_ud(commit_enable));
|
brw_imm_ud(commit_enable))->as_send();
|
||||||
fence->sfid = sfid;
|
fence->sfid = sfid;
|
||||||
fence->desc = desc;
|
fence->desc = desc;
|
||||||
fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0;
|
fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0;
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ brw_generator::patch_halt_jumps()
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_generator::generate_send(brw_inst *inst,
|
brw_generator::generate_send(brw_send_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg desc,
|
struct brw_reg desc,
|
||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
|
|
@ -181,7 +181,7 @@ brw_generator::generate_send(brw_inst *inst,
|
||||||
* descriptor is written indirectly (it already contains a SS/BSS
|
* descriptor is written indirectly (it already contains a SS/BSS
|
||||||
* surface handle)
|
* surface handle)
|
||||||
*/
|
*/
|
||||||
assert(!inst->send_ex_desc_imm);
|
assert(!inst->ex_desc_imm);
|
||||||
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather);
|
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather);
|
||||||
if (inst->check_tdr)
|
if (inst->check_tdr)
|
||||||
brw_eu_inst_set_opcode(p->isa, brw_last_inst, BRW_OPCODE_SENDC);
|
brw_eu_inst_set_opcode(p->isa, brw_last_inst, BRW_OPCODE_SENDC);
|
||||||
|
|
@ -191,8 +191,8 @@ brw_generator::generate_send(brw_inst *inst,
|
||||||
*/
|
*/
|
||||||
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
||||||
desc, ex_desc,
|
desc, ex_desc,
|
||||||
inst->send_ex_desc_imm ? inst->offset : 0,
|
inst->ex_desc_imm ? inst->offset : 0,
|
||||||
inst->ex_mlen, inst->send_ex_bso,
|
inst->ex_mlen, inst->ex_bso,
|
||||||
inst->eot, gather);
|
inst->eot, gather);
|
||||||
if (inst->check_tdr)
|
if (inst->check_tdr)
|
||||||
brw_eu_inst_set_opcode(p->isa, brw_last_inst,
|
brw_eu_inst_set_opcode(p->isa, brw_last_inst,
|
||||||
|
|
@ -886,7 +886,8 @@ brw_generator::generate_code(const brw_shader &s,
|
||||||
|
|
||||||
assert(inst->force_writemask_all || inst->exec_size >= 4);
|
assert(inst->force_writemask_all || inst->exec_size >= 4);
|
||||||
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
|
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
|
||||||
assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo));
|
if (const brw_send_inst *send = inst->as_send())
|
||||||
|
assert(send->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo));
|
||||||
|
|
||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case BRW_OPCODE_NOP:
|
case BRW_OPCODE_NOP:
|
||||||
|
|
@ -1094,7 +1095,6 @@ brw_generator::generate_code(const brw_shader &s,
|
||||||
case SHADER_OPCODE_SIN:
|
case SHADER_OPCODE_SIN:
|
||||||
case SHADER_OPCODE_COS:
|
case SHADER_OPCODE_COS:
|
||||||
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||||
assert(inst->mlen == 0);
|
|
||||||
gfx6_math(p, dst, brw_math_function(inst->opcode),
|
gfx6_math(p, dst, brw_math_function(inst->opcode),
|
||||||
src[0], retype(brw_null_reg(), src[0].type));
|
src[0], retype(brw_null_reg(), src[0].type));
|
||||||
break;
|
break;
|
||||||
|
|
@ -1103,7 +1103,6 @@ brw_generator::generate_code(const brw_shader &s,
|
||||||
case SHADER_OPCODE_POW:
|
case SHADER_OPCODE_POW:
|
||||||
assert(devinfo->verx10 < 125);
|
assert(devinfo->verx10 < 125);
|
||||||
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||||
assert(inst->mlen == 0);
|
|
||||||
assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8);
|
assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8);
|
||||||
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
@ -1144,13 +1143,13 @@ brw_generator::generate_code(const brw_shader &s,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
generate_send(inst, dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
|
generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
|
||||||
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]);
|
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]);
|
||||||
send_count++;
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_SEND_GATHER:
|
case SHADER_OPCODE_SEND_GATHER:
|
||||||
generate_send(inst, dst,
|
generate_send(inst->as_send(), dst,
|
||||||
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
|
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
|
||||||
src[SEND_GATHER_SRC_SCALAR], brw_null_reg());
|
src[SEND_GATHER_SRC_SCALAR], brw_null_reg());
|
||||||
send_count++;
|
send_count++;
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ public:
|
||||||
const unsigned *get_assembly();
|
const unsigned *get_assembly();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void generate_send(brw_inst *inst,
|
void generate_send(brw_send_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg desc,
|
struct brw_reg desc,
|
||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,11 @@
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
brw_inst_kind_size(brw_inst_kind kind)
|
brw_inst_kind_size(brw_inst_kind kind)
|
||||||
{
|
{
|
||||||
return sizeof(brw_inst);
|
/* TODO: Temporarily here to ensure all instructions can be converted to
|
||||||
|
* SEND. Once all new kinds are added, change so that BASE allocate only
|
||||||
|
* sizeof(brw_inst).
|
||||||
|
*/
|
||||||
|
return sizeof(brw_send_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
static brw_inst *
|
static brw_inst *
|
||||||
|
|
@ -110,8 +114,6 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
|
||||||
const brw_inst_kind kind = inst->kind;
|
const brw_inst_kind kind = inst->kind;
|
||||||
const brw_inst_kind new_kind = brw_inst_kind_for_opcode(new_opcode);
|
const brw_inst_kind new_kind = brw_inst_kind_for_opcode(new_opcode);
|
||||||
|
|
||||||
assert(new_kind == BRW_KIND_BASE);
|
|
||||||
|
|
||||||
const unsigned inst_size = brw_inst_kind_size(kind);
|
const unsigned inst_size = brw_inst_kind_size(kind);
|
||||||
const unsigned new_inst_size = brw_inst_kind_size(new_kind);
|
const unsigned new_inst_size = brw_inst_kind_size(new_kind);
|
||||||
assert(new_inst_size <= inst_size);
|
assert(new_inst_size <= inst_size);
|
||||||
|
|
@ -127,6 +129,9 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
|
||||||
inst->src = new_src;
|
inst->src = new_src;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_kind != kind)
|
||||||
|
memset(((char *)inst) + sizeof(brw_inst), 0, new_inst_size - sizeof(brw_inst));
|
||||||
|
|
||||||
inst->sources = new_num_sources;
|
inst->sources = new_num_sources;
|
||||||
inst->opcode = new_opcode;
|
inst->opcode = new_opcode;
|
||||||
inst->kind = new_kind;
|
inst->kind = new_kind;
|
||||||
|
|
@ -137,8 +142,22 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
|
||||||
brw_inst_kind
|
brw_inst_kind
|
||||||
brw_inst_kind_for_opcode(enum opcode opcode)
|
brw_inst_kind_for_opcode(enum opcode opcode)
|
||||||
{
|
{
|
||||||
|
switch (opcode) {
|
||||||
|
case BRW_OPCODE_SEND:
|
||||||
|
case BRW_OPCODE_SENDS:
|
||||||
|
case BRW_OPCODE_SENDC:
|
||||||
|
case BRW_OPCODE_SENDSC:
|
||||||
|
case SHADER_OPCODE_SEND:
|
||||||
|
case SHADER_OPCODE_SEND_GATHER:
|
||||||
|
case SHADER_OPCODE_BARRIER:
|
||||||
|
case SHADER_OPCODE_MEMORY_FENCE:
|
||||||
|
case SHADER_OPCODE_INTERLOCK:
|
||||||
|
return BRW_KIND_SEND;
|
||||||
|
|
||||||
|
default:
|
||||||
return BRW_KIND_BASE;
|
return BRW_KIND_BASE;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
brw_inst::is_send() const
|
brw_inst::is_send() const
|
||||||
|
|
@ -483,9 +502,9 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
if (arg == SEND_SRC_PAYLOAD1) {
|
if (arg == SEND_SRC_PAYLOAD1) {
|
||||||
return mlen * REG_SIZE;
|
return as_send()->mlen * REG_SIZE;
|
||||||
} else if (arg == SEND_SRC_PAYLOAD2) {
|
} else if (arg == SEND_SRC_PAYLOAD2) {
|
||||||
return ex_mlen * REG_SIZE;
|
return as_send()->ex_mlen * REG_SIZE;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
@ -893,7 +912,7 @@ brw_inst::has_side_effects() const
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
case SHADER_OPCODE_SEND_GATHER:
|
case SHADER_OPCODE_SEND_GATHER:
|
||||||
return send_has_side_effects;
|
return as_send()->has_side_effects;
|
||||||
|
|
||||||
case BRW_OPCODE_SYNC:
|
case BRW_OPCODE_SYNC:
|
||||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||||
|
|
@ -927,7 +946,7 @@ brw_inst::is_volatile() const
|
||||||
return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS;
|
return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
case SHADER_OPCODE_SEND_GATHER:
|
case SHADER_OPCODE_SEND_GATHER:
|
||||||
return send_is_volatile;
|
return as_send()->is_volatile;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ struct brw_shader;
|
||||||
|
|
||||||
enum ENUM_PACKED brw_inst_kind {
|
enum ENUM_PACKED brw_inst_kind {
|
||||||
BRW_KIND_BASE,
|
BRW_KIND_BASE,
|
||||||
|
BRW_KIND_SEND,
|
||||||
};
|
};
|
||||||
|
|
||||||
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
|
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
|
||||||
|
|
@ -53,6 +54,23 @@ struct brw_inst : brw_exec_node {
|
||||||
static void* operator new(size_t size, void *ptr) { return ptr; }
|
static void* operator new(size_t size, void *ptr) { return ptr; }
|
||||||
static void operator delete(void *p) {}
|
static void operator delete(void *p) {}
|
||||||
|
|
||||||
|
/* Prefer macro here instead of templates to get nicer
|
||||||
|
* helper names.
|
||||||
|
*/
|
||||||
|
#define KIND_HELPERS(HELPER_NAME, TYPE_NAME, ENUM_NAME) \
|
||||||
|
struct TYPE_NAME *HELPER_NAME() { \
|
||||||
|
return kind == ENUM_NAME ? (struct TYPE_NAME *)this \
|
||||||
|
: nullptr; \
|
||||||
|
} \
|
||||||
|
const struct TYPE_NAME *HELPER_NAME() const { \
|
||||||
|
return kind == ENUM_NAME ? (const struct TYPE_NAME *)this \
|
||||||
|
: nullptr; \
|
||||||
|
}
|
||||||
|
|
||||||
|
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
|
||||||
|
|
||||||
|
#undef KIND_HELPERS
|
||||||
|
|
||||||
bool is_send() const;
|
bool is_send() const;
|
||||||
bool is_payload(unsigned arg) const;
|
bool is_payload(unsigned arg) const;
|
||||||
bool is_partial_write(unsigned grf_size = REG_SIZE) const;
|
bool is_partial_write(unsigned grf_size = REG_SIZE) const;
|
||||||
|
|
@ -144,13 +162,8 @@ struct brw_inst : brw_exec_node {
|
||||||
*/
|
*/
|
||||||
uint8_t group;
|
uint8_t group;
|
||||||
|
|
||||||
uint8_t mlen; /**< SEND message length */
|
|
||||||
uint8_t ex_mlen; /**< SENDS extended message length */
|
|
||||||
uint8_t sfid; /**< SFID for SEND instructions */
|
|
||||||
/** The number of hardware registers used for a message header. */
|
/** The number of hardware registers used for a message header. */
|
||||||
uint8_t header_size;
|
uint8_t header_size;
|
||||||
uint32_t desc; /**< SEND[S] message descriptor immediate */
|
|
||||||
uint32_t ex_desc; /**< SEND[S] extended message descriptor immediate */
|
|
||||||
|
|
||||||
uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
|
uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
|
||||||
uint16_t size_written; /**< Data written to the destination register in bytes. */
|
uint16_t size_written; /**< Data written to the destination register in bytes. */
|
||||||
|
|
@ -179,25 +192,11 @@ struct brw_inst : brw_exec_node {
|
||||||
*/
|
*/
|
||||||
unsigned rcount:4;
|
unsigned rcount:4;
|
||||||
|
|
||||||
unsigned pad:4;
|
|
||||||
|
|
||||||
bool predicate_inverse:1;
|
bool predicate_inverse:1;
|
||||||
bool writes_accumulator:1; /**< instruction implicitly writes accumulator */
|
bool writes_accumulator:1; /**< instruction implicitly writes accumulator */
|
||||||
bool force_writemask_all:1;
|
bool force_writemask_all:1;
|
||||||
bool saturate:1;
|
bool saturate:1;
|
||||||
bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
|
|
||||||
bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
|
|
||||||
bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */
|
|
||||||
bool send_ex_bso:1; /**< Only for SHADER_OPCODE_SEND, use extended
|
|
||||||
* bindless surface offset (26bits instead of
|
|
||||||
* 20bits)
|
|
||||||
*/
|
|
||||||
/**
|
|
||||||
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
|
|
||||||
* part of the extended descriptor that must be encoded in the
|
|
||||||
* instruction.
|
|
||||||
*/
|
|
||||||
bool send_ex_desc_imm:1;
|
|
||||||
/**
|
/**
|
||||||
* The predication mask applied to this instruction is guaranteed to
|
* The predication mask applied to this instruction is guaranteed to
|
||||||
* be uniform and a superset of the execution mask of the present block.
|
* be uniform and a superset of the execution mask of the present block.
|
||||||
|
|
@ -215,6 +214,8 @@ struct brw_inst : brw_exec_node {
|
||||||
* never executed.
|
* never executed.
|
||||||
*/
|
*/
|
||||||
bool has_no_mask_send_params:1;
|
bool has_no_mask_send_params:1;
|
||||||
|
|
||||||
|
unsigned pad:13;
|
||||||
};
|
};
|
||||||
uint32_t bits;
|
uint32_t bits;
|
||||||
};
|
};
|
||||||
|
|
@ -233,6 +234,42 @@ struct brw_inst : brw_exec_node {
|
||||||
bblock_t *block;
|
bblock_t *block;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct brw_send_inst : brw_inst {
|
||||||
|
uint32_t desc;
|
||||||
|
uint32_t ex_desc;
|
||||||
|
|
||||||
|
uint8_t mlen;
|
||||||
|
uint8_t ex_mlen;
|
||||||
|
uint8_t sfid;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
/**
|
||||||
|
* Turns it into a SENDC.
|
||||||
|
*/
|
||||||
|
bool check_tdr:1;
|
||||||
|
|
||||||
|
bool has_side_effects:1;
|
||||||
|
bool is_volatile:1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use extended bindless surface offset (26bits instead of 20bits)
|
||||||
|
*/
|
||||||
|
bool ex_bso:1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
|
||||||
|
* part of the extended descriptor that must be encoded in the
|
||||||
|
* instruction.
|
||||||
|
*/
|
||||||
|
bool ex_desc_imm:1;
|
||||||
|
|
||||||
|
uint8_t pad:3;
|
||||||
|
};
|
||||||
|
uint8_t send_bits;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make the execution of \p inst dependent on the evaluation of a possibly
|
* Make the execution of \p inst dependent on the evaluation of a possibly
|
||||||
* inverted predicate.
|
* inverted predicate.
|
||||||
|
|
|
||||||
|
|
@ -519,22 +519,27 @@ brw_lower_sends_overlapping_payload(brw_shader &s)
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
|
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
|
||||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||||
regions_overlap(inst->src[SEND_SRC_PAYLOAD1],
|
continue;
|
||||||
inst->mlen * REG_SIZE,
|
|
||||||
inst->src[SEND_SRC_PAYLOAD2],
|
brw_send_inst *send = inst->as_send();
|
||||||
inst->ex_mlen * REG_SIZE)) {
|
|
||||||
const unsigned arg = inst->mlen < inst->ex_mlen ?
|
if (send->ex_mlen > 0 &&
|
||||||
|
regions_overlap(send->src[SEND_SRC_PAYLOAD1],
|
||||||
|
send->mlen * REG_SIZE,
|
||||||
|
send->src[SEND_SRC_PAYLOAD2],
|
||||||
|
send->ex_mlen * REG_SIZE)) {
|
||||||
|
const unsigned arg = send->mlen < send->ex_mlen ?
|
||||||
SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2;
|
SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2;
|
||||||
const unsigned len = MIN2(inst->mlen, inst->ex_mlen);
|
const unsigned len = MIN2(send->mlen, send->ex_mlen);
|
||||||
|
|
||||||
brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD);
|
brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Sadly, we've lost all notion of channels and bit sizes at this
|
/* Sadly, we've lost all notion of channels and bit sizes at this
|
||||||
* point. Just WE_all it.
|
* point. Just WE_all it.
|
||||||
*/
|
*/
|
||||||
const brw_builder ibld = brw_builder(inst).exec_all().group(16, 0);
|
const brw_builder ibld = brw_builder(send).exec_all().group(16, 0);
|
||||||
brw_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD);
|
brw_reg copy_src = retype(send->src[arg], BRW_TYPE_UD);
|
||||||
brw_reg copy_dst = tmp;
|
brw_reg copy_dst = tmp;
|
||||||
for (unsigned i = 0; i < len; i += 2) {
|
for (unsigned i = 0; i < len; i += 2) {
|
||||||
if (len == i + 1) {
|
if (len == i + 1) {
|
||||||
|
|
@ -546,7 +551,7 @@ brw_lower_sends_overlapping_payload(brw_shader &s)
|
||||||
copy_src = offset(copy_src, ibld, 1);
|
copy_src = offset(copy_src, ibld, 1);
|
||||||
copy_dst = offset(copy_dst, ibld, 1);
|
copy_dst = offset(copy_dst, ibld, 1);
|
||||||
}
|
}
|
||||||
inst->src[arg] = tmp;
|
send->src[arg] = tmp;
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -867,7 +872,7 @@ brw_s0(enum brw_reg_type type, unsigned subnr)
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
brw_lower_send_gather_inst(brw_shader &s, brw_inst *inst)
|
brw_lower_send_gather_inst(brw_shader &s, brw_send_inst *inst)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = s.devinfo;
|
const intel_device_info *devinfo = s.devinfo;
|
||||||
assert(devinfo->ver >= 30);
|
assert(devinfo->ver >= 30);
|
||||||
|
|
@ -926,7 +931,7 @@ brw_lower_send_gather(brw_shader &s)
|
||||||
|
|
||||||
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
|
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
|
||||||
if (inst->opcode == SHADER_OPCODE_SEND_GATHER)
|
if (inst->opcode == SHADER_OPCODE_SEND_GATHER)
|
||||||
progress |= brw_lower_send_gather_inst(s, inst);
|
progress |= brw_lower_send_gather_inst(s, inst->as_send());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (progress)
|
if (progress)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -677,7 +677,10 @@ brw_lower_simd_width(brw_shader &s)
|
||||||
(inst->size_written - residency_size) /
|
(inst->size_written - residency_size) /
|
||||||
inst->dst.component_size(inst->exec_size);
|
inst->dst.component_size(inst->exec_size);
|
||||||
|
|
||||||
assert(!inst->writes_accumulator && !inst->mlen);
|
if (const brw_send_inst *send = inst->as_send())
|
||||||
|
assert(!send->mlen);
|
||||||
|
|
||||||
|
assert(!inst->writes_accumulator);
|
||||||
|
|
||||||
/* Inserting the zip, unzip, and duplicated instructions in all of
|
/* Inserting the zip, unzip, and duplicated instructions in all of
|
||||||
* the right spots is somewhat tricky. All of the unzip and any
|
* the right spots is somewhat tricky. All of the unzip and any
|
||||||
|
|
|
||||||
|
|
@ -263,9 +263,12 @@ brw_opt_zero_samples(brw_shader &s)
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst(block, brw_inst, send, s.cfg) {
|
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
|
||||||
if (send->opcode != SHADER_OPCODE_SEND ||
|
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||||
send->sfid != BRW_SFID_SAMPLER)
|
continue;
|
||||||
|
|
||||||
|
brw_send_inst *send = inst->as_send();
|
||||||
|
if (send->sfid != BRW_SFID_SAMPLER)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Wa_14012688258:
|
/* Wa_14012688258:
|
||||||
|
|
@ -340,9 +343,12 @@ brw_opt_split_sends(brw_shader &s)
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst(block, brw_inst, send, s.cfg) {
|
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
|
||||||
if (send->opcode != SHADER_OPCODE_SEND ||
|
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||||
send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 ||
|
continue;
|
||||||
|
|
||||||
|
brw_send_inst *send = inst->as_send();
|
||||||
|
if (send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 ||
|
||||||
send->src[SEND_SRC_PAYLOAD1].file != VGRF)
|
send->src[SEND_SRC_PAYLOAD1].file != VGRF)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
@ -619,19 +625,21 @@ brw_opt_send_to_send_gather(brw_shader &s)
|
||||||
if (inst->opcode != SHADER_OPCODE_SEND)
|
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
brw_send_inst *send = inst->as_send();
|
||||||
|
|
||||||
/* For 1-2 registers, send-gather offers no benefits over split-send. */
|
/* For 1-2 registers, send-gather offers no benefits over split-send. */
|
||||||
if (inst->mlen + inst->ex_mlen <= 2 * unit)
|
if (send->mlen + send->ex_mlen <= 2 * unit)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
assert(inst->mlen % unit == 0);
|
assert(send->mlen % unit == 0);
|
||||||
assert(inst->ex_mlen % unit == 0);
|
assert(send->ex_mlen % unit == 0);
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
brw_reg src;
|
brw_reg src;
|
||||||
unsigned phys_len;
|
unsigned phys_len;
|
||||||
} payload[2] = {
|
} payload[2] = {
|
||||||
{ inst->src[SEND_SRC_PAYLOAD1], inst->mlen / unit },
|
{ send->src[SEND_SRC_PAYLOAD1], send->mlen / unit },
|
||||||
{ inst->src[SEND_SRC_PAYLOAD2], inst->ex_mlen / unit },
|
{ send->src[SEND_SRC_PAYLOAD2], send->ex_mlen / unit },
|
||||||
};
|
};
|
||||||
|
|
||||||
const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len;
|
const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len;
|
||||||
|
|
@ -645,25 +653,25 @@ brw_opt_send_to_send_gather(brw_shader &s)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND_GATHER,
|
send = brw_transform_inst(s, send, SHADER_OPCODE_SEND_GATHER,
|
||||||
SEND_GATHER_SRC_PAYLOAD + num_payload_sources);
|
SEND_GATHER_SRC_PAYLOAD + num_payload_sources)->as_send();
|
||||||
|
|
||||||
/* Sources 0 and 1 remain the same. Source 2 will be filled
|
/* Sources 0 and 1 remain the same. Source 2 will be filled
|
||||||
* after register allocation.
|
* after register allocation.
|
||||||
*/
|
*/
|
||||||
inst->src[SEND_GATHER_SRC_SCALAR] = {};
|
send->src[SEND_GATHER_SRC_SCALAR] = {};
|
||||||
|
|
||||||
int idx = 3;
|
int idx = 3;
|
||||||
for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) {
|
for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) {
|
||||||
for (unsigned i = 0; i < payload[p].phys_len; i++) {
|
for (unsigned i = 0; i < payload[p].phys_len; i++) {
|
||||||
inst->src[idx++] = byte_offset(payload[p].src,
|
send->src[idx++] = byte_offset(payload[p].src,
|
||||||
i * reg_unit(devinfo) * REG_SIZE);
|
i * reg_unit(devinfo) * REG_SIZE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(idx == inst->sources);
|
assert(idx == send->sources);
|
||||||
|
|
||||||
inst->mlen = 0;
|
send->mlen = 0;
|
||||||
inst->ex_mlen = 0;
|
send->ex_mlen = 0;
|
||||||
|
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
|
|
@ -699,10 +707,12 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
||||||
if (inst->opcode != SHADER_OPCODE_SEND_GATHER)
|
if (inst->opcode != SHADER_OPCODE_SEND_GATHER)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
assert(inst->sources > 2);
|
brw_send_inst *send = inst->as_send();
|
||||||
assert(inst->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE);
|
|
||||||
|
|
||||||
const int num_payload_sources = inst->sources - 3;
|
assert(send->sources > 2);
|
||||||
|
assert(send->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE);
|
||||||
|
|
||||||
|
const int num_payload_sources = send->sources - 3;
|
||||||
assert(num_payload_sources > 0);
|
assert(num_payload_sources > 0);
|
||||||
|
|
||||||
/* Limited by Src0.Length in the SEND instruction. */
|
/* Limited by Src0.Length in the SEND instruction. */
|
||||||
|
|
@ -713,7 +723,7 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
||||||
* and there's no need to use SEND_GATHER (which would set ARF scalar register
|
* and there's no need to use SEND_GATHER (which would set ARF scalar register
|
||||||
* adding an extra instruction).
|
* adding an extra instruction).
|
||||||
*/
|
*/
|
||||||
const brw_reg *payload = &inst->src[SEND_GATHER_SRC_PAYLOAD];
|
const brw_reg *payload = &send->src[SEND_GATHER_SRC_PAYLOAD];
|
||||||
brw_reg payload1 = payload[0];
|
brw_reg payload1 = payload[0];
|
||||||
brw_reg payload2 = {};
|
brw_reg payload2 = {};
|
||||||
int payload1_len = 0;
|
int payload1_len = 0;
|
||||||
|
|
@ -755,21 +765,21 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
||||||
*
|
*
|
||||||
* TODO: Pass LSC address length or infer it so valid splits can work.
|
* TODO: Pass LSC address length or infer it so valid splits can work.
|
||||||
*/
|
*/
|
||||||
if (payload2_len && (inst->sfid == BRW_SFID_UGM ||
|
if (payload2_len && (send->sfid == BRW_SFID_UGM ||
|
||||||
inst->sfid == BRW_SFID_TGM ||
|
send->sfid == BRW_SFID_TGM ||
|
||||||
inst->sfid == BRW_SFID_SLM ||
|
send->sfid == BRW_SFID_SLM ||
|
||||||
inst->sfid == BRW_SFID_URB)) {
|
send->sfid == BRW_SFID_URB)) {
|
||||||
enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, inst->desc);
|
enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, send->desc);
|
||||||
if (lsc_op_num_data_values(lsc_op) > 0)
|
if (lsc_op_num_data_values(lsc_op) > 0)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND);
|
send = brw_transform_inst(s, send, SHADER_OPCODE_SEND)->as_send();
|
||||||
|
|
||||||
inst->src[SEND_SRC_PAYLOAD1] = payload1;
|
send->src[SEND_SRC_PAYLOAD1] = payload1;
|
||||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
send->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||||
inst->mlen = payload1_len * unit;
|
send->mlen = payload1_len * unit;
|
||||||
inst->ex_mlen = payload2_len * unit;
|
send->ex_mlen = payload2_len * unit;
|
||||||
|
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -135,10 +135,12 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
|
||||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||||
return !is_coalescing_payload(*v, inst);
|
return !is_coalescing_payload(*v, inst);
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
case SHADER_OPCODE_SEND_GATHER:
|
case SHADER_OPCODE_SEND_GATHER: {
|
||||||
return !inst->send_has_side_effects &&
|
const brw_send_inst *send = inst->as_send();
|
||||||
!inst->send_is_volatile &&
|
return !send->has_side_effects &&
|
||||||
!inst->eot;
|
!send->is_volatile &&
|
||||||
|
!send->eot;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -238,25 +240,31 @@ operands_match(const brw_inst *a, const brw_inst *b, bool *negate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
send_inst_match(brw_send_inst *a, brw_send_inst *b)
|
||||||
|
{
|
||||||
|
return a->mlen == b->mlen &&
|
||||||
|
a->ex_mlen == b->ex_mlen &&
|
||||||
|
a->sfid == b->sfid &&
|
||||||
|
a->desc == b->desc &&
|
||||||
|
a->ex_desc == b->ex_desc &&
|
||||||
|
a->send_bits == b->send_bits;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
|
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
|
||||||
{
|
{
|
||||||
/* `Kind` is derived from opcode, so skipped. */
|
|
||||||
|
|
||||||
return a->opcode == b->opcode &&
|
return a->opcode == b->opcode &&
|
||||||
|
/* `kind` is derived from opcode, so skipped. */
|
||||||
|
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
|
||||||
a->exec_size == b->exec_size &&
|
a->exec_size == b->exec_size &&
|
||||||
a->group == b->group &&
|
a->group == b->group &&
|
||||||
a->predicate == b->predicate &&
|
a->predicate == b->predicate &&
|
||||||
a->conditional_mod == b->conditional_mod &&
|
a->conditional_mod == b->conditional_mod &&
|
||||||
a->dst.type == b->dst.type &&
|
a->dst.type == b->dst.type &&
|
||||||
a->offset == b->offset &&
|
a->offset == b->offset &&
|
||||||
a->mlen == b->mlen &&
|
|
||||||
a->ex_mlen == b->ex_mlen &&
|
|
||||||
a->sfid == b->sfid &&
|
|
||||||
a->desc == b->desc &&
|
|
||||||
a->ex_desc == b->ex_desc &&
|
|
||||||
a->size_written == b->size_written &&
|
a->size_written == b->size_written &&
|
||||||
a->check_tdr == b->check_tdr &&
|
|
||||||
a->header_size == b->header_size &&
|
a->header_size == b->header_size &&
|
||||||
a->sources == b->sources &&
|
a->sources == b->sources &&
|
||||||
a->bits == b->bits &&
|
a->bits == b->bits &&
|
||||||
|
|
@ -299,17 +307,12 @@ hash_inst(const void *v)
|
||||||
inst->sources,
|
inst->sources,
|
||||||
inst->exec_size,
|
inst->exec_size,
|
||||||
inst->group,
|
inst->group,
|
||||||
inst->mlen,
|
|
||||||
inst->ex_mlen,
|
|
||||||
inst->sfid,
|
|
||||||
inst->header_size,
|
inst->header_size,
|
||||||
|
|
||||||
inst->conditional_mod,
|
inst->conditional_mod,
|
||||||
inst->predicate,
|
inst->predicate,
|
||||||
};
|
};
|
||||||
const uint32_t u32data[] = {
|
const uint32_t u32data[] = {
|
||||||
inst->desc,
|
|
||||||
inst->ex_desc,
|
|
||||||
inst->offset,
|
inst->offset,
|
||||||
inst->size_written,
|
inst->size_written,
|
||||||
inst->opcode,
|
inst->opcode,
|
||||||
|
|
@ -321,6 +324,29 @@ hash_inst(const void *v)
|
||||||
|
|
||||||
/* Skip hashing sched - we shouldn't be CSE'ing after that SWSB */
|
/* Skip hashing sched - we shouldn't be CSE'ing after that SWSB */
|
||||||
|
|
||||||
|
switch (inst->kind) {
|
||||||
|
case BRW_KIND_SEND: {
|
||||||
|
const brw_send_inst *send = inst->as_send();
|
||||||
|
const uint8_t send_u8data[] = {
|
||||||
|
send->mlen,
|
||||||
|
send->ex_mlen,
|
||||||
|
send->sfid,
|
||||||
|
send->send_bits,
|
||||||
|
};
|
||||||
|
const uint32_t send_u32data[] = {
|
||||||
|
send->desc,
|
||||||
|
send->ex_desc,
|
||||||
|
};
|
||||||
|
hash = HASH(hash, send_u8data);
|
||||||
|
hash = HASH(hash, send_u32data);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case BRW_KIND_BASE:
|
||||||
|
/* Nothing else to do. */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (inst->opcode == BRW_OPCODE_MAD) {
|
if (inst->opcode == BRW_OPCODE_MAD) {
|
||||||
/* Commutatively combine the hashes for the multiplicands */
|
/* Commutatively combine the hashes for the multiplicands */
|
||||||
hash = hash_reg(hash, inst->src[0]);
|
hash = hash_reg(hash, inst->src[0]);
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,9 @@ can_omit_write(const brw_inst *inst)
|
||||||
/* We can eliminate the destination write for ordinary instructions,
|
/* We can eliminate the destination write for ordinary instructions,
|
||||||
* but not most SENDs.
|
* but not most SENDs.
|
||||||
*/
|
*/
|
||||||
if (inst->opcode < NUM_BRW_OPCODES && inst->mlen == 0)
|
const brw_send_inst *send = inst->as_send();
|
||||||
|
if (inst->opcode < NUM_BRW_OPCODES &&
|
||||||
|
(!send || send->mlen == 0))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* It might not be safe for other virtual opcodes. */
|
/* It might not be safe for other virtual opcodes. */
|
||||||
|
|
|
||||||
|
|
@ -415,12 +415,14 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
|
||||||
}
|
}
|
||||||
fprintf(file, "(%d) ", inst->exec_size);
|
fprintf(file, "(%d) ", inst->exec_size);
|
||||||
|
|
||||||
if (inst->mlen) {
|
const brw_send_inst *send = inst->as_send();
|
||||||
fprintf(file, "(mlen: %d) ", inst->mlen);
|
|
||||||
|
if (send && send->mlen) {
|
||||||
|
fprintf(file, "(mlen: %d) ", send->mlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->ex_mlen) {
|
if (send && send->ex_mlen) {
|
||||||
fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);
|
fprintf(file, "(ex_mlen: %d) ", send->ex_mlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->eot) {
|
if (inst->eot) {
|
||||||
|
|
@ -665,13 +667,13 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
|
||||||
if (inst->has_no_mask_send_params)
|
if (inst->has_no_mask_send_params)
|
||||||
fprintf(file, "NoMaskParams ");
|
fprintf(file, "NoMaskParams ");
|
||||||
|
|
||||||
if (is_send && inst->desc)
|
if (send && send->desc)
|
||||||
fprintf(file, "Desc 0x%08x ", inst->desc);
|
fprintf(file, "Desc 0x%08x ", send->desc);
|
||||||
|
|
||||||
if (is_send && inst->ex_desc)
|
if (send && send->ex_desc)
|
||||||
fprintf(file, "ExDesc 0x%08x ", inst->ex_desc);
|
fprintf(file, "ExDesc 0x%08x ", send->ex_desc);
|
||||||
|
|
||||||
if (is_send && inst->send_ex_desc_imm)
|
if (send && send->ex_desc_imm)
|
||||||
fprintf(file, "ExDescImmInst 0x%08x ", inst->offset);
|
fprintf(file, "ExDescImmInst 0x%08x ", inst->offset);
|
||||||
|
|
||||||
if (inst->sched.regdist || inst->sched.mode) {
|
if (inst->sched.regdist || inst->sched.mode) {
|
||||||
|
|
|
||||||
|
|
@ -621,7 +621,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
|
||||||
* they're used as sources in the same instruction. We also need to add
|
* they're used as sources in the same instruction. We also need to add
|
||||||
* interference here.
|
* interference here.
|
||||||
*/
|
*/
|
||||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
if (inst->opcode == SHADER_OPCODE_SEND && inst->as_send()->ex_mlen > 0 &&
|
||||||
inst->src[SEND_SRC_PAYLOAD1].file == VGRF &&
|
inst->src[SEND_SRC_PAYLOAD1].file == VGRF &&
|
||||||
inst->src[SEND_SRC_PAYLOAD2].file == VGRF &&
|
inst->src[SEND_SRC_PAYLOAD2].file == VGRF &&
|
||||||
inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) {
|
inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) {
|
||||||
|
|
@ -643,7 +643,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
|
||||||
assert(inst->opcode == SHADER_OPCODE_SEND);
|
assert(inst->opcode == SHADER_OPCODE_SEND);
|
||||||
const brw_reg srcs[2] = {
|
const brw_reg srcs[2] = {
|
||||||
inst->src[SEND_SRC_PAYLOAD1],
|
inst->src[SEND_SRC_PAYLOAD1],
|
||||||
inst->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(),
|
inst->as_send()->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(),
|
||||||
};
|
};
|
||||||
const unsigned sizes[2] = {
|
const unsigned sizes[2] = {
|
||||||
DIV_ROUND_UP(fs->alloc.sizes[srcs[0].nr], reg_unit(devinfo)),
|
DIV_ROUND_UP(fs->alloc.sizes[srcs[0].nr], reg_unit(devinfo)),
|
||||||
|
|
@ -892,7 +892,7 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
|
||||||
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
|
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
|
||||||
++stats->fill_count;
|
++stats->fill_count;
|
||||||
|
|
||||||
brw_inst *unspill_inst;
|
brw_send_inst *unspill_inst;
|
||||||
if (devinfo->verx10 >= 125) {
|
if (devinfo->verx10 >= 125) {
|
||||||
/* LSC is limited to SIMD16 (SIMD32 on Xe2) load/store but we can
|
/* LSC is limited to SIMD16 (SIMD32 on Xe2) load/store but we can
|
||||||
* load more using transpose messages.
|
* load more using transpose messages.
|
||||||
|
|
@ -933,8 +933,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
|
||||||
unspill_inst->ex_mlen = 0;
|
unspill_inst->ex_mlen = 0;
|
||||||
unspill_inst->size_written =
|
unspill_inst->size_written =
|
||||||
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE;
|
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE;
|
||||||
unspill_inst->send_has_side_effects = false;
|
unspill_inst->has_side_effects = false;
|
||||||
unspill_inst->send_is_volatile = true;
|
unspill_inst->is_volatile = true;
|
||||||
|
|
||||||
unspill_inst->src[0] = brw_imm_ud(
|
unspill_inst->src[0] = brw_imm_ud(
|
||||||
desc |
|
desc |
|
||||||
|
|
@ -958,8 +958,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
|
||||||
unspill_inst->mlen = 1;
|
unspill_inst->mlen = 1;
|
||||||
unspill_inst->header_size = 1;
|
unspill_inst->header_size = 1;
|
||||||
unspill_inst->size_written = reg_size * REG_SIZE;
|
unspill_inst->size_written = reg_size * REG_SIZE;
|
||||||
unspill_inst->send_has_side_effects = false;
|
unspill_inst->has_side_effects = false;
|
||||||
unspill_inst->send_is_volatile = true;
|
unspill_inst->is_volatile = true;
|
||||||
unspill_inst->sfid = BRW_SFID_HDC0;
|
unspill_inst->sfid = BRW_SFID_HDC0;
|
||||||
|
|
||||||
unspill_inst->src[0] = brw_imm_ud(
|
unspill_inst->src[0] = brw_imm_ud(
|
||||||
|
|
@ -992,7 +992,7 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
|
||||||
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
|
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
|
||||||
++stats->spill_count;
|
++stats->spill_count;
|
||||||
|
|
||||||
brw_inst *spill_inst;
|
brw_send_inst *spill_inst;
|
||||||
if (devinfo->verx10 >= 125) {
|
if (devinfo->verx10 >= 125) {
|
||||||
brw_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
brw_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
||||||
|
|
||||||
|
|
@ -1019,8 +1019,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
|
||||||
bld.dispatch_width());
|
bld.dispatch_width());
|
||||||
spill_inst->ex_mlen = reg_size;
|
spill_inst->ex_mlen = reg_size;
|
||||||
spill_inst->size_written = 0;
|
spill_inst->size_written = 0;
|
||||||
spill_inst->send_has_side_effects = true;
|
spill_inst->has_side_effects = true;
|
||||||
spill_inst->send_is_volatile = false;
|
spill_inst->is_volatile = false;
|
||||||
|
|
||||||
spill_inst->src[0] = brw_imm_ud(
|
spill_inst->src[0] = brw_imm_ud(
|
||||||
desc |
|
desc |
|
||||||
|
|
@ -1045,8 +1045,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
|
||||||
spill_inst->ex_mlen = reg_size;
|
spill_inst->ex_mlen = reg_size;
|
||||||
spill_inst->size_written = 0;
|
spill_inst->size_written = 0;
|
||||||
spill_inst->header_size = 1;
|
spill_inst->header_size = 1;
|
||||||
spill_inst->send_has_side_effects = true;
|
spill_inst->has_side_effects = true;
|
||||||
spill_inst->send_is_volatile = false;
|
spill_inst->is_volatile = false;
|
||||||
spill_inst->sfid = BRW_SFID_HDC0;
|
spill_inst->sfid = BRW_SFID_HDC0;
|
||||||
|
|
||||||
spill_inst->src[0] = brw_imm_ud(
|
spill_inst->src[0] = brw_imm_ud(
|
||||||
|
|
|
||||||
|
|
@ -268,10 +268,12 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
case SHADER_OPCODE_SEND_GATHER:
|
case SHADER_OPCODE_SEND_GATHER: {
|
||||||
switch (inst->sfid) {
|
brw_send_inst *send = inst->as_send();
|
||||||
|
|
||||||
|
switch (send->sfid) {
|
||||||
case BRW_SFID_SAMPLER: {
|
case BRW_SFID_SAMPLER: {
|
||||||
unsigned msg_type = (inst->desc >> 12) & 0x1f;
|
unsigned msg_type = (send->desc >> 12) & 0x1f;
|
||||||
switch (msg_type) {
|
switch (msg_type) {
|
||||||
case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
|
case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
|
||||||
case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
|
case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
|
||||||
|
|
@ -364,7 +366,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BRW_SFID_RENDER_CACHE:
|
case BRW_SFID_RENDER_CACHE:
|
||||||
switch (brw_fb_desc_msg_type(isa->devinfo, inst->desc)) {
|
switch (brw_fb_desc_msg_type(isa->devinfo, send->desc)) {
|
||||||
case GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE:
|
case GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE:
|
||||||
case GFX7_DATAPORT_RC_TYPED_SURFACE_READ:
|
case GFX7_DATAPORT_RC_TYPED_SURFACE_READ:
|
||||||
/* See also SHADER_OPCODE_TYPED_SURFACE_READ */
|
/* See also SHADER_OPCODE_TYPED_SURFACE_READ */
|
||||||
|
|
@ -388,7 +390,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BRW_SFID_HDC0:
|
case BRW_SFID_HDC0:
|
||||||
switch ((inst->desc >> 14) & 0x1f) {
|
switch ((send->desc >> 14) & 0x1f) {
|
||||||
case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ:
|
case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ:
|
||||||
case GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ:
|
case GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ:
|
||||||
case GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE:
|
case GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE:
|
||||||
|
|
@ -460,7 +462,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BRW_SFID_HDC1:
|
case BRW_SFID_HDC1:
|
||||||
switch (brw_dp_desc_msg_type(isa->devinfo, inst->desc)) {
|
switch (brw_dp_desc_msg_type(isa->devinfo, send->desc)) {
|
||||||
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
|
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
|
||||||
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
|
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
|
||||||
case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
|
case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
|
||||||
|
|
@ -500,7 +502,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
case BRW_SFID_UGM:
|
case BRW_SFID_UGM:
|
||||||
case BRW_SFID_TGM:
|
case BRW_SFID_TGM:
|
||||||
case BRW_SFID_SLM:
|
case BRW_SFID_SLM:
|
||||||
switch (lsc_msg_desc_opcode(isa->devinfo, inst->desc)) {
|
switch (lsc_msg_desc_opcode(isa->devinfo, send->desc)) {
|
||||||
case LSC_OP_LOAD:
|
case LSC_OP_LOAD:
|
||||||
case LSC_OP_STORE:
|
case LSC_OP_STORE:
|
||||||
case LSC_OP_LOAD_CMASK:
|
case LSC_OP_LOAD_CMASK:
|
||||||
|
|
@ -555,6 +557,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
UNREACHABLE("Unknown SFID");
|
UNREACHABLE("Unknown SFID");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case BRW_OPCODE_DPAS:
|
case BRW_OPCODE_DPAS:
|
||||||
switch (inst->rcount) {
|
switch (inst->rcount) {
|
||||||
|
|
|
||||||
|
|
@ -372,7 +372,7 @@ brw_shader::emit_cs_terminate()
|
||||||
if (devinfo->ver < 11)
|
if (devinfo->ver < 11)
|
||||||
desc |= (1 << 4); /* Do not dereference URB */
|
desc |= (1 << 4); /* Do not dereference URB */
|
||||||
|
|
||||||
brw_inst *send = ubld.SEND();
|
brw_send_inst *send = ubld.SEND();
|
||||||
send->dst = reg_undef;
|
send->dst = reg_undef;
|
||||||
send->src[SEND_SRC_DESC] = brw_imm_ud(desc);
|
send->src[SEND_SRC_DESC] = brw_imm_ud(desc);
|
||||||
send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||||
|
|
@ -678,7 +678,7 @@ brw_shader::assign_curb_setup()
|
||||||
addr = base_addr;
|
addr = base_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
brw_inst *send = ubld.SEND();
|
brw_send_inst *send = ubld.SEND();
|
||||||
send->dst = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
send->dst = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
|
|
||||||
|
|
@ -704,7 +704,7 @@ brw_shader::assign_curb_setup()
|
||||||
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
|
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
|
||||||
assert((payload().num_regs + i + send->size_written / REG_SIZE) <=
|
assert((payload().num_regs + i + send->size_written / REG_SIZE) <=
|
||||||
(payload().num_regs + prog_data->curb_read_length));
|
(payload().num_regs + prog_data->curb_read_length));
|
||||||
send->send_is_volatile = true;
|
send->is_volatile = true;
|
||||||
|
|
||||||
send->src[SEND_SRC_DESC] =
|
send->src[SEND_SRC_DESC] =
|
||||||
brw_imm_ud(desc | brw_message_desc(devinfo,
|
brw_imm_ud(desc | brw_message_desc(devinfo,
|
||||||
|
|
|
||||||
|
|
@ -379,6 +379,9 @@ brw_inst *brw_clone_inst(brw_shader &s, const brw_inst *inst);
|
||||||
* brw_inst are maintained and any previous sources still visible. Additional
|
* brw_inst are maintained and any previous sources still visible. Additional
|
||||||
* sources will be uninitialized.
|
* sources will be uninitialized.
|
||||||
*
|
*
|
||||||
|
* All instructions can be transformed to an instruction of BASE kind.
|
||||||
|
* All non-BASE instructions can be transformed to an instruction of SEND kind.
|
||||||
|
*
|
||||||
* If new_num_srcs is UINT_MAX a default will be picked based on the opcode.
|
* If new_num_srcs is UINT_MAX a default will be picked based on the opcode.
|
||||||
* Not all opcodes have a default.
|
* Not all opcodes have a default.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -314,7 +314,7 @@ brw_validate(const brw_shader &s)
|
||||||
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC]));
|
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC]));
|
||||||
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC]));
|
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC]));
|
||||||
VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE);
|
VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE);
|
||||||
VAL_ASSERT(inst->ex_mlen > 0 ||
|
VAL_ASSERT(inst->as_send()->ex_mlen > 0 ||
|
||||||
inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE);
|
inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE);
|
||||||
/* Send payloads cannot be immediates nor have source modifiers */
|
/* Send payloads cannot be immediates nor have source modifiers */
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
|
|
|
||||||
|
|
@ -37,20 +37,24 @@ brw_workaround_emit_dummy_mov_instruction(brw_shader &s)
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst)
|
needs_dummy_fence(const intel_device_info *devinfo, const brw_inst *inst)
|
||||||
{
|
{
|
||||||
|
const brw_send_inst *send = inst->as_send();
|
||||||
|
if (!send)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* This workaround is about making sure that any instruction writing
|
/* This workaround is about making sure that any instruction writing
|
||||||
* through UGM has completed before we hit EOT.
|
* through UGM has completed before we hit EOT.
|
||||||
*/
|
*/
|
||||||
if (inst->sfid != BRW_SFID_UGM)
|
if (send->sfid != BRW_SFID_UGM)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* Any UGM, non-Scratch-surface Stores (not including Atomic) messages,
|
/* Any UGM, non-Scratch-surface Stores (not including Atomic) messages,
|
||||||
* where the L1-cache override is NOT among {WB, WS, WT}
|
* where the L1-cache override is NOT among {WB, WS, WT}
|
||||||
*/
|
*/
|
||||||
enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc);
|
enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, send->desc);
|
||||||
if (lsc_opcode_is_store(opcode)) {
|
if (lsc_opcode_is_store(opcode)) {
|
||||||
switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) {
|
switch (lsc_msg_desc_cache_ctrl(devinfo, send->desc)) {
|
||||||
case LSC_CACHE_STORE_L1STATE_L3MOCS:
|
case LSC_CACHE_STORE_L1STATE_L3MOCS:
|
||||||
case LSC_CACHE_STORE_L1WB_L3WB:
|
case LSC_CACHE_STORE_L1WB_L3WB:
|
||||||
case LSC_CACHE_STORE_L1S_L3UC:
|
case LSC_CACHE_STORE_L1S_L3UC:
|
||||||
|
|
@ -65,7 +69,7 @@ needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Any UGM Atomic message WITHOUT return value */
|
/* Any UGM Atomic message WITHOUT return value */
|
||||||
if (lsc_opcode_is_atomic(opcode) && inst->dst.is_null())
|
if (lsc_opcode_is_atomic(opcode) && send->dst.is_null())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -106,7 +110,7 @@ brw_workaround_memory_fence_before_eot(brw_shader &s)
|
||||||
const brw_builder ubld = brw_builder(inst).uniform();
|
const brw_builder ubld = brw_builder(inst).uniform();
|
||||||
|
|
||||||
brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
||||||
brw_inst *dummy_fence = ubld.SEND();
|
brw_send_inst *dummy_fence = ubld.SEND();
|
||||||
|
|
||||||
dummy_fence->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
dummy_fence->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||||
dummy_fence->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
dummy_fence->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ emit_SEND(const brw_builder &bld, const brw_reg &dst,
|
||||||
{
|
{
|
||||||
brw_reg uniform_desc = component(desc, 0);
|
brw_reg uniform_desc = component(desc, 0);
|
||||||
|
|
||||||
brw_inst *send = bld.SEND();
|
brw_send_inst *send = bld.SEND();
|
||||||
send->dst = dst;
|
send->dst = dst;
|
||||||
|
|
||||||
send->src[SEND_SRC_DESC] = uniform_desc;
|
send->src[SEND_SRC_DESC] = uniform_desc;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue