brw: Add brw_send_inst

Move all the SEND-specific fields from brw_inst into brw_send_inst.
This new instruction kind will contain all variants of SENDs plus the
virtual opcodes that were already relying on those SEND fields.

Use the `as_send()` helper to go from a brw_inst to the brw_send_inst
when applicable.  Some of the code was changed to use the brw_send_inst
type directly.

Until other kinds are added, all the instructions are allocated the same
amount of space as brw_send_inst.  This ensures that all
brw_transform_inst() calls are still valid.  This will change after
a few patches so that BASE instructions can use less memory.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
Caio Oliveira 2025-08-20 15:43:08 -07:00 committed by Marge Bot
parent b27f6621ae
commit 0fcce2722f
23 changed files with 645 additions and 494 deletions

View file

@ -137,8 +137,12 @@ namespace {
td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)), td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
tx(get_exec_type(inst)), sx(0), ss(0), tx(get_exec_type(inst)), sx(0), ss(0),
sc(has_bank_conflict(isa, inst) ? sd : 0), sc(has_bank_conflict(isa, inst) ? sd : 0),
desc(inst->desc), sfid(inst->sfid) desc(0), sfid(0)
{ {
const brw_send_inst *send = inst->as_send();
if (send) {
desc = send->desc;
sfid = send->sfid;
/* We typically want the maximum source size, except for split send /* We typically want the maximum source size, except for split send
* messages which require the total size. * messages which require the total size.
*/ */
@ -146,12 +150,13 @@ namespace {
ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) + ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) +
DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE); DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE);
} else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) { } else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) {
ss = inst->mlen; ss = send->mlen;
/* If haven't lowered yet, count the sources. */ /* If haven't lowered yet, count the sources. */
if (!ss) { if (!ss) {
for (int i = 3; i < inst->sources; i++) for (int i = 3; i < inst->sources; i++)
ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE); ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE);
} }
}
} else { } else {
for (unsigned i = 0; i < inst->sources; i++) for (unsigned i = 0; i < inst->sources; i++)
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE)); ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE));

View file

@ -632,10 +632,10 @@ public:
#undef _ALU1 #undef _ALU1
/** @} */ /** @} */
brw_inst * brw_send_inst *
SEND() const SEND() const
{ {
return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS); return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS)->as_send();
} }
brw_inst * brw_inst *

View file

@ -620,7 +620,7 @@ static void
brw_emit_repclear_shader(brw_shader &s) brw_emit_repclear_shader(brw_shader &s)
{ {
brw_wm_prog_key *key = (brw_wm_prog_key*) s.key; brw_wm_prog_key *key = (brw_wm_prog_key*) s.key;
brw_inst *write = NULL; brw_send_inst *write = NULL;
assert(s.devinfo->ver < 20); assert(s.devinfo->ver < 20);
assert(s.uniforms == 0); assert(s.uniforms == 0);
@ -666,7 +666,7 @@ brw_emit_repclear_shader(brw_shader &s)
write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header; write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header;
write->src[SEND_SRC_PAYLOAD2] = brw_reg(); write->src[SEND_SRC_PAYLOAD2] = brw_reg();
write->check_tdr = true; write->check_tdr = true;
write->send_has_side_effects = true; write->has_side_effects = true;
/* We can use a headerless message for the first render target */ /* We can use a headerless message for the first render target */
write->header_size = i == 0 ? 0 : 2; write->header_size = i == 0 ? 0 : 2;

View file

@ -289,9 +289,9 @@ brw_emit_urb_fence(brw_shader &s)
{ {
const brw_builder bld1 = brw_builder(&s).uniform(); const brw_builder bld1 = brw_builder(&s).uniform();
brw_reg dst = bld1.vgrf(BRW_TYPE_UD); brw_reg dst = bld1.vgrf(BRW_TYPE_UD);
brw_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst, brw_send_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
brw_vec8_grf(0, 0), brw_vec8_grf(0, 0),
brw_imm_ud(true)); brw_imm_ud(true))->as_send();
fence->size_written = REG_SIZE * reg_unit(s.devinfo); fence->size_written = REG_SIZE * reg_unit(s.devinfo);
fence->sfid = BRW_SFID_URB; fence->sfid = BRW_SFID_URB;
/* The logical thing here would likely be a THREADGROUP fence but that's /* The logical thing here would likely be a THREADGROUP fence but that's

View file

@ -4935,7 +4935,7 @@ emit_rt_lsc_fence(const brw_builder &bld,
const brw_builder ubld = bld.exec_all().group(8, 0); const brw_builder ubld = bld.exec_all().group(8, 0);
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
brw_inst *send = ubld.SEND(); brw_send_inst *send = ubld.SEND();
send->dst = tmp; send->dst = tmp;
send->src[SEND_SRC_DESC] = brw_imm_ud(0); send->src[SEND_SRC_DESC] = brw_imm_ud(0);
@ -4949,7 +4949,7 @@ emit_rt_lsc_fence(const brw_builder &bld,
send->ex_mlen = 0; send->ex_mlen = 0;
/* Temp write for scheduling */ /* Temp write for scheduling */
send->size_written = REG_SIZE * reg_unit(devinfo); send->size_written = REG_SIZE * reg_unit(devinfo);
send->send_has_side_effects = true; send->has_side_effects = true;
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), tmp); ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), tmp);
} }
@ -5164,8 +5164,8 @@ emit_fence(const brw_builder &bld, enum opcode opcode,
opcode == SHADER_OPCODE_MEMORY_FENCE); opcode == SHADER_OPCODE_MEMORY_FENCE);
brw_reg dst = commit_enable ? bld.vgrf(BRW_TYPE_UD) : bld.null_reg_ud(); brw_reg dst = commit_enable ? bld.vgrf(BRW_TYPE_UD) : bld.null_reg_ud();
brw_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0), brw_send_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable)); brw_imm_ud(commit_enable))->as_send();
fence->sfid = sfid; fence->sfid = sfid;
fence->desc = desc; fence->desc = desc;
fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0; fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0;

View file

@ -161,7 +161,7 @@ brw_generator::patch_halt_jumps()
} }
void void
brw_generator::generate_send(brw_inst *inst, brw_generator::generate_send(brw_send_inst *inst,
struct brw_reg dst, struct brw_reg dst,
struct brw_reg desc, struct brw_reg desc,
struct brw_reg ex_desc, struct brw_reg ex_desc,
@ -181,7 +181,7 @@ brw_generator::generate_send(brw_inst *inst,
* descriptor is written indirectly (it already contains a SS/BSS * descriptor is written indirectly (it already contains a SS/BSS
* surface handle) * surface handle)
*/ */
assert(!inst->send_ex_desc_imm); assert(!inst->ex_desc_imm);
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather); brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather);
if (inst->check_tdr) if (inst->check_tdr)
brw_eu_inst_set_opcode(p->isa, brw_last_inst, BRW_OPCODE_SENDC); brw_eu_inst_set_opcode(p->isa, brw_last_inst, BRW_OPCODE_SENDC);
@ -191,8 +191,8 @@ brw_generator::generate_send(brw_inst *inst,
*/ */
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2, brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
desc, ex_desc, desc, ex_desc,
inst->send_ex_desc_imm ? inst->offset : 0, inst->ex_desc_imm ? inst->offset : 0,
inst->ex_mlen, inst->send_ex_bso, inst->ex_mlen, inst->ex_bso,
inst->eot, gather); inst->eot, gather);
if (inst->check_tdr) if (inst->check_tdr)
brw_eu_inst_set_opcode(p->isa, brw_last_inst, brw_eu_inst_set_opcode(p->isa, brw_last_inst,
@ -886,7 +886,8 @@ brw_generator::generate_code(const brw_shader &s,
assert(inst->force_writemask_all || inst->exec_size >= 4); assert(inst->force_writemask_all || inst->exec_size >= 4);
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo)); if (const brw_send_inst *send = inst->as_send())
assert(send->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo));
switch (inst->opcode) { switch (inst->opcode) {
case BRW_OPCODE_NOP: case BRW_OPCODE_NOP:
@ -1094,7 +1095,6 @@ brw_generator::generate_code(const brw_shader &s,
case SHADER_OPCODE_SIN: case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS: case SHADER_OPCODE_COS:
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
assert(inst->mlen == 0);
gfx6_math(p, dst, brw_math_function(inst->opcode), gfx6_math(p, dst, brw_math_function(inst->opcode),
src[0], retype(brw_null_reg(), src[0].type)); src[0], retype(brw_null_reg(), src[0].type));
break; break;
@ -1103,7 +1103,6 @@ brw_generator::generate_code(const brw_shader &s,
case SHADER_OPCODE_POW: case SHADER_OPCODE_POW:
assert(devinfo->verx10 < 125); assert(devinfo->verx10 < 125);
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
assert(inst->mlen == 0);
assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8); assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8);
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
break; break;
@ -1144,13 +1143,13 @@ brw_generator::generate_code(const brw_shader &s,
break; break;
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
generate_send(inst, dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]); src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]);
send_count++; send_count++;
break; break;
case SHADER_OPCODE_SEND_GATHER: case SHADER_OPCODE_SEND_GATHER:
generate_send(inst, dst, generate_send(inst->as_send(), dst,
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC], src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
src[SEND_GATHER_SRC_SCALAR], brw_null_reg()); src[SEND_GATHER_SRC_SCALAR], brw_null_reg());
send_count++; send_count++;

View file

@ -25,7 +25,7 @@ public:
const unsigned *get_assembly(); const unsigned *get_assembly();
private: private:
void generate_send(brw_inst *inst, void generate_send(brw_send_inst *inst,
struct brw_reg dst, struct brw_reg dst,
struct brw_reg desc, struct brw_reg desc,
struct brw_reg ex_desc, struct brw_reg ex_desc,

View file

@ -14,7 +14,11 @@
static inline unsigned static inline unsigned
brw_inst_kind_size(brw_inst_kind kind) brw_inst_kind_size(brw_inst_kind kind)
{ {
return sizeof(brw_inst); /* TODO: Temporarily here to ensure all instructions can be converted to
* SEND. Once all new kinds are added, change so that BASE allocate only
* sizeof(brw_inst).
*/
return sizeof(brw_send_inst);
} }
static brw_inst * static brw_inst *
@ -110,8 +114,6 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
const brw_inst_kind kind = inst->kind; const brw_inst_kind kind = inst->kind;
const brw_inst_kind new_kind = brw_inst_kind_for_opcode(new_opcode); const brw_inst_kind new_kind = brw_inst_kind_for_opcode(new_opcode);
assert(new_kind == BRW_KIND_BASE);
const unsigned inst_size = brw_inst_kind_size(kind); const unsigned inst_size = brw_inst_kind_size(kind);
const unsigned new_inst_size = brw_inst_kind_size(new_kind); const unsigned new_inst_size = brw_inst_kind_size(new_kind);
assert(new_inst_size <= inst_size); assert(new_inst_size <= inst_size);
@ -127,6 +129,9 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
inst->src = new_src; inst->src = new_src;
} }
if (new_kind != kind)
memset(((char *)inst) + sizeof(brw_inst), 0, new_inst_size - sizeof(brw_inst));
inst->sources = new_num_sources; inst->sources = new_num_sources;
inst->opcode = new_opcode; inst->opcode = new_opcode;
inst->kind = new_kind; inst->kind = new_kind;
@ -137,8 +142,22 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
brw_inst_kind brw_inst_kind
brw_inst_kind_for_opcode(enum opcode opcode) brw_inst_kind_for_opcode(enum opcode opcode)
{ {
switch (opcode) {
case BRW_OPCODE_SEND:
case BRW_OPCODE_SENDS:
case BRW_OPCODE_SENDC:
case BRW_OPCODE_SENDSC:
case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER:
case SHADER_OPCODE_BARRIER:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_INTERLOCK:
return BRW_KIND_SEND;
default:
return BRW_KIND_BASE; return BRW_KIND_BASE;
} }
}
bool bool
brw_inst::is_send() const brw_inst::is_send() const
@ -483,9 +502,9 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const
switch (opcode) { switch (opcode) {
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
if (arg == SEND_SRC_PAYLOAD1) { if (arg == SEND_SRC_PAYLOAD1) {
return mlen * REG_SIZE; return as_send()->mlen * REG_SIZE;
} else if (arg == SEND_SRC_PAYLOAD2) { } else if (arg == SEND_SRC_PAYLOAD2) {
return ex_mlen * REG_SIZE; return as_send()->ex_mlen * REG_SIZE;
} }
break; break;
@ -893,7 +912,7 @@ brw_inst::has_side_effects() const
switch (opcode) { switch (opcode) {
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER: case SHADER_OPCODE_SEND_GATHER:
return send_has_side_effects; return as_send()->has_side_effects;
case BRW_OPCODE_SYNC: case BRW_OPCODE_SYNC:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL: case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
@ -927,7 +946,7 @@ brw_inst::is_volatile() const
return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS; return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS;
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER: case SHADER_OPCODE_SEND_GATHER:
return send_is_volatile; return as_send()->is_volatile;
default: default:
return false; return false;
} }

View file

@ -41,6 +41,7 @@ struct brw_shader;
enum ENUM_PACKED brw_inst_kind { enum ENUM_PACKED brw_inst_kind {
BRW_KIND_BASE, BRW_KIND_BASE,
BRW_KIND_SEND,
}; };
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode); brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
@ -53,6 +54,23 @@ struct brw_inst : brw_exec_node {
static void* operator new(size_t size, void *ptr) { return ptr; } static void* operator new(size_t size, void *ptr) { return ptr; }
static void operator delete(void *p) {} static void operator delete(void *p) {}
/* Checked downcast helpers, generated as a pair (mutable and const) for
 * each instruction kind.  A helper returns `this` cast to the
 * kind-specific struct when `kind` equals ENUM_NAME and nullptr
 * otherwise, so callers must either test the result or already know the
 * instruction's kind before dereferencing it.
 *
 * Prefer macro here instead of templates to get nicer
 * helper names.
 *
 * NOTE(review): TYPE_NAME is only named through `struct TYPE_NAME` here
 * and its definition comes after this struct, which is presumably why a
 * C-style cast is used -- confirm before switching to a named cast.
 */
#define KIND_HELPERS(HELPER_NAME, TYPE_NAME, ENUM_NAME) \
struct TYPE_NAME *HELPER_NAME() { \
return kind == ENUM_NAME ? (struct TYPE_NAME *)this \
: nullptr; \
} \
const struct TYPE_NAME *HELPER_NAME() const { \
return kind == ENUM_NAME ? (const struct TYPE_NAME *)this \
: nullptr; \
}
/* as_send(): brw_inst -> brw_send_inst, non-null only for BRW_KIND_SEND. */
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
#undef KIND_HELPERS
bool is_send() const; bool is_send() const;
bool is_payload(unsigned arg) const; bool is_payload(unsigned arg) const;
bool is_partial_write(unsigned grf_size = REG_SIZE) const; bool is_partial_write(unsigned grf_size = REG_SIZE) const;
@ -144,13 +162,8 @@ struct brw_inst : brw_exec_node {
*/ */
uint8_t group; uint8_t group;
uint8_t mlen; /**< SEND message length */
uint8_t ex_mlen; /**< SENDS extended message length */
uint8_t sfid; /**< SFID for SEND instructions */
/** The number of hardware registers used for a message header. */ /** The number of hardware registers used for a message header. */
uint8_t header_size; uint8_t header_size;
uint32_t desc; /**< SEND[S] message descriptor immediate */
uint32_t ex_desc; /**< SEND[S] extended message descriptor immediate */
uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
uint16_t size_written; /**< Data written to the destination register in bytes. */ uint16_t size_written; /**< Data written to the destination register in bytes. */
@ -179,25 +192,11 @@ struct brw_inst : brw_exec_node {
*/ */
unsigned rcount:4; unsigned rcount:4;
unsigned pad:4;
bool predicate_inverse:1; bool predicate_inverse:1;
bool writes_accumulator:1; /**< instruction implicitly writes accumulator */ bool writes_accumulator:1; /**< instruction implicitly writes accumulator */
bool force_writemask_all:1; bool force_writemask_all:1;
bool saturate:1; bool saturate:1;
bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */
bool send_ex_bso:1; /**< Only for SHADER_OPCODE_SEND, use extended
* bindless surface offset (26bits instead of
* 20bits)
*/
/**
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
* part of the extended descriptor that must be encoded in the
* instruction.
*/
bool send_ex_desc_imm:1;
/** /**
* The predication mask applied to this instruction is guaranteed to * The predication mask applied to this instruction is guaranteed to
* be uniform and a superset of the execution mask of the present block. * be uniform and a superset of the execution mask of the present block.
@ -215,6 +214,8 @@ struct brw_inst : brw_exec_node {
* never executed. * never executed.
*/ */
bool has_no_mask_send_params:1; bool has_no_mask_send_params:1;
unsigned pad:13;
}; };
uint32_t bits; uint32_t bits;
}; };
@ -233,6 +234,42 @@ struct brw_inst : brw_exec_node {
bblock_t *block; bblock_t *block;
}; };
/**
 * Instruction kind holding the fields that are specific to SEND-like
 * instructions (kind == BRW_KIND_SEND).  Obtain a pointer to it from a
 * generic brw_inst with as_send(), which returns nullptr for other kinds.
 */
struct brw_send_inst : brw_inst {
/** SEND[S] message descriptor immediate. */
uint32_t desc;
/** SEND[S] extended message descriptor immediate. */
uint32_t ex_desc;
/** SEND message length. */
uint8_t mlen;
/** SENDS extended message length. */
uint8_t ex_mlen;
/** SFID for SEND instructions. */
uint8_t sfid;
/* The flags below are overlaid with `send_bits` so that all of them can
 * be compared or hashed as a single byte.
 */
union {
struct {
/**
* Turns it into a SENDC.
*/
bool check_tdr:1;
/**
 * Value returned by brw_inst::has_side_effects() for
 * SHADER_OPCODE_SEND and SHADER_OPCODE_SEND_GATHER.
 */
bool has_side_effects:1;
/**
 * Value returned by brw_inst::is_volatile() for
 * SHADER_OPCODE_SEND and SHADER_OPCODE_SEND_GATHER.
 */
bool is_volatile:1;
/**
* Use extended bindless surface offset (26bits instead of 20bits)
*/
bool ex_bso:1;
/**
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
* part of the extended descriptor that must be encoded in the
* instruction.
*/
bool ex_desc_imm:1;
/** Explicit padding: 5 flag bits + 3 pad bits fill the 8 bits of send_bits. */
uint8_t pad:3;
};
/**
 * All of the flags above viewed as one byte; send_inst_match() and
 * hash_inst() use this instead of looking at each flag individually.
 */
uint8_t send_bits;
};
};
/** /**
* Make the execution of \p inst dependent on the evaluation of a possibly * Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate. * inverted predicate.

View file

@ -519,22 +519,27 @@ brw_lower_sends_overlapping_payload(brw_shader &s)
bool progress = false; bool progress = false;
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && if (inst->opcode != SHADER_OPCODE_SEND)
regions_overlap(inst->src[SEND_SRC_PAYLOAD1], continue;
inst->mlen * REG_SIZE,
inst->src[SEND_SRC_PAYLOAD2], brw_send_inst *send = inst->as_send();
inst->ex_mlen * REG_SIZE)) {
const unsigned arg = inst->mlen < inst->ex_mlen ? if (send->ex_mlen > 0 &&
regions_overlap(send->src[SEND_SRC_PAYLOAD1],
send->mlen * REG_SIZE,
send->src[SEND_SRC_PAYLOAD2],
send->ex_mlen * REG_SIZE)) {
const unsigned arg = send->mlen < send->ex_mlen ?
SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2; SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2;
const unsigned len = MIN2(inst->mlen, inst->ex_mlen); const unsigned len = MIN2(send->mlen, send->ex_mlen);
brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD); brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD);
/* Sadly, we've lost all notion of channels and bit sizes at this /* Sadly, we've lost all notion of channels and bit sizes at this
* point. Just WE_all it. * point. Just WE_all it.
*/ */
const brw_builder ibld = brw_builder(inst).exec_all().group(16, 0); const brw_builder ibld = brw_builder(send).exec_all().group(16, 0);
brw_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD); brw_reg copy_src = retype(send->src[arg], BRW_TYPE_UD);
brw_reg copy_dst = tmp; brw_reg copy_dst = tmp;
for (unsigned i = 0; i < len; i += 2) { for (unsigned i = 0; i < len; i += 2) {
if (len == i + 1) { if (len == i + 1) {
@ -546,7 +551,7 @@ brw_lower_sends_overlapping_payload(brw_shader &s)
copy_src = offset(copy_src, ibld, 1); copy_src = offset(copy_src, ibld, 1);
copy_dst = offset(copy_dst, ibld, 1); copy_dst = offset(copy_dst, ibld, 1);
} }
inst->src[arg] = tmp; send->src[arg] = tmp;
progress = true; progress = true;
} }
} }
@ -867,7 +872,7 @@ brw_s0(enum brw_reg_type type, unsigned subnr)
} }
static bool static bool
brw_lower_send_gather_inst(brw_shader &s, brw_inst *inst) brw_lower_send_gather_inst(brw_shader &s, brw_send_inst *inst)
{ {
const intel_device_info *devinfo = s.devinfo; const intel_device_info *devinfo = s.devinfo;
assert(devinfo->ver >= 30); assert(devinfo->ver >= 30);
@ -926,7 +931,7 @@ brw_lower_send_gather(brw_shader &s)
foreach_block_and_inst(block, brw_inst, inst, s.cfg) { foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
if (inst->opcode == SHADER_OPCODE_SEND_GATHER) if (inst->opcode == SHADER_OPCODE_SEND_GATHER)
progress |= brw_lower_send_gather_inst(s, inst); progress |= brw_lower_send_gather_inst(s, inst->as_send());
} }
if (progress) if (progress)

File diff suppressed because it is too large Load diff

View file

@ -677,7 +677,10 @@ brw_lower_simd_width(brw_shader &s)
(inst->size_written - residency_size) / (inst->size_written - residency_size) /
inst->dst.component_size(inst->exec_size); inst->dst.component_size(inst->exec_size);
assert(!inst->writes_accumulator && !inst->mlen); if (const brw_send_inst *send = inst->as_send())
assert(!send->mlen);
assert(!inst->writes_accumulator);
/* Inserting the zip, unzip, and duplicated instructions in all of /* Inserting the zip, unzip, and duplicated instructions in all of
* the right spots is somewhat tricky. All of the unzip and any * the right spots is somewhat tricky. All of the unzip and any

View file

@ -263,9 +263,12 @@ brw_opt_zero_samples(brw_shader &s)
{ {
bool progress = false; bool progress = false;
foreach_block_and_inst(block, brw_inst, send, s.cfg) { foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
if (send->opcode != SHADER_OPCODE_SEND || if (inst->opcode != SHADER_OPCODE_SEND)
send->sfid != BRW_SFID_SAMPLER) continue;
brw_send_inst *send = inst->as_send();
if (send->sfid != BRW_SFID_SAMPLER)
continue; continue;
/* Wa_14012688258: /* Wa_14012688258:
@ -340,9 +343,12 @@ brw_opt_split_sends(brw_shader &s)
{ {
bool progress = false; bool progress = false;
foreach_block_and_inst(block, brw_inst, send, s.cfg) { foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
if (send->opcode != SHADER_OPCODE_SEND || if (inst->opcode != SHADER_OPCODE_SEND)
send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 || continue;
brw_send_inst *send = inst->as_send();
if (send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 ||
send->src[SEND_SRC_PAYLOAD1].file != VGRF) send->src[SEND_SRC_PAYLOAD1].file != VGRF)
continue; continue;
@ -619,19 +625,21 @@ brw_opt_send_to_send_gather(brw_shader &s)
if (inst->opcode != SHADER_OPCODE_SEND) if (inst->opcode != SHADER_OPCODE_SEND)
continue; continue;
brw_send_inst *send = inst->as_send();
/* For 1-2 registers, send-gather offers no benefits over split-send. */ /* For 1-2 registers, send-gather offers no benefits over split-send. */
if (inst->mlen + inst->ex_mlen <= 2 * unit) if (send->mlen + send->ex_mlen <= 2 * unit)
continue; continue;
assert(inst->mlen % unit == 0); assert(send->mlen % unit == 0);
assert(inst->ex_mlen % unit == 0); assert(send->ex_mlen % unit == 0);
struct { struct {
brw_reg src; brw_reg src;
unsigned phys_len; unsigned phys_len;
} payload[2] = { } payload[2] = {
{ inst->src[SEND_SRC_PAYLOAD1], inst->mlen / unit }, { send->src[SEND_SRC_PAYLOAD1], send->mlen / unit },
{ inst->src[SEND_SRC_PAYLOAD2], inst->ex_mlen / unit }, { send->src[SEND_SRC_PAYLOAD2], send->ex_mlen / unit },
}; };
const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len; const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len;
@ -645,25 +653,25 @@ brw_opt_send_to_send_gather(brw_shader &s)
continue; continue;
} }
inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND_GATHER, send = brw_transform_inst(s, send, SHADER_OPCODE_SEND_GATHER,
SEND_GATHER_SRC_PAYLOAD + num_payload_sources); SEND_GATHER_SRC_PAYLOAD + num_payload_sources)->as_send();
/* Sources 0 and 1 remain the same. Source 2 will be filled /* Sources 0 and 1 remain the same. Source 2 will be filled
* after register allocation. * after register allocation.
*/ */
inst->src[SEND_GATHER_SRC_SCALAR] = {}; send->src[SEND_GATHER_SRC_SCALAR] = {};
int idx = 3; int idx = 3;
for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) { for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) {
for (unsigned i = 0; i < payload[p].phys_len; i++) { for (unsigned i = 0; i < payload[p].phys_len; i++) {
inst->src[idx++] = byte_offset(payload[p].src, send->src[idx++] = byte_offset(payload[p].src,
i * reg_unit(devinfo) * REG_SIZE); i * reg_unit(devinfo) * REG_SIZE);
} }
} }
assert(idx == inst->sources); assert(idx == send->sources);
inst->mlen = 0; send->mlen = 0;
inst->ex_mlen = 0; send->ex_mlen = 0;
progress = true; progress = true;
} }
@ -699,10 +707,12 @@ brw_opt_send_gather_to_send(brw_shader &s)
if (inst->opcode != SHADER_OPCODE_SEND_GATHER) if (inst->opcode != SHADER_OPCODE_SEND_GATHER)
continue; continue;
assert(inst->sources > 2); brw_send_inst *send = inst->as_send();
assert(inst->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE);
const int num_payload_sources = inst->sources - 3; assert(send->sources > 2);
assert(send->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE);
const int num_payload_sources = send->sources - 3;
assert(num_payload_sources > 0); assert(num_payload_sources > 0);
/* Limited by Src0.Length in the SEND instruction. */ /* Limited by Src0.Length in the SEND instruction. */
@ -713,7 +723,7 @@ brw_opt_send_gather_to_send(brw_shader &s)
* and there's no need to use SEND_GATHER (which would set ARF scalar register * and there's no need to use SEND_GATHER (which would set ARF scalar register
* adding an extra instruction). * adding an extra instruction).
*/ */
const brw_reg *payload = &inst->src[SEND_GATHER_SRC_PAYLOAD]; const brw_reg *payload = &send->src[SEND_GATHER_SRC_PAYLOAD];
brw_reg payload1 = payload[0]; brw_reg payload1 = payload[0];
brw_reg payload2 = {}; brw_reg payload2 = {};
int payload1_len = 0; int payload1_len = 0;
@ -755,21 +765,21 @@ brw_opt_send_gather_to_send(brw_shader &s)
* *
* TODO: Pass LSC address length or infer it so valid splits can work. * TODO: Pass LSC address length or infer it so valid splits can work.
*/ */
if (payload2_len && (inst->sfid == BRW_SFID_UGM || if (payload2_len && (send->sfid == BRW_SFID_UGM ||
inst->sfid == BRW_SFID_TGM || send->sfid == BRW_SFID_TGM ||
inst->sfid == BRW_SFID_SLM || send->sfid == BRW_SFID_SLM ||
inst->sfid == BRW_SFID_URB)) { send->sfid == BRW_SFID_URB)) {
enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, inst->desc); enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, send->desc);
if (lsc_op_num_data_values(lsc_op) > 0) if (lsc_op_num_data_values(lsc_op) > 0)
continue; continue;
} }
inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND); send = brw_transform_inst(s, send, SHADER_OPCODE_SEND)->as_send();
inst->src[SEND_SRC_PAYLOAD1] = payload1; send->src[SEND_SRC_PAYLOAD1] = payload1;
inst->src[SEND_SRC_PAYLOAD2] = payload2; send->src[SEND_SRC_PAYLOAD2] = payload2;
inst->mlen = payload1_len * unit; send->mlen = payload1_len * unit;
inst->ex_mlen = payload2_len * unit; send->ex_mlen = payload2_len * unit;
progress = true; progress = true;
} }

View file

@ -135,10 +135,12 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
case SHADER_OPCODE_LOAD_PAYLOAD: case SHADER_OPCODE_LOAD_PAYLOAD:
return !is_coalescing_payload(*v, inst); return !is_coalescing_payload(*v, inst);
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER: case SHADER_OPCODE_SEND_GATHER: {
return !inst->send_has_side_effects && const brw_send_inst *send = inst->as_send();
!inst->send_is_volatile && return !send->has_side_effects &&
!inst->eot; !send->is_volatile &&
!send->eot;
}
default: default:
return false; return false;
} }
@ -238,25 +240,31 @@ operands_match(const brw_inst *a, const brw_inst *b, bool *negate)
} }
} }
static bool
send_inst_match(brw_send_inst *a, brw_send_inst *b)
{
return a->mlen == b->mlen &&
a->ex_mlen == b->ex_mlen &&
a->sfid == b->sfid &&
a->desc == b->desc &&
a->ex_desc == b->ex_desc &&
a->send_bits == b->send_bits;
}
static bool static bool
instructions_match(brw_inst *a, brw_inst *b, bool *negate) instructions_match(brw_inst *a, brw_inst *b, bool *negate)
{ {
/* `Kind` is derived from opcode, so skipped. */
return a->opcode == b->opcode && return a->opcode == b->opcode &&
/* `kind` is derived from opcode, so skipped. */
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
a->exec_size == b->exec_size && a->exec_size == b->exec_size &&
a->group == b->group && a->group == b->group &&
a->predicate == b->predicate && a->predicate == b->predicate &&
a->conditional_mod == b->conditional_mod && a->conditional_mod == b->conditional_mod &&
a->dst.type == b->dst.type && a->dst.type == b->dst.type &&
a->offset == b->offset && a->offset == b->offset &&
a->mlen == b->mlen &&
a->ex_mlen == b->ex_mlen &&
a->sfid == b->sfid &&
a->desc == b->desc &&
a->ex_desc == b->ex_desc &&
a->size_written == b->size_written && a->size_written == b->size_written &&
a->check_tdr == b->check_tdr &&
a->header_size == b->header_size && a->header_size == b->header_size &&
a->sources == b->sources && a->sources == b->sources &&
a->bits == b->bits && a->bits == b->bits &&
@ -299,17 +307,12 @@ hash_inst(const void *v)
inst->sources, inst->sources,
inst->exec_size, inst->exec_size,
inst->group, inst->group,
inst->mlen,
inst->ex_mlen,
inst->sfid,
inst->header_size, inst->header_size,
inst->conditional_mod, inst->conditional_mod,
inst->predicate, inst->predicate,
}; };
const uint32_t u32data[] = { const uint32_t u32data[] = {
inst->desc,
inst->ex_desc,
inst->offset, inst->offset,
inst->size_written, inst->size_written,
inst->opcode, inst->opcode,
@ -321,6 +324,29 @@ hash_inst(const void *v)
/* Skip hashing sched - we shouldn't be CSE'ing after that SWSB */ /* Skip hashing sched - we shouldn't be CSE'ing after that SWSB */
switch (inst->kind) {
case BRW_KIND_SEND: {
const brw_send_inst *send = inst->as_send();
const uint8_t send_u8data[] = {
send->mlen,
send->ex_mlen,
send->sfid,
send->send_bits,
};
const uint32_t send_u32data[] = {
send->desc,
send->ex_desc,
};
hash = HASH(hash, send_u8data);
hash = HASH(hash, send_u32data);
break;
}
case BRW_KIND_BASE:
/* Nothing else to do. */
break;
}
if (inst->opcode == BRW_OPCODE_MAD) { if (inst->opcode == BRW_OPCODE_MAD) {
/* Commutatively combine the hashes for the multiplicands */ /* Commutatively combine the hashes for the multiplicands */
hash = hash_reg(hash, inst->src[0]); hash = hash_reg(hash, inst->src[0]);

View file

@ -60,7 +60,9 @@ can_omit_write(const brw_inst *inst)
/* We can eliminate the destination write for ordinary instructions, /* We can eliminate the destination write for ordinary instructions,
* but not most SENDs. * but not most SENDs.
*/ */
if (inst->opcode < NUM_BRW_OPCODES && inst->mlen == 0) const brw_send_inst *send = inst->as_send();
if (inst->opcode < NUM_BRW_OPCODES &&
(!send || send->mlen == 0))
return true; return true;
/* It might not be safe for other virtual opcodes. */ /* It might not be safe for other virtual opcodes. */

View file

@ -415,12 +415,14 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
} }
fprintf(file, "(%d) ", inst->exec_size); fprintf(file, "(%d) ", inst->exec_size);
if (inst->mlen) { const brw_send_inst *send = inst->as_send();
fprintf(file, "(mlen: %d) ", inst->mlen);
if (send && send->mlen) {
fprintf(file, "(mlen: %d) ", send->mlen);
} }
if (inst->ex_mlen) { if (send && send->ex_mlen) {
fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); fprintf(file, "(ex_mlen: %d) ", send->ex_mlen);
} }
if (inst->eot) { if (inst->eot) {
@ -665,13 +667,13 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
if (inst->has_no_mask_send_params) if (inst->has_no_mask_send_params)
fprintf(file, "NoMaskParams "); fprintf(file, "NoMaskParams ");
if (is_send && inst->desc) if (send && send->desc)
fprintf(file, "Desc 0x%08x ", inst->desc); fprintf(file, "Desc 0x%08x ", send->desc);
if (is_send && inst->ex_desc) if (send && send->ex_desc)
fprintf(file, "ExDesc 0x%08x ", inst->ex_desc); fprintf(file, "ExDesc 0x%08x ", send->ex_desc);
if (is_send && inst->send_ex_desc_imm) if (send && send->ex_desc_imm)
fprintf(file, "ExDescImmInst 0x%08x ", inst->offset); fprintf(file, "ExDescImmInst 0x%08x ", inst->offset);
if (inst->sched.regdist || inst->sched.mode) { if (inst->sched.regdist || inst->sched.mode) {

View file

@ -621,7 +621,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
* they're used as sources in the same instruction. We also need to add * they're used as sources in the same instruction. We also need to add
* interference here. * interference here.
*/ */
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && if (inst->opcode == SHADER_OPCODE_SEND && inst->as_send()->ex_mlen > 0 &&
inst->src[SEND_SRC_PAYLOAD1].file == VGRF && inst->src[SEND_SRC_PAYLOAD1].file == VGRF &&
inst->src[SEND_SRC_PAYLOAD2].file == VGRF && inst->src[SEND_SRC_PAYLOAD2].file == VGRF &&
inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) { inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) {
@ -643,7 +643,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
assert(inst->opcode == SHADER_OPCODE_SEND); assert(inst->opcode == SHADER_OPCODE_SEND);
const brw_reg srcs[2] = { const brw_reg srcs[2] = {
inst->src[SEND_SRC_PAYLOAD1], inst->src[SEND_SRC_PAYLOAD1],
inst->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(), inst->as_send()->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(),
}; };
const unsigned sizes[2] = { const unsigned sizes[2] = {
DIV_ROUND_UP(fs->alloc.sizes[srcs[0].nr], reg_unit(devinfo)), DIV_ROUND_UP(fs->alloc.sizes[srcs[0].nr], reg_unit(devinfo)),
@ -892,7 +892,7 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) { for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
++stats->fill_count; ++stats->fill_count;
brw_inst *unspill_inst; brw_send_inst *unspill_inst;
if (devinfo->verx10 >= 125) { if (devinfo->verx10 >= 125) {
/* LSC is limited to SIMD16 (SIMD32 on Xe2) load/store but we can /* LSC is limited to SIMD16 (SIMD32 on Xe2) load/store but we can
* load more using transpose messages. * load more using transpose messages.
@ -933,8 +933,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
unspill_inst->ex_mlen = 0; unspill_inst->ex_mlen = 0;
unspill_inst->size_written = unspill_inst->size_written =
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE; lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE;
unspill_inst->send_has_side_effects = false; unspill_inst->has_side_effects = false;
unspill_inst->send_is_volatile = true; unspill_inst->is_volatile = true;
unspill_inst->src[0] = brw_imm_ud( unspill_inst->src[0] = brw_imm_ud(
desc | desc |
@ -958,8 +958,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
unspill_inst->mlen = 1; unspill_inst->mlen = 1;
unspill_inst->header_size = 1; unspill_inst->header_size = 1;
unspill_inst->size_written = reg_size * REG_SIZE; unspill_inst->size_written = reg_size * REG_SIZE;
unspill_inst->send_has_side_effects = false; unspill_inst->has_side_effects = false;
unspill_inst->send_is_volatile = true; unspill_inst->is_volatile = true;
unspill_inst->sfid = BRW_SFID_HDC0; unspill_inst->sfid = BRW_SFID_HDC0;
unspill_inst->src[0] = brw_imm_ud( unspill_inst->src[0] = brw_imm_ud(
@ -992,7 +992,7 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) { for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) {
++stats->spill_count; ++stats->spill_count;
brw_inst *spill_inst; brw_send_inst *spill_inst;
if (devinfo->verx10 >= 125) { if (devinfo->verx10 >= 125) {
brw_reg offset = build_lane_offsets(bld, spill_offset, ip); brw_reg offset = build_lane_offsets(bld, spill_offset, ip);
@ -1019,8 +1019,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
bld.dispatch_width()); bld.dispatch_width());
spill_inst->ex_mlen = reg_size; spill_inst->ex_mlen = reg_size;
spill_inst->size_written = 0; spill_inst->size_written = 0;
spill_inst->send_has_side_effects = true; spill_inst->has_side_effects = true;
spill_inst->send_is_volatile = false; spill_inst->is_volatile = false;
spill_inst->src[0] = brw_imm_ud( spill_inst->src[0] = brw_imm_ud(
desc | desc |
@ -1045,8 +1045,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
spill_inst->ex_mlen = reg_size; spill_inst->ex_mlen = reg_size;
spill_inst->size_written = 0; spill_inst->size_written = 0;
spill_inst->header_size = 1; spill_inst->header_size = 1;
spill_inst->send_has_side_effects = true; spill_inst->has_side_effects = true;
spill_inst->send_is_volatile = false; spill_inst->is_volatile = false;
spill_inst->sfid = BRW_SFID_HDC0; spill_inst->sfid = BRW_SFID_HDC0;
spill_inst->src[0] = brw_imm_ud( spill_inst->src[0] = brw_imm_ud(

View file

@ -268,10 +268,12 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
break; break;
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER: case SHADER_OPCODE_SEND_GATHER: {
switch (inst->sfid) { brw_send_inst *send = inst->as_send();
switch (send->sfid) {
case BRW_SFID_SAMPLER: { case BRW_SFID_SAMPLER: {
unsigned msg_type = (inst->desc >> 12) & 0x1f; unsigned msg_type = (send->desc >> 12) & 0x1f;
switch (msg_type) { switch (msg_type) {
case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO: case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO: case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
@ -364,7 +366,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
break; break;
case BRW_SFID_RENDER_CACHE: case BRW_SFID_RENDER_CACHE:
switch (brw_fb_desc_msg_type(isa->devinfo, inst->desc)) { switch (brw_fb_desc_msg_type(isa->devinfo, send->desc)) {
case GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE: case GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE:
case GFX7_DATAPORT_RC_TYPED_SURFACE_READ: case GFX7_DATAPORT_RC_TYPED_SURFACE_READ:
/* See also SHADER_OPCODE_TYPED_SURFACE_READ */ /* See also SHADER_OPCODE_TYPED_SURFACE_READ */
@ -388,7 +390,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
break; break;
case BRW_SFID_HDC0: case BRW_SFID_HDC0:
switch ((inst->desc >> 14) & 0x1f) { switch ((send->desc >> 14) & 0x1f) {
case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ: case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ:
case GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ: case GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ:
case GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE: case GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE:
@ -460,7 +462,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
break; break;
case BRW_SFID_HDC1: case BRW_SFID_HDC1:
switch (brw_dp_desc_msg_type(isa->devinfo, inst->desc)) { switch (brw_dp_desc_msg_type(isa->devinfo, send->desc)) {
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
@ -500,7 +502,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
case BRW_SFID_UGM: case BRW_SFID_UGM:
case BRW_SFID_TGM: case BRW_SFID_TGM:
case BRW_SFID_SLM: case BRW_SFID_SLM:
switch (lsc_msg_desc_opcode(isa->devinfo, inst->desc)) { switch (lsc_msg_desc_opcode(isa->devinfo, send->desc)) {
case LSC_OP_LOAD: case LSC_OP_LOAD:
case LSC_OP_STORE: case LSC_OP_STORE:
case LSC_OP_LOAD_CMASK: case LSC_OP_LOAD_CMASK:
@ -555,6 +557,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
UNREACHABLE("Unknown SFID"); UNREACHABLE("Unknown SFID");
} }
break; break;
}
case BRW_OPCODE_DPAS: case BRW_OPCODE_DPAS:
switch (inst->rcount) { switch (inst->rcount) {

View file

@ -372,7 +372,7 @@ brw_shader::emit_cs_terminate()
if (devinfo->ver < 11) if (devinfo->ver < 11)
desc |= (1 << 4); /* Do not dereference URB */ desc |= (1 << 4); /* Do not dereference URB */
brw_inst *send = ubld.SEND(); brw_send_inst *send = ubld.SEND();
send->dst = reg_undef; send->dst = reg_undef;
send->src[SEND_SRC_DESC] = brw_imm_ud(desc); send->src[SEND_SRC_DESC] = brw_imm_ud(desc);
send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
@ -678,7 +678,7 @@ brw_shader::assign_curb_setup()
addr = base_addr; addr = base_addr;
} }
brw_inst *send = ubld.SEND(); brw_send_inst *send = ubld.SEND();
send->dst = retype(brw_vec8_grf(payload().num_regs + i, 0), send->dst = retype(brw_vec8_grf(payload().num_regs + i, 0),
BRW_TYPE_UD); BRW_TYPE_UD);
@ -704,7 +704,7 @@ brw_shader::assign_curb_setup()
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE; lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
assert((payload().num_regs + i + send->size_written / REG_SIZE) <= assert((payload().num_regs + i + send->size_written / REG_SIZE) <=
(payload().num_regs + prog_data->curb_read_length)); (payload().num_regs + prog_data->curb_read_length));
send->send_is_volatile = true; send->is_volatile = true;
send->src[SEND_SRC_DESC] = send->src[SEND_SRC_DESC] =
brw_imm_ud(desc | brw_message_desc(devinfo, brw_imm_ud(desc | brw_message_desc(devinfo,

View file

@ -379,6 +379,9 @@ brw_inst *brw_clone_inst(brw_shader &s, const brw_inst *inst);
* brw_inst are maintained and any previous sources still visible. Additional * brw_inst are maintained and any previous sources still visible. Additional
* sources will be uninitialized. * sources will be uninitialized.
* *
* All instructions can be transformed to an instruction of BASE kind.
* All non-BASE instructions can be transformed to an instruction of SEND kind.
*
* If new_num_srcs is UINT_MAX a default will be picked based on the opcode. * If new_num_srcs is UINT_MAX a default will be picked based on the opcode.
* Not all opcodes have a default. * Not all opcodes have a default.
*/ */

View file

@ -314,7 +314,7 @@ brw_validate(const brw_shader &s)
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC])); VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC]));
VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC])); VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC]));
VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE); VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE);
VAL_ASSERT(inst->ex_mlen > 0 || VAL_ASSERT(inst->as_send()->ex_mlen > 0 ||
inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE); inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE);
/* Send payloads cannot be immediates nor have source modifiers */ /* Send payloads cannot be immediates nor have source modifiers */
for (unsigned i = 0; i < 2; i++) { for (unsigned i = 0; i < 2; i++) {

View file

@ -37,20 +37,24 @@ brw_workaround_emit_dummy_mov_instruction(brw_shader &s)
} }
static bool static bool
needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst) needs_dummy_fence(const intel_device_info *devinfo, const brw_inst *inst)
{ {
const brw_send_inst *send = inst->as_send();
if (!send)
return false;
/* This workaround is about making sure that any instruction writing /* This workaround is about making sure that any instruction writing
* through UGM has completed before we hit EOT. * through UGM has completed before we hit EOT.
*/ */
if (inst->sfid != BRW_SFID_UGM) if (send->sfid != BRW_SFID_UGM)
return false; return false;
/* Any UGM, non-Scratch-surface Stores (not including Atomic) messages, /* Any UGM, non-Scratch-surface Stores (not including Atomic) messages,
* where the L1-cache override is NOT among {WB, WS, WT} * where the L1-cache override is NOT among {WB, WS, WT}
*/ */
enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc); enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, send->desc);
if (lsc_opcode_is_store(opcode)) { if (lsc_opcode_is_store(opcode)) {
switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) { switch (lsc_msg_desc_cache_ctrl(devinfo, send->desc)) {
case LSC_CACHE_STORE_L1STATE_L3MOCS: case LSC_CACHE_STORE_L1STATE_L3MOCS:
case LSC_CACHE_STORE_L1WB_L3WB: case LSC_CACHE_STORE_L1WB_L3WB:
case LSC_CACHE_STORE_L1S_L3UC: case LSC_CACHE_STORE_L1S_L3UC:
@ -65,7 +69,7 @@ needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst)
} }
/* Any UGM Atomic message WITHOUT return value */ /* Any UGM Atomic message WITHOUT return value */
if (lsc_opcode_is_atomic(opcode) && inst->dst.is_null()) if (lsc_opcode_is_atomic(opcode) && send->dst.is_null())
return true; return true;
return false; return false;
@ -106,7 +110,7 @@ brw_workaround_memory_fence_before_eot(brw_shader &s)
const brw_builder ubld = brw_builder(inst).uniform(); const brw_builder ubld = brw_builder(inst).uniform();
brw_reg dst = ubld.vgrf(BRW_TYPE_UD); brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
brw_inst *dummy_fence = ubld.SEND(); brw_send_inst *dummy_fence = ubld.SEND();
dummy_fence->src[SEND_SRC_DESC] = brw_imm_ud(0); dummy_fence->src[SEND_SRC_DESC] = brw_imm_ud(0);
dummy_fence->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); dummy_fence->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);

View file

@ -44,7 +44,7 @@ emit_SEND(const brw_builder &bld, const brw_reg &dst,
{ {
brw_reg uniform_desc = component(desc, 0); brw_reg uniform_desc = component(desc, 0);
brw_inst *send = bld.SEND(); brw_send_inst *send = bld.SEND();
send->dst = dst; send->dst = dst;
send->src[SEND_SRC_DESC] = uniform_desc; send->src[SEND_SRC_DESC] = uniform_desc;