intel/eu: Add support for the SENDS[C] messages

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
Jason Ekstrand 2018-11-15 15:17:06 -06:00 committed by Jason Ekstrand
parent d6a6e10390
commit 8babaa84e8
4 changed files with 255 additions and 19 deletions

View file

@ -811,6 +811,17 @@ brw_send_indirect_message(struct brw_codegen *p,
struct brw_reg desc,
unsigned desc_imm);
/**
 * Emit a split send (SENDS) message with two payloads.
 *
 * \p desc and \p ex_desc may each be either an immediate or a register.
 * \p desc_imm and \p ex_desc_imm carry additional descriptor bits that are
 * ORed into the corresponding descriptor before the send is emitted.
 * \p desc must have type BRW_REGISTER_TYPE_UD.
 */
void
brw_send_indirect_split_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload0,
struct brw_reg payload1,
struct brw_reg desc,
unsigned desc_imm,
struct brw_reg ex_desc,
unsigned ex_desc_imm);
void brw_ff_sync(struct brw_codegen *p,
struct brw_reg dest,
unsigned msg_reg_nr,

View file

@ -96,7 +96,19 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
gen7_convert_mrf_to_grf(p, &dest);
{
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.subnr % 16 == 0);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
dest.vstride == dest.width + 1);
assert(!dest.negate && !dest.abs);
brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
} else {
brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
@ -177,8 +189,11 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
gen7_convert_mrf_to_grf(p, &reg);
if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
if (devinfo->gen >= 6 &&
(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* Check for some likely failures.
@ -188,7 +203,17 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
{
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
assert(reg.file == BRW_GENERAL_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr % 16 == 0);
assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1);
assert(!reg.negate && !reg.abs);
brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
} else {
brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
brw_inst_set_src0_abs(devinfo, inst, reg.abs);
brw_inst_set_src0_negate(devinfo, inst, reg.negate);
@ -282,7 +307,18 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < 128);
{
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1);
assert(!reg.negate && !reg.abs);
brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
} else {
/* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
*
* "Accumulator registers may be accessed explicitly as src0
@ -2484,6 +2520,101 @@ brw_send_indirect_message(struct brw_codegen *p,
brw_inst_set_sfid(devinfo, send, sfid);
}
void
brw_send_indirect_split_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload0,
struct brw_reg payload1,
struct brw_reg desc,
unsigned desc_imm,
struct brw_reg ex_desc,
unsigned ex_desc_imm)
{
const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *send;
dst = retype(dst, BRW_REGISTER_TYPE_UW);
assert(desc.type == BRW_REGISTER_TYPE_UD);
if (desc.file == BRW_IMMEDIATE_VALUE) {
desc.ud |= desc_imm;
} else {
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
* immediate.
*/
brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
brw_pop_insn_state(p);
desc = addr;
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
ex_desc.ud |= ex_desc_imm;
} else {
struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Load the indirect extended descriptor to an address register using OR
* so the caller can specify additional descriptor bits with the
* desc_imm immediate.
*
* Even though the instruction dispatcher always pulls the SFID from the
* instruction itself, the extended descriptor sent to the actual unit
* gets the SFID from the extended descriptor which comes from the
* address register. If we don't OR it in, the external unit gets
* confused and hangs the GPU.
*/
brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid));
brw_pop_insn_state(p);
ex_desc = addr;
}
send = next_insn(p, BRW_OPCODE_SENDS);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
if (desc.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
brw_inst_set_send_desc(devinfo, send, desc.ud);
} else {
assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(desc.nr == BRW_ARF_ADDRESS);
assert(desc.subnr == 0);
brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud);
} else {
assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(ex_desc.nr == BRW_ARF_ADDRESS);
assert((ex_desc.subnr & 0x3) == 0);
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
}
brw_inst_set_sfid(devinfo, send, sfid);
}
static void
brw_send_indirect_surface_message(struct brw_codegen *p,
unsigned sfid,

View file

@ -102,6 +102,18 @@ inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
}
}
/**
 * Report whether \p inst is a split send, i.e. whether its opcode is
 * BRW_OPCODE_SENDS or BRW_OPCODE_SENDSC.
 */
static bool
inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC;
}
static unsigned
signed_type(unsigned type)
{
@ -248,6 +260,12 @@ sources_not_null(const struct gen_device_info *devinfo,
if (num_sources == 3)
return (struct string){};
/* Nothing to test. Split sends can only encode a file in sources that are
* allowed to be NULL.
*/
if (inst_is_split_send(devinfo, inst))
return (struct string){};
if (num_sources >= 1)
ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
@ -263,8 +281,41 @@ send_restrictions(const struct gen_device_info *devinfo,
{
struct string error_msg = { .str = NULL, .len = 0 };
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
if (inst_is_split_send(devinfo, inst)) {
ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
"src1 of split send must be a GRF or NULL");
ERROR_IF(brw_inst_eot(devinfo, inst) &&
brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
"send with EOT must use g112-g127");
ERROR_IF(brw_inst_eot(devinfo, inst) &&
brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
"send with EOT must use g112-g127");
if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
/* Assume minimums if we don't know */
unsigned mlen = 1;
if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
const uint32_t desc = brw_inst_send_desc(devinfo, inst);
mlen = brw_message_desc_mlen(devinfo, desc);
}
unsigned ex_mlen = 1;
if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst);
ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
}
const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
ERROR_IF((src0_reg_nr <= src1_reg_nr &&
src1_reg_nr < src0_reg_nr + mlen) ||
(src1_reg_nr <= src0_reg_nr &&
src0_reg_nr < src1_reg_nr + ex_mlen),
"split send payloads must not overlap");
}
} else if (inst_is_send(devinfo, inst)) {
ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
"send must use direct addressing");
@ -534,6 +585,12 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
if (num_sources == 3)
return (struct string){};
/* Split sends don't have the bits in the instruction to encode regions so
* there's nothing to check.
*/
if (inst_is_split_send(devinfo, inst))
return (struct string){};
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
@ -1124,6 +1181,10 @@ special_requirements_for_handling_double_precision_data_types(
if (num_sources == 3 || num_sources == 0)
return (struct string){};
/* Split sends don't have types so there's no doubles there. */
if (inst_is_split_send(devinfo, inst))
return (struct string){};
enum brw_reg_type exec_type = execution_type(devinfo, inst);
unsigned exec_type_size = brw_reg_type_to_size(exec_type);

View file

@ -455,6 +455,19 @@ FJ(gen4_jump_count, 111, 96, devinfo->gen < 6)
FC(gen4_pop_count, 115, 112, devinfo->gen < 6)
/** @} */
/**
* SEND instructions:
*
* Instruction fields used when encoding split sends (SENDS/SENDSC): the
* register/immediate selectors for the descriptor and extended descriptor,
* the src1 register number and file, and the destination register file.
* Every field in this group is declared with the devinfo->gen >= 9
* condition.
*
* NOTE(review): the FC() arguments appear to be (name, high bit, low bit,
* required condition) -- confirm against the macro definition.
* @{
*/
FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9)
FC(send_src0_address_mode, 79, 79, devinfo->gen >= 9)
FC(send_sel_reg32_desc, 77, 77, devinfo->gen >= 9)
FC(send_sel_reg32_ex_desc, 61, 61, devinfo->gen >= 9)
FC(send_src1_reg_nr, 51, 44, devinfo->gen >= 9)
FC(send_src1_reg_file, 36, 36, devinfo->gen >= 9)
FC(send_dst_reg_file, 35, 35, devinfo->gen >= 9)
/** @} */
/* Message descriptor bits */
#define MD(x) ((x) + 96)
@ -513,11 +526,21 @@ brw_inst_set_send_ex_desc(const struct gen_device_info *devinfo,
brw_inst *inst, uint32_t value)
{
assert(devinfo->gen >= 9);
brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
assert(GET_BITS(value, 15, 0) == 0);
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
assert(GET_BITS(value, 15, 0) == 0);
} else {
assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16));
assert(GET_BITS(value, 15, 10) == 0);
brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6));
assert(GET_BITS(value, 5, 0) == 0);
}
}
/**
@ -530,10 +553,18 @@ brw_inst_send_ex_desc(const struct gen_device_info *devinfo,
const brw_inst *inst)
{
assert(devinfo->gen >= 9);
return (brw_inst_bits(inst, 94, 91) << 28 |
brw_inst_bits(inst, 88, 85) << 24 |
brw_inst_bits(inst, 83, 80) << 20 |
brw_inst_bits(inst, 67, 64) << 16);
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
return (brw_inst_bits(inst, 94, 91) << 28 |
brw_inst_bits(inst, 88, 85) << 24 |
brw_inst_bits(inst, 83, 80) << 20 |
brw_inst_bits(inst, 67, 64) << 16);
} else {
assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
return (brw_inst_bits(inst, 95, 80) << 16 |
brw_inst_bits(inst, 67, 64) << 6);
}
}
/**
@ -956,9 +987,11 @@ brw_inst_##reg##_ia16_addr_imm(const struct gen_device_info *devinfo, \
* Compared to Align1, these are missing the low 4 bits.
* -Gen 4- ----Gen8----
*/
BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52)
BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52)
BRW_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68)
BRW_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52)
/**
* Fetch a set of contiguous bits from the instruction.