mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 19:50:11 +01:00
brw: Enumerate SHADER_OPCODE_SEND sources and standardize how many
This introduces enums for SHADER_OPCODE_SEND[_GATHER] sources, similar to what we've done for most of the newer logical opcodes. This allows us to use actual names for sources rather than remembering their order, or leaving ourselves comments like /* ex_desc */ all over. It will also make it easier to add or reorder sources in the future. While we're at it, we also standardize on the number of sources. Previously, we allowed SHADER_OPCODE_SEND to have either 3 (monosend) or 4 (split send) sources, but this is mostly for haphazard historical reasons. We now specify all sources every time, eliminating the need for careful inst->sources checks before accessing the last source. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34040>
This commit is contained in:
parent
00d38b980d
commit
47fe9d28e7
14 changed files with 268 additions and 206 deletions
|
|
@ -640,22 +640,23 @@ brw_emit_repclear_shader(brw_shader &s)
|
|||
bld.uniform().MOV(component(header, 2), brw_imm_ud(i));
|
||||
|
||||
write = bld.emit(SHADER_OPCODE_SEND);
|
||||
write->resize_sources(3);
|
||||
write->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
/* We can use a headerless message for the first render target */
|
||||
write->header_size = i == 0 ? 0 : 2;
|
||||
write->mlen = 1 + write->header_size;
|
||||
|
||||
write->sfid = BRW_SFID_RENDER_CACHE;
|
||||
write->src[0] = brw_imm_ud(
|
||||
write->src[SEND_SRC_DESC] = brw_imm_ud(
|
||||
brw_fb_write_desc(
|
||||
s.devinfo, i,
|
||||
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED,
|
||||
i == key->nr_color_regions - 1, false) |
|
||||
brw_message_desc(s.devinfo, write->mlen,
|
||||
0 /* rlen */, write->header_size));
|
||||
write->src[1] = brw_imm_ud(0);
|
||||
write->src[2] = i == 0 ? color_output : header;
|
||||
write->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header;
|
||||
write->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
write->check_tdr = true;
|
||||
write->send_has_side_effects = true;
|
||||
|
||||
|
|
|
|||
|
|
@ -567,6 +567,26 @@ enum opcode {
|
|||
SHADER_OPCODE_LOAD_REG,
|
||||
};
|
||||
|
||||
/**
 * Named indices into inst->src[] for SHADER_OPCODE_SEND.
 *
 * Every SEND is created with exactly SEND_NUM_SRCS sources; a SEND that has
 * no second payload stores a default-constructed brw_reg() in
 * SEND_SRC_PAYLOAD2 rather than omitting the source.
 */
enum send_srcs {
|
||||
/** The 32-bit message descriptor (can be a register) */
|
||||
SEND_SRC_DESC,
|
||||
/** The 32-bit extended message descriptor (can be a register) */
|
||||
SEND_SRC_EX_DESC,
|
||||
/** The leading register for the first SEND payload */
|
||||
SEND_SRC_PAYLOAD1,
|
||||
/** The leading register for the second split-SEND payload */
|
||||
SEND_SRC_PAYLOAD2,
|
||||
|
||||
/** Number of sources of a SEND; the value passed to resize_sources() */
SEND_NUM_SRCS
|
||||
};
|
||||
|
||||
/**
 * Named indices into inst->src[] for SHADER_OPCODE_SEND_GATHER.
 *
 * Unlike SHADER_OPCODE_SEND, the source count is variable: the instruction
 * is resized to SEND_GATHER_SRC_PAYLOAD + <number of payload sources>.
 */
enum send_gather_srcs {
|
||||
/** The message descriptor (passed as the descriptor to generate_send) */
SEND_GATHER_SRC_DESC,
|
||||
/** The extended message descriptor (passed as ex_desc to generate_send) */
SEND_GATHER_SRC_EX_DESC,
|
||||
/**
 * Left empty (BAD_FILE) when the instruction is created and filled in after
 * register allocation; the generator passes it in the payload position.
 */
SEND_GATHER_SRC_SCALAR,
|
||||
/** The first payload source; any further payload sources follow it */
SEND_GATHER_SRC_PAYLOAD
|
||||
};
|
||||
|
||||
enum fb_write_logical_srcs {
|
||||
FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */
|
||||
FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */
|
||||
|
|
|
|||
|
|
@ -4937,10 +4937,13 @@ emit_rt_lsc_fence(const brw_builder &bld,
|
|||
|
||||
const brw_builder ubld = bld.exec_all().group(8, 0);
|
||||
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||
brw_inst *send = ubld.emit(SHADER_OPCODE_SEND, tmp,
|
||||
brw_imm_ud(0) /* desc */,
|
||||
brw_imm_ud(0) /* ex_desc */,
|
||||
brw_vec8_grf(0, 0) /* payload */);
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_PAYLOAD1] = brw_vec8_grf(0, 0),
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
brw_inst *send = ubld.emit(SHADER_OPCODE_SEND, tmp, srcs, SEND_NUM_SRCS);
|
||||
send->sfid = BRW_SFID_UGM;
|
||||
send->desc = lsc_fence_msg_desc(devinfo, scope, flush_type, true);
|
||||
send->mlen = reg_unit(devinfo); /* g0 header */
|
||||
|
|
|
|||
|
|
@ -1143,9 +1143,15 @@ brw_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
generate_send(inst, dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
|
||||
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
generate_send(inst, dst, src[0], src[1], src[2],
|
||||
inst->ex_mlen > 0 ? src[3] : brw_null_reg());
|
||||
generate_send(inst, dst,
|
||||
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
|
||||
src[SEND_GATHER_SRC_SCALAR], brw_null_reg());
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ brw_inst::is_control_source(unsigned arg) const
|
|||
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return arg == 0 || arg == 1;
|
||||
return arg < SEND_SRC_PAYLOAD1;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
|
|
@ -246,10 +246,10 @@ brw_inst::is_payload(unsigned arg) const
|
|||
return arg == 0;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
return arg == 2 || arg == 3;
|
||||
return arg >= SEND_SRC_PAYLOAD1;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return arg >= 2;
|
||||
return arg >= SEND_GATHER_SRC_SCALAR;
|
||||
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -521,15 +521,15 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const
|
|||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_SEND:
|
||||
if (arg == 2) {
|
||||
if (arg == SEND_SRC_PAYLOAD1) {
|
||||
return mlen * REG_SIZE;
|
||||
} else if (arg == 3) {
|
||||
} else if (arg == SEND_SRC_PAYLOAD2) {
|
||||
return ex_mlen * REG_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
if (arg >= 3) {
|
||||
if (arg >= SEND_GATHER_SRC_PAYLOAD) {
|
||||
/* SEND_GATHER is Xe3+, so no need to pass devinfo around. */
|
||||
const unsigned reg_unit = 2;
|
||||
return REG_SIZE * reg_unit;
|
||||
|
|
|
|||
|
|
@ -522,9 +522,12 @@ brw_lower_sends_overlapping_payload(brw_shader &s)
|
|||
|
||||
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
|
||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
||||
regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
|
||||
inst->src[3], inst->ex_mlen * REG_SIZE)) {
|
||||
const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3;
|
||||
regions_overlap(inst->src[SEND_SRC_PAYLOAD1],
|
||||
inst->mlen * REG_SIZE,
|
||||
inst->src[SEND_SRC_PAYLOAD2],
|
||||
inst->ex_mlen * REG_SIZE)) {
|
||||
const unsigned arg = inst->mlen < inst->ex_mlen ?
|
||||
SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2;
|
||||
const unsigned len = MIN2(inst->mlen, inst->ex_mlen);
|
||||
|
||||
brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD);
|
||||
|
|
|
|||
|
|
@ -65,12 +65,12 @@ lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->ex_mlen = 0;
|
||||
inst->send_is_volatile = true;
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = brw_reg();
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -123,13 +123,12 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
|
|||
inst->send_has_side_effects = true;
|
||||
inst->send_is_volatile = false;
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = brw_reg();
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -181,12 +180,12 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->ex_mlen = 0;
|
||||
inst->send_has_side_effects = true;
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = brw_reg();
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -253,13 +252,12 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
|
|||
inst->send_has_side_effects = true;
|
||||
inst->send_is_volatile = false;
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = payload2;
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -492,11 +490,12 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
inst->ex_desc = ex_desc;
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->resize_sources(3);
|
||||
inst->sfid = BRW_SFID_RENDER_CACHE;
|
||||
inst->src[0] = desc;
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[2] = payload;
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->src[SEND_SRC_DESC] = desc;
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
inst->mlen = regs_written(load);
|
||||
inst->ex_mlen = 0;
|
||||
inst->header_size = header_size;
|
||||
|
|
@ -556,12 +555,12 @@ lower_fb_read_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
component(header, 0),
|
||||
brw_imm_ud(~INTEL_MASK(14, 11)));
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[2] = header;
|
||||
inst->src[3] = brw_reg();
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = header;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
inst->mlen = length;
|
||||
inst->header_size = length;
|
||||
inst->sfid = BRW_SFID_RENDER_CACHE;
|
||||
|
|
@ -1152,6 +1151,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
|
||||
/* Generate the SEND. */
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->mlen = mlen;
|
||||
inst->header_size = header_size;
|
||||
inst->sfid = BRW_SFID_SAMPLER;
|
||||
|
|
@ -1165,8 +1165,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
msg_type,
|
||||
simd_mode,
|
||||
sampler_ret_type);
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
} else if (surface_handle.file != BAD_FILE) {
|
||||
/* Bindless surface */
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
|
|
@ -1180,18 +1180,18 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
* header so we can leave the portion in the message descriptor 0.
|
||||
*/
|
||||
if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
} else {
|
||||
const brw_builder ubld = bld.uniform();
|
||||
brw_reg desc = ubld.vgrf(BRW_TYPE_UD);
|
||||
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
||||
inst->src[0] = component(desc, 0);
|
||||
inst->src[SEND_SRC_DESC] = component(desc, 0);
|
||||
}
|
||||
|
||||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface_handle, BRW_TYPE_UD);
|
||||
inst->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD);
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
} else {
|
||||
/* Immediate portion of the descriptor */
|
||||
|
|
@ -1218,14 +1218,14 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
}
|
||||
ubld.AND(desc, desc, brw_imm_ud(0xfff));
|
||||
|
||||
inst->src[0] = component(desc, 0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[SEND_SRC_DESC] = component(desc, 0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
}
|
||||
|
||||
inst->ex_desc = 0;
|
||||
|
||||
inst->src[2] = src_payload;
|
||||
inst->resize_sources(3);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = src_payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
|
||||
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
|
||||
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE * reg_unit(devinfo));
|
||||
|
|
@ -1374,25 +1374,25 @@ setup_surface_descriptors(const brw_builder &bld, brw_inst *inst, uint32_t desc,
|
|||
|
||||
if (surface.file == IMM) {
|
||||
inst->desc = desc | (surface.ud & 0xff);
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
} else if (surface_handle.file != BAD_FILE) {
|
||||
/* Bindless surface */
|
||||
inst->desc = desc | GFX9_BTI_BINDLESS;
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
|
||||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface_handle, BRW_TYPE_UD);
|
||||
inst->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD);
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
} else {
|
||||
inst->desc = desc;
|
||||
const brw_builder ubld = bld.uniform();
|
||||
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||
ubld.AND(tmp, surface, brw_imm_ud(0xff));
|
||||
inst->src[0] = component(tmp, 0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[SEND_SRC_DESC] = component(tmp, 0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1406,8 +1406,8 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst,
|
|||
|
||||
assert(base_offset == 0 || devinfo->ver >= 20);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
|
||||
enum lsc_addr_surface_type surf_type = lsc_msg_desc_addr_type(devinfo, desc);
|
||||
|
||||
|
|
@ -1427,7 +1427,7 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst,
|
|||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface, BRW_TYPE_UD);
|
||||
inst->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD);
|
||||
/* Gfx20+ assumes ExBSO with UGM */
|
||||
if (devinfo->ver >= 20 && inst->sfid == BRW_SFID_UGM)
|
||||
inst->send_ex_bso = true;
|
||||
|
|
@ -1452,18 +1452,19 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst,
|
|||
case LSC_ADDR_SURFTYPE_BTI:
|
||||
assert(surface.file != BAD_FILE);
|
||||
if (surface.file == IMM) {
|
||||
inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud,
|
||||
base_offset_bits));
|
||||
inst->src[SEND_SRC_EX_DESC] =
|
||||
brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud, base_offset_bits));
|
||||
} else {
|
||||
assert(base_offset == 0);
|
||||
const brw_builder ubld = bld.uniform();
|
||||
brw_reg tmp = ubld.SHL(surface, brw_imm_ud(24));
|
||||
inst->src[1] = component(tmp, 0);
|
||||
inst->src[SEND_SRC_EX_DESC] = component(tmp, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case LSC_ADDR_SURFTYPE_FLAT:
|
||||
inst->src[1] = brw_imm_ud(lsc_flat_ex_desc(devinfo, base_offset_bits));
|
||||
inst->src[SEND_SRC_EX_DESC] =
|
||||
brw_imm_ud(lsc_flat_ex_desc(devinfo, base_offset_bits));
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -1656,7 +1657,6 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
(1 << components) - 1 : components,
|
||||
transpose, cache_mode);
|
||||
|
||||
/* Set up extended descriptors, fills src[0] and src[1]. */
|
||||
setup_lsc_surface_descriptors(bld, inst, inst->desc, binding, base_offset);
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
|
|
@ -1667,11 +1667,11 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->send_has_side_effects = has_side_effects;
|
||||
inst->send_is_volatile = !has_side_effects || volatile_access;
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
/* Finally, the payload */
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = payload2;
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||
}
|
||||
|
||||
static brw_reg
|
||||
|
|
@ -1956,13 +1956,13 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->exec_size = components > 8 ? 16 : 8;
|
||||
}
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
/* Set up descriptors */
|
||||
switch (binding_type) {
|
||||
case LSC_ADDR_SURFTYPE_FLAT:
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
break;
|
||||
case LSC_ADDR_SURFTYPE_BSS:
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
|
|
@ -1973,19 +1973,19 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = binding;
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = binding;
|
||||
break;
|
||||
case LSC_ADDR_SURFTYPE_BTI:
|
||||
if (binding.file == IMM) {
|
||||
desc |= binding.ud & 0xff;
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
} else {
|
||||
brw_reg tmp = ubld1.vgrf(BRW_TYPE_UD);
|
||||
ubld1.AND(tmp, binding, brw_imm_ud(0xff));
|
||||
inst->src[0] = component(tmp, 0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = component(tmp, 0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
@ -1995,8 +1995,8 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->desc = desc;
|
||||
|
||||
/* Finally, the payloads */
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = payload2;
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2026,14 +2026,16 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld,
|
|||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->sfid = BRW_SFID_UGM;
|
||||
inst->resize_sources(3);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
|
||||
assert(!compiler->indirect_ubos_use_sampler);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[2] = ubo_offset; /* payload */
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = ubo_offset;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
|
||||
if (alignment >= 4) {
|
||||
inst->desc =
|
||||
|
|
@ -2077,8 +2079,8 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld,
|
|||
bld.emit(*inst);
|
||||
|
||||
/* Offset the source */
|
||||
inst->src[2] = bld.vgrf(BRW_TYPE_UD);
|
||||
bld.ADD(inst->src[2], ubo_offset, brw_imm_ud(c * 4));
|
||||
inst->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD);
|
||||
bld.ADD(inst->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4));
|
||||
|
||||
/* Offset the destination */
|
||||
inst->dst = offset(inst->dst, bld, 1);
|
||||
|
|
@ -2108,10 +2110,11 @@ lower_varying_pull_constant_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = inst->exec_size / 8;
|
||||
inst->resize_sources(3);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
/* src[0] & src[1] are filled by setup_surface_descriptors() */
|
||||
inst->src[2] = ubo_offset; /* payload */
|
||||
/* src[SEND_SRC_DESC/EX_DESC] are filled by setup_surface_descriptors() */
|
||||
inst->src[SEND_SRC_PAYLOAD1] = ubo_offset;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
|
||||
if (compiler->indirect_ubos_use_sampler) {
|
||||
const unsigned simd_mode =
|
||||
|
|
@ -2155,8 +2158,8 @@ lower_varying_pull_constant_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
bld.emit(*inst);
|
||||
|
||||
/* Offset the source */
|
||||
inst->src[2] = bld.vgrf(BRW_TYPE_UD);
|
||||
bld.ADD(inst->src[2], ubo_offset, brw_imm_ud(c * 4));
|
||||
inst->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD);
|
||||
bld.ADD(inst->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4));
|
||||
|
||||
/* Offset the destination */
|
||||
inst->dst = offset(inst->dst, bld, 1);
|
||||
|
|
@ -2284,10 +2287,12 @@ lower_interpolator_logical_send(const brw_builder &bld, brw_inst *inst,
|
|||
inst->send_has_side_effects = false;
|
||||
inst->send_is_volatile = false;
|
||||
|
||||
inst->resize_sources(3);
|
||||
inst->src[0] = component(desc, 0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = payload;
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[SEND_SRC_DESC] = component(desc, 0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2363,11 +2368,13 @@ lower_btd_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
inst->sfid = BRW_SFID_BINDLESS_THREAD_DISPATCH;
|
||||
inst->desc = brw_btd_spawn_desc(devinfo, inst->exec_size,
|
||||
GEN_RT_BTD_MESSAGE_SPAWN);
|
||||
inst->resize_sources(4);
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = header;
|
||||
inst->src[3] = payload;
|
||||
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = header;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2468,11 +2475,13 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
/* Set up SFID and descriptors */
|
||||
inst->sfid = BRW_SFID_RAY_TRACE_ACCELERATOR;
|
||||
inst->desc = brw_rt_trace_ray_desc(devinfo, inst->exec_size);
|
||||
inst->resize_sources(4);
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = header;
|
||||
inst->src[3] = payload;
|
||||
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = header;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2490,12 +2499,13 @@ lower_get_buffer_size(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = inst->exec_size / 8;
|
||||
inst->resize_sources(3);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->ex_mlen = 0;
|
||||
inst->ex_desc = 0;
|
||||
|
||||
/* src[0] & src[1] are filled by setup_surface_descriptors() */
|
||||
inst->src[2] = lod;
|
||||
/* src[SEND_SRC_DESC/EX_DESC] are filled by setup_surface_descriptors() */
|
||||
inst->src[SEND_SRC_PAYLOAD1] = lod;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
|
||||
const uint32_t return_format = GFX8_SAMPLER_RETURN_FORMAT_32BITS;
|
||||
|
||||
|
|
@ -2524,14 +2534,14 @@ lower_lsc_memory_fence_and_interlock(const brw_builder &bld, brw_inst *inst)
|
|||
assert(inst->size_written == reg_unit(devinfo) * REG_SIZE);
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->check_tdr = interlock;
|
||||
inst->send_has_side_effects = true;
|
||||
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[2] = retype(vec1(header), BRW_TYPE_UD);
|
||||
inst->src[3] = brw_reg();
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD);
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
inst->mlen = reg_unit(devinfo);
|
||||
inst->ex_mlen = 0;
|
||||
|
||||
|
|
@ -2600,14 +2610,14 @@ lower_hdc_memory_fence_and_interlock(const brw_builder &bld, brw_inst *inst)
|
|||
assert(inst->size_written == (commit_enable ? REG_SIZE : 0));
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->check_tdr = interlock;
|
||||
inst->send_has_side_effects = true;
|
||||
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
inst->src[2] = retype(vec1(header), BRW_TYPE_UD);
|
||||
inst->src[3] = brw_reg();
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD);
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
inst->mlen = reg_unit(devinfo);
|
||||
inst->ex_mlen = 0;
|
||||
inst->header_size = 1;
|
||||
|
|
@ -2804,11 +2814,12 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s)
|
|||
|
||||
/* Finally, the payload */
|
||||
|
||||
inst->resize_sources(3);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
setup_lsc_surface_descriptors(ubld, inst, inst->desc,
|
||||
surface.file != BAD_FILE ?
|
||||
surface : surface_handle, 0);
|
||||
inst->src[2] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg();
|
||||
|
||||
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||||
BRW_DEPENDENCY_VARIABLES);
|
||||
|
|
@ -2830,12 +2841,12 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s)
|
|||
brw_dp_oword_block_rw_desc(devinfo, true /* align_16B */,
|
||||
size_B.ud / 4, false /* write */);
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
setup_surface_descriptors(ubld, inst, desc, surface, surface_handle);
|
||||
|
||||
inst->src[2] = header;
|
||||
inst->src[3] = brw_reg(); /* unused for reads */
|
||||
inst->src[SEND_SRC_PAYLOAD1] = header;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); /* unused for reads */
|
||||
|
||||
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||||
BRW_DEPENDENCY_VARIABLES);
|
||||
|
|
@ -2871,21 +2882,21 @@ brw_lower_send_descriptors(brw_shader &s)
|
|||
uint32_t desc_imm = inst->desc |
|
||||
brw_message_desc(devinfo, mlen, rlen, inst->header_size);
|
||||
|
||||
assert(inst->src[0].file != BAD_FILE);
|
||||
assert(inst->src[1].file != BAD_FILE);
|
||||
assert(inst->src[SEND_SRC_DESC].file != BAD_FILE);
|
||||
assert(inst->src[SEND_SRC_EX_DESC].file != BAD_FILE);
|
||||
|
||||
brw_reg desc = inst->src[0];
|
||||
brw_reg desc = inst->src[SEND_SRC_DESC];
|
||||
if (desc.file == IMM) {
|
||||
inst->src[0] = brw_imm_ud(desc.ud | desc_imm);
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(desc.ud | desc_imm);
|
||||
} else {
|
||||
brw_reg addr_reg = ubld.vaddr(BRW_TYPE_UD,
|
||||
BRW_ADDRESS_SUBREG_INDIRECT_DESC);
|
||||
ubld.OR(addr_reg, desc, brw_imm_ud(desc_imm));
|
||||
inst->src[0] = addr_reg;
|
||||
inst->src[SEND_SRC_DESC] = addr_reg;
|
||||
}
|
||||
|
||||
/* Extended descriptor */
|
||||
brw_reg ex_desc = inst->src[1];
|
||||
brw_reg ex_desc = inst->src[SEND_SRC_EX_DESC];
|
||||
uint32_t ex_desc_imm = inst->ex_desc |
|
||||
brw_message_ex_desc(devinfo, inst->ex_mlen);
|
||||
|
||||
|
|
@ -2919,9 +2930,9 @@ brw_lower_send_descriptors(brw_shader &s)
|
|||
ubld.MOV(addr_reg, ex_desc);
|
||||
else
|
||||
ubld.OR(addr_reg, ex_desc, brw_imm_ud(ex_desc_imm));
|
||||
inst->src[1] = addr_reg;
|
||||
inst->src[SEND_SRC_EX_DESC] = addr_reg;
|
||||
} else {
|
||||
inst->src[1] = brw_imm_ud(ex_desc_imm);
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(ex_desc_imm);
|
||||
}
|
||||
|
||||
progress = true;
|
||||
|
|
|
|||
|
|
@ -341,7 +341,7 @@ brw_opt_split_sends(brw_shader &s)
|
|||
foreach_block_and_inst(block, brw_inst, send, s.cfg) {
|
||||
if (send->opcode != SHADER_OPCODE_SEND ||
|
||||
send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 ||
|
||||
send->src[2].file != VGRF)
|
||||
send->src[SEND_SRC_PAYLOAD1].file != VGRF)
|
||||
continue;
|
||||
|
||||
/* Currently don't split sends that reuse a previously used payload. */
|
||||
|
|
@ -350,7 +350,8 @@ brw_opt_split_sends(brw_shader &s)
|
|||
if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
|
||||
continue;
|
||||
|
||||
if (lp->dst.file != send->src[2].file || lp->dst.nr != send->src[2].nr)
|
||||
if (lp->dst.file != send->src[SEND_SRC_PAYLOAD1].file ||
|
||||
lp->dst.nr != send->src[SEND_SRC_PAYLOAD1].nr)
|
||||
continue;
|
||||
|
||||
/* Split either after the header (if present), or when consecutive
|
||||
|
|
@ -389,9 +390,9 @@ brw_opt_split_sends(brw_shader &s)
|
|||
lp1->dst = retype(brw_allocate_vgrf_units(s, lp1->size_written / REG_SIZE), lp1->dst.type);
|
||||
lp2->dst = retype(brw_allocate_vgrf_units(s, lp2->size_written / REG_SIZE), lp2->dst.type);
|
||||
|
||||
send->resize_sources(4);
|
||||
send->src[2] = lp1->dst;
|
||||
send->src[3] = lp2->dst;
|
||||
send->resize_sources(SEND_NUM_SRCS);
|
||||
send->src[SEND_SRC_PAYLOAD1] = lp1->dst;
|
||||
send->src[SEND_SRC_PAYLOAD2] = lp2->dst;
|
||||
send->ex_mlen = lp2->size_written / REG_SIZE;
|
||||
send->mlen -= send->ex_mlen;
|
||||
|
||||
|
|
@ -625,8 +626,8 @@ brw_opt_send_to_send_gather(brw_shader &s)
|
|||
brw_reg src;
|
||||
unsigned phys_len;
|
||||
} payload[2] = {
|
||||
{ inst->src[2], inst->mlen / unit },
|
||||
{ inst->src[3], inst->ex_mlen / unit },
|
||||
{ inst->src[SEND_SRC_PAYLOAD1], inst->mlen / unit },
|
||||
{ inst->src[SEND_SRC_PAYLOAD2], inst->ex_mlen / unit },
|
||||
};
|
||||
|
||||
const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len;
|
||||
|
|
@ -640,11 +641,11 @@ brw_opt_send_to_send_gather(brw_shader &s)
|
|||
continue;
|
||||
}
|
||||
|
||||
inst->resize_sources(3 + num_payload_sources);
|
||||
inst->resize_sources(SEND_GATHER_SRC_PAYLOAD + num_payload_sources);
|
||||
/* Sources 0 and 1 remain the same. Source 2 will be filled
|
||||
* after register allocation.
|
||||
*/
|
||||
inst->src[2] = {};
|
||||
inst->src[SEND_GATHER_SRC_SCALAR] = {};
|
||||
|
||||
int idx = 3;
|
||||
for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) {
|
||||
|
|
@ -694,7 +695,7 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
|||
continue;
|
||||
|
||||
assert(inst->sources > 2);
|
||||
assert(inst->src[2].file == BAD_FILE);
|
||||
assert(inst->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE);
|
||||
|
||||
const int num_payload_sources = inst->sources - 3;
|
||||
assert(num_payload_sources > 0);
|
||||
|
|
@ -707,7 +708,7 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
|||
* and there's no need to use SEND_GATHER (which would set ARF scalar register
|
||||
* adding an extra instruction).
|
||||
*/
|
||||
const brw_reg *payload = &inst->src[3];
|
||||
const brw_reg *payload = &inst->src[SEND_GATHER_SRC_PAYLOAD];
|
||||
brw_reg payload1 = payload[0];
|
||||
brw_reg payload2 = {};
|
||||
int payload1_len = 0;
|
||||
|
|
@ -758,10 +759,10 @@ brw_opt_send_gather_to_send(brw_shader &s)
|
|||
continue;
|
||||
}
|
||||
|
||||
inst->resize_sources(4);
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->src[2] = payload1;
|
||||
inst->src[3] = payload2;
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload1;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||
inst->mlen = payload1_len * unit;
|
||||
inst->ex_mlen = payload2_len * unit;
|
||||
|
||||
|
|
|
|||
|
|
@ -679,9 +679,9 @@ eot_send_has_constraint(brw_shader &s, brw_inst *inst, brw_reg val, int arg)
|
|||
* We need to pin both split SEND sources in g112-g126/127, so only
|
||||
* allow this if the registers aren't too large.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->sources >= 4 &&
|
||||
val.file == VGRF) {
|
||||
int other_src = arg == 2 ? 3 : 2;
|
||||
if (inst->opcode == SHADER_OPCODE_SEND && val.file == VGRF) {
|
||||
const int other_src =
|
||||
arg == SEND_SRC_PAYLOAD1 ? SEND_SRC_PAYLOAD2 : SEND_SRC_PAYLOAD1;
|
||||
unsigned other_size = inst->src[other_src].file == VGRF ?
|
||||
s.alloc.sizes[inst->src[other_src].nr] :
|
||||
(inst->size_read(devinfo, other_src) / REG_SIZE);
|
||||
|
|
|
|||
|
|
@ -201,19 +201,21 @@ would_violate_eot_restriction(brw_shader &s,
|
|||
if (send->opcode != SHADER_OPCODE_SEND || !send->eot)
|
||||
continue;
|
||||
|
||||
if ((send->src[2].file == VGRF && send->src[2].nr == src_reg) ||
|
||||
(send->sources >= 4 &&
|
||||
send->src[3].file == VGRF && send->src[3].nr == src_reg)) {
|
||||
const unsigned s2 =
|
||||
send->src[2].file == VGRF ? s.alloc.sizes[send->src[2].nr] : 0;
|
||||
const unsigned s3 = send->sources >= 4 &&
|
||||
send->src[3].file == VGRF ?
|
||||
s.alloc.sizes[send->src[3].nr] : 0;
|
||||
if ((send->src[SEND_SRC_PAYLOAD1].file == VGRF &&
|
||||
send->src[SEND_SRC_PAYLOAD1].nr == src_reg) ||
|
||||
(send->src[SEND_SRC_PAYLOAD2].file == VGRF &&
|
||||
send->src[SEND_SRC_PAYLOAD2].nr == src_reg)) {
|
||||
const unsigned p1 =
|
||||
send->src[SEND_SRC_PAYLOAD1].file == VGRF ?
|
||||
s.alloc.sizes[send->src[SEND_SRC_PAYLOAD1].nr] : 0;
|
||||
const unsigned p2 =
|
||||
send->src[SEND_SRC_PAYLOAD2].file == VGRF ?
|
||||
s.alloc.sizes[send->src[SEND_SRC_PAYLOAD2].nr] : 0;
|
||||
|
||||
const unsigned increase =
|
||||
s.alloc.sizes[dst_reg] - s.alloc.sizes[src_reg];
|
||||
|
||||
if (s2 + s3 + increase > 15)
|
||||
if (p1 + p2 + increase > 15)
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -615,10 +615,13 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
|
|||
* interference here.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
||||
inst->src[2].file == VGRF && inst->src[3].file == VGRF &&
|
||||
inst->src[2].nr != inst->src[3].nr)
|
||||
ra_add_node_interference(g, first_vgrf_node + inst->src[2].nr,
|
||||
first_vgrf_node + inst->src[3].nr);
|
||||
inst->src[SEND_SRC_PAYLOAD1].file == VGRF &&
|
||||
inst->src[SEND_SRC_PAYLOAD2].file == VGRF &&
|
||||
inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) {
|
||||
ra_add_node_interference(g,
|
||||
first_vgrf_node + inst->src[SEND_SRC_PAYLOAD1].nr,
|
||||
first_vgrf_node + inst->src[SEND_SRC_PAYLOAD2].nr);
|
||||
}
|
||||
|
||||
/* When we do send-from-GRF for FB writes, we need to ensure that the last
|
||||
* write instruction sends from a high register. This is because the
|
||||
|
|
@ -631,7 +634,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
|
|||
*/
|
||||
if (inst->eot && devinfo->ver < 30) {
|
||||
assert(inst->opcode == SHADER_OPCODE_SEND);
|
||||
const int vgrf = inst->src[2].nr;
|
||||
const int vgrf = inst->src[SEND_SRC_PAYLOAD1].nr;
|
||||
const int size = DIV_ROUND_UP(fs->alloc.sizes[vgrf], reg_unit(devinfo));
|
||||
int reg = BRW_MAX_GRF - size;
|
||||
|
||||
|
|
@ -646,7 +649,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst)
|
|||
ra_set_node_reg(g, first_vgrf_node + vgrf, reg);
|
||||
|
||||
if (inst->ex_mlen > 0) {
|
||||
const int vgrf = inst->src[3].nr;
|
||||
const int vgrf = inst->src[SEND_SRC_PAYLOAD2].nr;
|
||||
reg -= DIV_ROUND_UP(fs->alloc.sizes[vgrf], reg_unit(devinfo));
|
||||
assert(reg >= 112);
|
||||
ra_set_node_reg(g, first_vgrf_node + vgrf, reg);
|
||||
|
|
@ -881,11 +884,11 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
|
|||
offset = build_lane_offsets(ubld, spill_offset, ip);
|
||||
}
|
||||
|
||||
brw_reg srcs[] = {
|
||||
brw_imm_ud(0), /* desc */
|
||||
build_ex_desc(bld, reg_size, true),
|
||||
offset, /* payload */
|
||||
brw_reg(), /* payload2 */
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = build_ex_desc(bld, reg_size, true),
|
||||
[SEND_SRC_PAYLOAD1] = offset,
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
|
||||
uint32_t desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
|
||||
|
|
@ -920,10 +923,11 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld,
|
|||
|
||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||
|
||||
brw_reg srcs[] = {
|
||||
brw_imm_ud(0), /* desc */
|
||||
brw_imm_ud(0), /* ex_desc */
|
||||
header
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_PAYLOAD1] = header,
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
|
|
@ -968,11 +972,11 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
|
|||
if (devinfo->verx10 >= 125) {
|
||||
brw_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
||||
|
||||
brw_reg srcs[] = {
|
||||
brw_imm_ud(0), /* desc */
|
||||
build_ex_desc(bld, reg_size, false),
|
||||
offset, /* payload */
|
||||
src, /* payload2 */
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = build_ex_desc(bld, reg_size, false),
|
||||
[SEND_SRC_PAYLOAD1] = offset,
|
||||
[SEND_SRC_PAYLOAD2] = src,
|
||||
};
|
||||
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
|
|
@ -1002,11 +1006,11 @@ brw_reg_alloc::emit_spill(const brw_builder &bld,
|
|||
brw_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
||||
|
||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||
brw_reg srcs[] = {
|
||||
brw_imm_ud(0), /* desc */
|
||||
brw_imm_ud(0), /* ex_desc */
|
||||
header,
|
||||
src
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_PAYLOAD1] = header,
|
||||
[SEND_SRC_PAYLOAD2] = src
|
||||
};
|
||||
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
|
|
|
|||
|
|
@ -375,14 +375,15 @@ brw_shader::emit_cs_terminate()
|
|||
if (devinfo->ver < 11)
|
||||
desc |= (1 << 4); /* Do not dereference URB */
|
||||
|
||||
brw_reg srcs[4] = {
|
||||
brw_imm_ud(desc), /* desc */
|
||||
brw_imm_ud(0), /* ex_desc */
|
||||
payload, /* payload */
|
||||
brw_reg(), /* payload2 */
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(desc),
|
||||
[SEND_SRC_EX_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_PAYLOAD1] = payload,
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
|
||||
brw_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
|
||||
brw_inst *send =
|
||||
ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, SEND_NUM_SRCS);
|
||||
|
||||
/* On Alchemist and later, send an EOT message to the message gateway to
|
||||
* terminate a compute shader. For older GPUs, send to the thread spawner.
|
||||
|
|
@ -725,11 +726,11 @@ brw_shader::assign_curb_setup()
|
|||
addr = base_addr;
|
||||
}
|
||||
|
||||
brw_reg srcs[4] = {
|
||||
brw_imm_ud(0), /* desc */
|
||||
brw_imm_ud(0), /* ex_desc */
|
||||
addr, /* payload */
|
||||
brw_reg(), /* payload2 */
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_EX_DESC] = brw_imm_ud(0),
|
||||
[SEND_SRC_PAYLOAD1] = addr,
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
|
||||
brw_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
||||
|
|
@ -755,11 +756,11 @@ brw_shader::assign_curb_setup()
|
|||
(payload().num_regs + prog_data->curb_read_length));
|
||||
send->send_is_volatile = true;
|
||||
|
||||
send->src[0] = brw_imm_ud(desc |
|
||||
brw_message_desc(devinfo,
|
||||
send->mlen,
|
||||
send->size_written / REG_SIZE,
|
||||
send->header_size));
|
||||
send->src[SEND_SRC_DESC] =
|
||||
brw_imm_ud(desc | brw_message_desc(devinfo,
|
||||
send->mlen,
|
||||
send->size_written / REG_SIZE,
|
||||
send->header_size));
|
||||
|
||||
i += num_regs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -313,7 +313,11 @@ brw_validate(const brw_shader &s)
|
|||
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_SEND:
|
||||
fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
|
||||
fsv_assert(inst->sources == SEND_NUM_SRCS);
|
||||
fsv_assert(is_uniform(inst->src[SEND_SRC_DESC]));
|
||||
fsv_assert(is_uniform(inst->src[SEND_SRC_EX_DESC]));
|
||||
fsv_assert(inst->ex_mlen > 0 ||
|
||||
inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
|
|
|
|||
|
|
@ -43,7 +43,13 @@ emit_SEND(const brw_builder &bld, const brw_reg &dst,
|
|||
const brw_reg &desc, const brw_reg &payload)
|
||||
{
|
||||
brw_reg uniform_desc = component(desc, 0);
|
||||
brw_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, uniform_desc, uniform_desc, payload);
|
||||
brw_reg srcs[SEND_NUM_SRCS] = {
|
||||
[SEND_SRC_DESC] = uniform_desc,
|
||||
[SEND_SRC_EX_DESC] = uniform_desc,
|
||||
[SEND_SRC_PAYLOAD1] = payload,
|
||||
[SEND_SRC_PAYLOAD2] = brw_reg(),
|
||||
};
|
||||
brw_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, srcs, SEND_NUM_SRCS);
|
||||
inst->mlen = 1;
|
||||
return inst;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue