mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 09:10:11 +01:00
intel/brw: Replace CS_OPCODE_CS_TERMINATE with SHADER_OPCODE_SEND
There's no need for special handling here; it's just a send message with a trivial g0 header and descriptor. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27924>
This commit is contained in:
parent
f93f215898
commit
97bf3d3b2d
9 changed files with 42 additions and 92 deletions
|
|
@ -506,11 +506,6 @@ enum opcode {
|
|||
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
||||
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
|
||||
|
||||
/**
|
||||
* Terminate the compute shader.
|
||||
*/
|
||||
CS_OPCODE_CS_TERMINATE,
|
||||
|
||||
/**
|
||||
* GLSL barrier()
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -890,7 +890,6 @@ fs_inst::size_read(int arg) const
|
|||
return retype(src[arg], BRW_REGISTER_TYPE_UD).component_size(8);
|
||||
break;
|
||||
|
||||
case CS_OPCODE_CS_TERMINATE:
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
return REG_SIZE;
|
||||
|
||||
|
|
@ -2440,8 +2439,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
return "interp_per_slot_offset";
|
||||
|
||||
case CS_OPCODE_CS_TERMINATE:
|
||||
return "cs_terminate";
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
return "barrier";
|
||||
case SHADER_OPCODE_MULH:
|
||||
|
|
|
|||
|
|
@ -498,7 +498,6 @@ private:
|
|||
struct brw_reg payload2);
|
||||
void generate_fb_read(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg payload);
|
||||
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
|
||||
void generate_barrier(fs_inst *inst, struct brw_reg src);
|
||||
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
|
|
|
|||
|
|
@ -611,46 +611,6 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
|
||||
{
|
||||
struct brw_inst *insn;
|
||||
|
||||
insn = brw_next_insn(p, BRW_OPCODE_SEND);
|
||||
|
||||
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
|
||||
brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
|
||||
if (devinfo->ver < 12)
|
||||
brw_set_src1(p, insn, brw_imm_ud(0u));
|
||||
|
||||
/* For XeHP and newer send a message to the message gateway to terminate a
|
||||
* compute shader. For older devices, a message is sent to the thread
|
||||
* spawner.
|
||||
*/
|
||||
if (devinfo->verx10 >= 125)
|
||||
brw_inst_set_sfid(devinfo, insn, BRW_SFID_MESSAGE_GATEWAY);
|
||||
else
|
||||
brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
|
||||
brw_inst_set_mlen(devinfo, insn, 1);
|
||||
brw_inst_set_rlen(devinfo, insn, 0);
|
||||
brw_inst_set_eot(devinfo, insn, inst->eot);
|
||||
brw_inst_set_header_present(devinfo, insn, false);
|
||||
|
||||
brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
|
||||
|
||||
if (devinfo->ver < 11) {
|
||||
brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */
|
||||
|
||||
/* Note that even though the thread has a URB resource associated with it,
|
||||
* we set the "do not dereference URB" bit, because the URB resource is
|
||||
* managed by the fixed-function unit, so it will free it automatically.
|
||||
*/
|
||||
brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */
|
||||
}
|
||||
|
||||
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
|
||||
{
|
||||
|
|
@ -1469,11 +1429,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
}
|
||||
break;
|
||||
|
||||
case CS_OPCODE_CS_TERMINATE:
|
||||
generate_cs_terminate(inst, src[0]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
generate_barrier(inst, src[0]);
|
||||
send_count++;
|
||||
|
|
|
|||
|
|
@ -221,24 +221,15 @@ void fs_visitor::calculate_payload_ranges(unsigned payload_node_count,
|
|||
}
|
||||
}
|
||||
|
||||
/* Special case instructions which have extra implied registers used. */
|
||||
switch (inst->opcode) {
|
||||
case CS_OPCODE_CS_TERMINATE:
|
||||
if (inst->eot) {
|
||||
/* We could omit this for the !inst->header_present case, except
|
||||
* that the simulator apparently incorrectly reads from g0/g1
|
||||
* instead of sideband. It also really freaks out driver
|
||||
* developers to see g0 used in unusual places, so just always
|
||||
* reserve it.
|
||||
*/
|
||||
payload_last_use_ip[0] = use_ip;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (inst->eot) {
|
||||
/* We could omit this for the !inst->header_present case, except
|
||||
* that the simulator apparently incorrectly reads from g0/g1
|
||||
* instead of sideband. It also really freaks out driver
|
||||
* developers to see g0 used in unusual places, so just always
|
||||
* reserve it.
|
||||
*/
|
||||
payload_last_use_ip[0] = use_ip;
|
||||
payload_last_use_ip[1] = use_ip;
|
||||
}
|
||||
break;
|
||||
payload_last_use_ip[1] = use_ip;
|
||||
}
|
||||
|
||||
ip++;
|
||||
|
|
|
|||
|
|
@ -970,7 +970,7 @@ fs_visitor::emit_urb_fence()
|
|||
void
|
||||
fs_visitor::emit_cs_terminate()
|
||||
{
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
const fs_builder ubld = fs_builder(this).at_end().exec_all();
|
||||
|
||||
/* We can't directly send from g0, since sends with EOT have to use
|
||||
* g112-127. So, copy it to a virtual register. The register allocator will
|
||||
|
|
@ -978,12 +978,36 @@ fs_visitor::emit_cs_terminate()
|
|||
*/
|
||||
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||
bld.group(8, 0).exec_all().MOV(payload, g0);
|
||||
ubld.group(8, 0).MOV(payload, g0);
|
||||
|
||||
/* Send a message to the thread spawner to terminate the thread. */
|
||||
fs_inst *inst = bld.exec_all()
|
||||
.emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
|
||||
inst->eot = true;
|
||||
/* Set the descriptor to "Dereference Resource" and "Root Thread" */
|
||||
unsigned desc = 0;
|
||||
|
||||
/* Set Resource Select to "Do not dereference URB" on Gfx < 11.
|
||||
*
|
||||
* Note that even though the thread has a URB resource associated with it,
|
||||
* we set the "do not dereference URB" bit, because the URB resource is
|
||||
* managed by the fixed-function unit, so it will free it automatically.
|
||||
*/
|
||||
if (devinfo->ver < 11)
|
||||
desc |= (1 << 4); /* Do not dereference URB */
|
||||
|
||||
fs_reg srcs[4] = {
|
||||
brw_imm_ud(desc), /* desc */
|
||||
brw_imm_ud(0), /* ex_desc */
|
||||
payload, /* payload */
|
||||
fs_reg(), /* payload2 */
|
||||
};
|
||||
|
||||
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
|
||||
|
||||
/* On Alchemist and later, send an EOT message to the message gateway to
|
||||
* terminate a compute shader. For older GPUs, send to the thread spawner.
|
||||
*/
|
||||
send->sfid = devinfo->verx10 >= 125 ? BRW_SFID_MESSAGE_GATEWAY
|
||||
: BRW_SFID_THREAD_SPAWNER;
|
||||
send->mlen = 1;
|
||||
send->eot = true;
|
||||
}
|
||||
|
||||
fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
||||
|
|
|
|||
|
|
@ -860,15 +860,6 @@ F(rt_slot_group, /* 9+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11))
|
|||
F(rt_message_type, /* 9+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8))
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Thread Spawn message function control bits:
|
||||
* @{
|
||||
*/
|
||||
FC(ts_resource_select, /* 9+ */ MD( 4), MD( 4), /* 12+ */ -1, -1, devinfo->ver < 11)
|
||||
FC(ts_request_type, /* 9+ */ MD( 1), MD( 1), /* 12+ */ -1, -1, devinfo->ver < 11)
|
||||
F(ts_opcode, /* 9+ */ MD( 0), MD( 0), /* 12+ */ MD12(0), MD12(0))
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Pixel Interpolator message function control bits:
|
||||
* @{
|
||||
|
|
|
|||
|
|
@ -594,10 +594,6 @@ namespace {
|
|||
0 /* XXX */, 0,
|
||||
0, 0, 0, 0, 0, 0);
|
||||
|
||||
case CS_OPCODE_CS_TERMINATE:
|
||||
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
|
||||
10 /* XXX */, 0, 0, 0, 0, 0);
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
switch (info.sfid) {
|
||||
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
|
||||
|
|
@ -685,7 +681,8 @@ namespace {
|
|||
abort();
|
||||
}
|
||||
|
||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
||||
case BRW_SFID_MESSAGE_GATEWAY:
|
||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
|
||||
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
||||
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
|
||||
10 /* XXX */, 0, 0, 0, 0, 0);
|
||||
|
|
|
|||
|
|
@ -522,7 +522,8 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
|||
}
|
||||
break;
|
||||
|
||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
||||
case BRW_SFID_MESSAGE_GATEWAY:
|
||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
|
||||
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
||||
/* TODO.
|
||||
*
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue