intel/brw: Replace CS_OPCODE_CS_TERMINATE with SHADER_OPCODE_SEND

There's no need for special handling here, it's just a send message
with a trivial g0 header and descriptor.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27924>
This commit is contained in:
Kenneth Graunke 2024-02-28 02:58:00 -08:00 committed by Marge Bot
parent f93f215898
commit 97bf3d3b2d
9 changed files with 42 additions and 92 deletions

View file

@ -506,11 +506,6 @@ enum opcode {
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
/**
* Terminate the compute shader.
*/
CS_OPCODE_CS_TERMINATE,
/**
* GLSL barrier()
*/

View file

@ -890,7 +890,6 @@ fs_inst::size_read(int arg) const
return retype(src[arg], BRW_REGISTER_TYPE_UD).component_size(8);
break;
case CS_OPCODE_CS_TERMINATE:
case SHADER_OPCODE_BARRIER:
return REG_SIZE;
@ -2440,8 +2439,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return "interp_per_slot_offset";
case CS_OPCODE_CS_TERMINATE:
return "cs_terminate";
case SHADER_OPCODE_BARRIER:
return "barrier";
case SHADER_OPCODE_MULH:

View file

@ -498,7 +498,6 @@ private:
struct brw_reg payload2);
void generate_fb_read(fs_inst *inst, struct brw_reg dst,
struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
void generate_barrier(fs_inst *inst, struct brw_reg src);
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);

View file

@ -611,46 +611,6 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst,
}
}
/**
 * Emit the SEND instruction that terminates a compute shader thread.
 *
 * \param inst     IR instruction being generated; only its eot flag is read
 *                 here.
 * \param payload  Register used as src0 of the SEND (retyped to UW).  The
 *                 message length is fixed at one register.
 *
 * The destination is the null register, no response is requested (rlen 0)
 * and the message is sent without a header.
 */
void
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
{
struct brw_inst *insn;
insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
/* src1 is only written (as an immediate zero) on hardware older than
 * ver 12; on ver 12 and later it is left untouched.
 */
if (devinfo->ver < 12)
brw_set_src1(p, insn, brw_imm_ud(0u));
/* For XeHP and newer send a message to the message gateway to terminate a
* compute shader. For older devices, a message is sent to the thread
* spawner.
*/
if (devinfo->verx10 >= 125)
brw_inst_set_sfid(devinfo, insn, BRW_SFID_MESSAGE_GATEWAY);
else
brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
/* One payload register, no response, EOT taken from the IR instruction. */
brw_inst_set_mlen(devinfo, insn, 1);
brw_inst_set_rlen(devinfo, insn, 0);
brw_inst_set_eot(devinfo, insn, inst->eot);
brw_inst_set_header_present(devinfo, insn, false);
brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
/* The request-type and resource-select fields are only programmed on
 * hardware older than ver 11.
 */
if (devinfo->ver < 11) {
brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */
/* Note that even though the thread has a URB resource associated with it,
* we set the "do not dereference URB" bit, because the URB resource is
* managed by the fixed-function unit, so it will free it automatically.
*/
brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */
}
/* NOTE(review): presumably mask control is disabled so the terminate message
 * is sent regardless of the current execution mask -- confirm.
 */
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
void
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
{
@ -1469,11 +1429,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
}
break;
case CS_OPCODE_CS_TERMINATE:
generate_cs_terminate(inst, src[0]);
send_count++;
break;
case SHADER_OPCODE_BARRIER:
generate_barrier(inst, src[0]);
send_count++;

View file

@ -221,24 +221,15 @@ void fs_visitor::calculate_payload_ranges(unsigned payload_node_count,
}
}
/* Special case instructions which have extra implied registers used. */
switch (inst->opcode) {
case CS_OPCODE_CS_TERMINATE:
if (inst->eot) {
/* We could omit this for the !inst->header_present case, except
* that the simulator apparently incorrectly reads from g0/g1
* instead of sideband. It also really freaks out driver
* developers to see g0 used in unusual places, so just always
* reserve it.
*/
payload_last_use_ip[0] = use_ip;
break;
default:
if (inst->eot) {
/* We could omit this for the !inst->header_present case, except
* that the simulator apparently incorrectly reads from g0/g1
* instead of sideband. It also really freaks out driver
* developers to see g0 used in unusual places, so just always
* reserve it.
*/
payload_last_use_ip[0] = use_ip;
payload_last_use_ip[1] = use_ip;
}
break;
payload_last_use_ip[1] = use_ip;
}
ip++;

View file

@ -970,7 +970,7 @@ fs_visitor::emit_urb_fence()
void
fs_visitor::emit_cs_terminate()
{
const fs_builder bld = fs_builder(this).at_end();
const fs_builder ubld = fs_builder(this).at_end().exec_all();
/* We can't directly send from g0, since sends with EOT have to use
* g112-127. So, copy it to a virtual register. The register allocator will
@ -978,12 +978,36 @@ fs_visitor::emit_cs_terminate()
*/
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
bld.group(8, 0).exec_all().MOV(payload, g0);
ubld.group(8, 0).MOV(payload, g0);
/* Send a message to the thread spawner to terminate the thread. */
fs_inst *inst = bld.exec_all()
.emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
inst->eot = true;
/* Set the descriptor to "Dereference Resource" and "Root Thread" */
unsigned desc = 0;
/* Set Resource Select to "Do not dereference URB" on Gfx < 11.
*
* Note that even though the thread has a URB resource associated with it,
* we set the "do not dereference URB" bit, because the URB resource is
* managed by the fixed-function unit, so it will free it automatically.
*/
if (devinfo->ver < 11)
desc |= (1 << 4); /* Do not dereference URB */
fs_reg srcs[4] = {
brw_imm_ud(desc), /* desc */
brw_imm_ud(0), /* ex_desc */
payload, /* payload */
fs_reg(), /* payload2 */
};
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
/* On Alchemist and later, send an EOT message to the message gateway to
* terminate a compute shader. For older GPUs, send to the thread spawner.
*/
send->sfid = devinfo->verx10 >= 125 ? BRW_SFID_MESSAGE_GATEWAY
: BRW_SFID_THREAD_SPAWNER;
send->mlen = 1;
send->eot = true;
}
fs_visitor::fs_visitor(const struct brw_compiler *compiler,

View file

@ -860,15 +860,6 @@ F(rt_slot_group, /* 9+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11))
F(rt_message_type, /* 9+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8))
/** @} */
/**
* Thread Spawn message function control bits:
* @{
*/
FC(ts_resource_select, /* 9+ */ MD( 4), MD( 4), /* 12+ */ -1, -1, devinfo->ver < 11)
FC(ts_request_type, /* 9+ */ MD( 1), MD( 1), /* 12+ */ -1, -1, devinfo->ver < 11)
F(ts_opcode, /* 9+ */ MD( 0), MD( 0), /* 12+ */ MD12(0), MD12(0))
/** @} */
/**
* Pixel Interpolator message function control bits:
* @{

View file

@ -594,10 +594,6 @@ namespace {
0 /* XXX */, 0,
0, 0, 0, 0, 0, 0);
case CS_OPCODE_CS_TERMINATE:
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
10 /* XXX */, 0, 0, 0, 0, 0);
case SHADER_OPCODE_SEND:
switch (info.sfid) {
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
@ -685,7 +681,8 @@ namespace {
abort();
}
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
case BRW_SFID_MESSAGE_GATEWAY:
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
10 /* XXX */, 0, 0, 0, 0, 0);

View file

@ -522,7 +522,8 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
}
break;
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
case BRW_SFID_MESSAGE_GATEWAY:
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
/* TODO.
*