mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 13:30:12 +01:00
intel/brw: Replace CS_OPCODE_CS_TERMINATE with SHADER_OPCODE_SEND
There's no need for special handling here; it's just a send message with a trivial g0 header and descriptor.
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27924>
This commit is contained in:
parent
f93f215898
commit
97bf3d3b2d
9 changed files with 42 additions and 92 deletions
|
|
@ -506,11 +506,6 @@ enum opcode {
|
||||||
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
||||||
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
|
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
|
||||||
|
|
||||||
/**
|
|
||||||
* Terminate the compute shader.
|
|
||||||
*/
|
|
||||||
CS_OPCODE_CS_TERMINATE,
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GLSL barrier()
|
* GLSL barrier()
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -890,7 +890,6 @@ fs_inst::size_read(int arg) const
|
||||||
return retype(src[arg], BRW_REGISTER_TYPE_UD).component_size(8);
|
return retype(src[arg], BRW_REGISTER_TYPE_UD).component_size(8);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CS_OPCODE_CS_TERMINATE:
|
|
||||||
case SHADER_OPCODE_BARRIER:
|
case SHADER_OPCODE_BARRIER:
|
||||||
return REG_SIZE;
|
return REG_SIZE;
|
||||||
|
|
||||||
|
|
@ -2440,8 +2439,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
||||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||||
return "interp_per_slot_offset";
|
return "interp_per_slot_offset";
|
||||||
|
|
||||||
case CS_OPCODE_CS_TERMINATE:
|
|
||||||
return "cs_terminate";
|
|
||||||
case SHADER_OPCODE_BARRIER:
|
case SHADER_OPCODE_BARRIER:
|
||||||
return "barrier";
|
return "barrier";
|
||||||
case SHADER_OPCODE_MULH:
|
case SHADER_OPCODE_MULH:
|
||||||
|
|
|
||||||
|
|
@ -498,7 +498,6 @@ private:
|
||||||
struct brw_reg payload2);
|
struct brw_reg payload2);
|
||||||
void generate_fb_read(fs_inst *inst, struct brw_reg dst,
|
void generate_fb_read(fs_inst *inst, struct brw_reg dst,
|
||||||
struct brw_reg payload);
|
struct brw_reg payload);
|
||||||
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
|
|
||||||
void generate_barrier(fs_inst *inst, struct brw_reg src);
|
void generate_barrier(fs_inst *inst, struct brw_reg src);
|
||||||
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
|
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
|
||||||
struct brw_reg *src);
|
struct brw_reg *src);
|
||||||
|
|
|
||||||
|
|
@ -611,46 +611,6 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
|
|
||||||
{
|
|
||||||
struct brw_inst *insn;
|
|
||||||
|
|
||||||
insn = brw_next_insn(p, BRW_OPCODE_SEND);
|
|
||||||
|
|
||||||
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
|
|
||||||
brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
|
|
||||||
if (devinfo->ver < 12)
|
|
||||||
brw_set_src1(p, insn, brw_imm_ud(0u));
|
|
||||||
|
|
||||||
/* For XeHP and newer send a message to the message gateway to terminate a
|
|
||||||
* compute shader. For older devices, a message is sent to the thread
|
|
||||||
* spawner.
|
|
||||||
*/
|
|
||||||
if (devinfo->verx10 >= 125)
|
|
||||||
brw_inst_set_sfid(devinfo, insn, BRW_SFID_MESSAGE_GATEWAY);
|
|
||||||
else
|
|
||||||
brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
|
|
||||||
brw_inst_set_mlen(devinfo, insn, 1);
|
|
||||||
brw_inst_set_rlen(devinfo, insn, 0);
|
|
||||||
brw_inst_set_eot(devinfo, insn, inst->eot);
|
|
||||||
brw_inst_set_header_present(devinfo, insn, false);
|
|
||||||
|
|
||||||
brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
|
|
||||||
|
|
||||||
if (devinfo->ver < 11) {
|
|
||||||
brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */
|
|
||||||
|
|
||||||
/* Note that even though the thread has a URB resource associated with it,
|
|
||||||
* we set the "do not dereference URB" bit, because the URB resource is
|
|
||||||
* managed by the fixed-function unit, so it will free it automatically.
|
|
||||||
*/
|
|
||||||
brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */
|
|
||||||
}
|
|
||||||
|
|
||||||
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
|
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
|
||||||
{
|
{
|
||||||
|
|
@ -1469,11 +1429,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CS_OPCODE_CS_TERMINATE:
|
|
||||||
generate_cs_terminate(inst, src[0]);
|
|
||||||
send_count++;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case SHADER_OPCODE_BARRIER:
|
case SHADER_OPCODE_BARRIER:
|
||||||
generate_barrier(inst, src[0]);
|
generate_barrier(inst, src[0]);
|
||||||
send_count++;
|
send_count++;
|
||||||
|
|
|
||||||
|
|
@ -221,24 +221,15 @@ void fs_visitor::calculate_payload_ranges(unsigned payload_node_count,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Special case instructions which have extra implied registers used. */
|
if (inst->eot) {
|
||||||
switch (inst->opcode) {
|
/* We could omit this for the !inst->header_present case, except
|
||||||
case CS_OPCODE_CS_TERMINATE:
|
* that the simulator apparently incorrectly reads from g0/g1
|
||||||
|
* instead of sideband. It also really freaks out driver
|
||||||
|
* developers to see g0 used in unusual places, so just always
|
||||||
|
* reserve it.
|
||||||
|
*/
|
||||||
payload_last_use_ip[0] = use_ip;
|
payload_last_use_ip[0] = use_ip;
|
||||||
break;
|
payload_last_use_ip[1] = use_ip;
|
||||||
|
|
||||||
default:
|
|
||||||
if (inst->eot) {
|
|
||||||
/* We could omit this for the !inst->header_present case, except
|
|
||||||
* that the simulator apparently incorrectly reads from g0/g1
|
|
||||||
* instead of sideband. It also really freaks out driver
|
|
||||||
* developers to see g0 used in unusual places, so just always
|
|
||||||
* reserve it.
|
|
||||||
*/
|
|
||||||
payload_last_use_ip[0] = use_ip;
|
|
||||||
payload_last_use_ip[1] = use_ip;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ip++;
|
ip++;
|
||||||
|
|
|
||||||
|
|
@ -970,7 +970,7 @@ fs_visitor::emit_urb_fence()
|
||||||
void
|
void
|
||||||
fs_visitor::emit_cs_terminate()
|
fs_visitor::emit_cs_terminate()
|
||||||
{
|
{
|
||||||
const fs_builder bld = fs_builder(this).at_end();
|
const fs_builder ubld = fs_builder(this).at_end().exec_all();
|
||||||
|
|
||||||
/* We can't directly send from g0, since sends with EOT have to use
|
/* We can't directly send from g0, since sends with EOT have to use
|
||||||
* g112-127. So, copy it to a virtual register. The register allocator will
|
* g112-127. So, copy it to a virtual register. The register allocator will
|
||||||
|
|
@ -978,12 +978,36 @@ fs_visitor::emit_cs_terminate()
|
||||||
*/
|
*/
|
||||||
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
|
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
|
||||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||||
bld.group(8, 0).exec_all().MOV(payload, g0);
|
ubld.group(8, 0).MOV(payload, g0);
|
||||||
|
|
||||||
/* Send a message to the thread spawner to terminate the thread. */
|
/* Set the descriptor to "Dereference Resource" and "Root Thread" */
|
||||||
fs_inst *inst = bld.exec_all()
|
unsigned desc = 0;
|
||||||
.emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
|
|
||||||
inst->eot = true;
|
/* Set Resource Select to "Do not dereference URB" on Gfx < 11.
|
||||||
|
*
|
||||||
|
* Note that even though the thread has a URB resource associated with it,
|
||||||
|
* we set the "do not dereference URB" bit, because the URB resource is
|
||||||
|
* managed by the fixed-function unit, so it will free it automatically.
|
||||||
|
*/
|
||||||
|
if (devinfo->ver < 11)
|
||||||
|
desc |= (1 << 4); /* Do not dereference URB */
|
||||||
|
|
||||||
|
fs_reg srcs[4] = {
|
||||||
|
brw_imm_ud(desc), /* desc */
|
||||||
|
brw_imm_ud(0), /* ex_desc */
|
||||||
|
payload, /* payload */
|
||||||
|
fs_reg(), /* payload2 */
|
||||||
|
};
|
||||||
|
|
||||||
|
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
|
||||||
|
|
||||||
|
/* On Alchemist and later, send an EOT message to the message gateway to
|
||||||
|
* terminate a compute shader. For older GPUs, send to the thread spawner.
|
||||||
|
*/
|
||||||
|
send->sfid = devinfo->verx10 >= 125 ? BRW_SFID_MESSAGE_GATEWAY
|
||||||
|
: BRW_SFID_THREAD_SPAWNER;
|
||||||
|
send->mlen = 1;
|
||||||
|
send->eot = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
||||||
|
|
|
||||||
|
|
@ -860,15 +860,6 @@ F(rt_slot_group, /* 9+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11))
|
||||||
F(rt_message_type, /* 9+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8))
|
F(rt_message_type, /* 9+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8))
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
|
||||||
* Thread Spawn message function control bits:
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
FC(ts_resource_select, /* 9+ */ MD( 4), MD( 4), /* 12+ */ -1, -1, devinfo->ver < 11)
|
|
||||||
FC(ts_request_type, /* 9+ */ MD( 1), MD( 1), /* 12+ */ -1, -1, devinfo->ver < 11)
|
|
||||||
F(ts_opcode, /* 9+ */ MD( 0), MD( 0), /* 12+ */ MD12(0), MD12(0))
|
|
||||||
/** @} */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pixel Interpolator message function control bits:
|
* Pixel Interpolator message function control bits:
|
||||||
* @{
|
* @{
|
||||||
|
|
|
||||||
|
|
@ -594,10 +594,6 @@ namespace {
|
||||||
0 /* XXX */, 0,
|
0 /* XXX */, 0,
|
||||||
0, 0, 0, 0, 0, 0);
|
0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
case CS_OPCODE_CS_TERMINATE:
|
|
||||||
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
|
|
||||||
10 /* XXX */, 0, 0, 0, 0, 0);
|
|
||||||
|
|
||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
switch (info.sfid) {
|
switch (info.sfid) {
|
||||||
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
|
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
|
||||||
|
|
@ -685,7 +681,8 @@ namespace {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
case BRW_SFID_MESSAGE_GATEWAY:
|
||||||
|
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
|
||||||
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
||||||
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
|
return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0,
|
||||||
10 /* XXX */, 0, 0, 0, 0, 0);
|
10 /* XXX */, 0, 0, 0, 0, 0);
|
||||||
|
|
|
||||||
|
|
@ -522,7 +522,8 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
case BRW_SFID_MESSAGE_GATEWAY:
|
||||||
|
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: /* or THREAD_SPAWNER */
|
||||||
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
case GEN_RT_SFID_RAY_TRACE_ACCELERATOR:
|
||||||
/* TODO.
|
/* TODO.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue