aco: update sendmsg enum from LLVM

Add the GFX11 message IDs, along with some IDs that are new to this enum but apparently already existed on older hardware.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17710>
This commit is contained in:
Rhys Perry 2022-07-21 15:45:11 +01:00 committed by Marge Bot
parent 7cecc81683
commit 6407d783ea
3 changed files with 62 additions and 38 deletions

View file

@ -85,15 +85,26 @@ aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
/* Message IDs used in the immediate of s_sendmsg.
 *
 * IDs 2 and 3 are shared by two different messages each; which name applies
 * depends on the hardware generation given in the per-entry comments. The
 * width of the ID field also differs: use sendmsg_id_mask_gfx6 or
 * sendmsg_id_mask_gfx11 to extract the ID from the immediate.
 */
enum sendmsg {
sendmsg_none = 0,
_sendmsg_gs = 2,
_sendmsg_gs_done = 3,
sendmsg_save_wave = 4,
sendmsg_stall_wave_gen = 5,
sendmsg_halt_waves = 6,
sendmsg_ordered_ps_done = 7,
sendmsg_early_prim_dealloc = 8,
sendmsg_gs_alloc_req = 9,
sendmsg_id_mask = 0xf,
_sendmsg_gs = 2, /* gfx6 to gfx10.3 */
_sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
sendmsg_hs_tessfactor = 2, /* gfx11+ */
sendmsg_dealloc_vgprs = 3, /* gfx11+ */
sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
sendmsg_stall_wave_gen = 5, /* gfx9+ */
sendmsg_halt_waves = 6, /* gfx9+ */
sendmsg_ordered_ps_done = 7, /* gfx9+ */
sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
sendmsg_gs_alloc_req = 9, /* gfx9+ */
sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
sendmsg_rtn_get_doorbell = 128, /* gfx11+ */
sendmsg_rtn_get_ddid = 129, /* gfx11+ */
sendmsg_rtn_get_tma = 130, /* gfx11+ */
sendmsg_rtn_get_realtime = 131, /* gfx11+ */
sendmsg_rtn_save_wave = 132, /* gfx11+ */
sendmsg_rtn_get_tba = 133, /* gfx11+ */
sendmsg_id_mask_gfx6 = 0xf,
sendmsg_id_mask_gfx11 = 0xff,
};
inline sendmsg

View file

@ -333,16 +333,25 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
break;
}
/* Print the s_sendmsg immediate as a symbolic message name; IDs without a
 * known name fall back to printing the raw immediate. */
case aco_opcode::s_sendmsg: {
unsigned id = imm & sendmsg_id_mask;
unsigned id =
gfx_level >= GFX11 ? (imm & sendmsg_id_mask_gfx11) : (imm & sendmsg_id_mask_gfx6);
/* IDs 2 and 3 are shared between the pre-gfx11 and gfx11+ messages, so one
 * case label handles both and the printed name is chosen by gfx_level. */
static_assert(_sendmsg_gs == sendmsg_hs_tessfactor);
static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs);
switch (id) {
case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
case _sendmsg_gs:
fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
imm & 0x20 ? ", emit" : "", imm >> 8);
if (gfx_level >= GFX11)
fprintf(output, " sendmsg(hs_tessfactor)");
else
fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
imm & 0x20 ? ", emit" : "", imm >> 8);
break;
case _sendmsg_gs_done:
fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
imm & 0x20 ? ", emit" : "", imm >> 8);
if (gfx_level >= GFX11)
fprintf(output, " sendmsg(dealloc_vgprs)");
else
fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
imm & 0x20 ? ", emit" : "", imm >> 8);
break;
case sendmsg_save_wave: fprintf(output, " sendmsg(save_wave)"); break;
case sendmsg_stall_wave_gen: fprintf(output, " sendmsg(stall_wave_gen)"); break;
@ -350,6 +359,15 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
case sendmsg_ordered_ps_done: fprintf(output, " sendmsg(ordered_ps_done)"); break;
case sendmsg_early_prim_dealloc: fprintf(output, " sendmsg(early_prim_dealloc)"); break;
case sendmsg_gs_alloc_req: fprintf(output, " sendmsg(gs_alloc_req)"); break;
case sendmsg_get_doorbell: fprintf(output, " sendmsg(get_doorbell)"); break;
case sendmsg_get_ddid: fprintf(output, " sendmsg(get_ddid)"); break;
/* Printed names mirror the enumerator names in lower case. */
case sendmsg_rtn_get_doorbell: fprintf(output, " sendmsg(rtn_get_doorbell)"); break;
case sendmsg_rtn_get_ddid: fprintf(output, " sendmsg(rtn_get_ddid)"); break;
case sendmsg_rtn_get_tma: fprintf(output, " sendmsg(rtn_get_tma)"); break;
case sendmsg_rtn_get_realtime: fprintf(output, " sendmsg(rtn_get_realtime)"); break;
case sendmsg_rtn_save_wave: fprintf(output, " sendmsg(rtn_save_wave)"); break;
case sendmsg_rtn_get_tba: fprintf(output, " sendmsg(rtn_get_tba)"); break;
default: fprintf(output, " imm:%u", imm);
}
break;
}

View file

@ -122,6 +122,7 @@ struct MoveState {
};
struct sched_ctx {
amd_gfx_level gfx_level;
int16_t num_waves;
int16_t last_SMEM_stall;
int last_SMEM_dep_idx;
@ -420,20 +421,10 @@ MoveState::upwards_skip(UpwardsCursor& cursor)
}
bool
is_gs_or_done_sendmsg(const Instruction* instr)
is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
{
if (instr->opcode == aco_opcode::s_sendmsg) {
uint16_t imm = instr->sopp().imm;
return (imm & sendmsg_id_mask) == _sendmsg_gs || (imm & sendmsg_id_mask) == _sendmsg_gs_done;
}
return false;
}
bool
is_done_sendmsg(const Instruction* instr)
{
if (instr->opcode == aco_opcode::s_sendmsg)
return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
/* Only match up to gfx10.3: the sendmsg enum marks _sendmsg_gs_done as
 * "gfx6 to gfx10.3", and the same ID value is sendmsg_dealloc_vgprs on
 * gfx11+, so this returns false for every instruction on newer levels. */
if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
return (instr->sopp().imm & sendmsg_id_mask_gfx6) == _sendmsg_gs_done;
return false;
}
@ -464,6 +455,7 @@ struct memory_event_set {
};
struct hazard_query {
amd_gfx_level gfx_level;
bool contains_spill;
bool contains_sendmsg;
bool uses_exec;
@ -473,8 +465,9 @@ struct hazard_query {
};
void
init_hazard_query(hazard_query* query)
init_hazard_query(const sched_ctx& ctx, hazard_query* query)
{
query->gfx_level = ctx.gfx_level;
query->contains_spill = false;
query->contains_sendmsg = false;
query->uses_exec = false;
@ -484,9 +477,10 @@ init_hazard_query(hazard_query* query)
}
void
add_memory_event(memory_event_set* set, Instruction* instr, memory_sync_info* sync)
add_memory_event(amd_gfx_level gfx_level, memory_event_set* set, Instruction* instr,
memory_sync_info* sync)
{
set->has_control_barrier |= is_done_sendmsg(instr);
set->has_control_barrier |= is_done_sendmsg(gfx_level, instr);
if (instr->opcode == aco_opcode::p_barrier) {
Pseudo_barrier_instruction& bar = instr->barrier();
if (bar.sync.semantics & semantic_acquire)
@ -524,7 +518,7 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)
memory_sync_info sync = get_sync_info_with_hack(instr);
add_memory_event(&query->mem_events, instr, &sync);
add_memory_event(query->gfx_level, &query->mem_events, instr, &sync);
if (!(sync.semantics & semantic_can_reorder)) {
unsigned storage = sync.storage;
@ -580,7 +574,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
memory_event_set instr_set;
memset(&instr_set, 0, sizeof(instr_set));
memory_sync_info sync = get_sync_info_with_hack(instr);
add_memory_event(&instr_set, instr, &sync);
add_memory_event(query->gfx_level, &instr_set, instr, &sync);
memory_event_set* first = &instr_set;
memory_event_set* second = &query->mem_events;
@ -655,7 +649,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
/* first, check if we have instructions before current to move down */
hazard_query hq;
init_hazard_query(&hq);
init_hazard_query(ctx, &hq);
add_to_hazard_query(&hq, current);
DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
@ -751,7 +745,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
if (is_dependency) {
if (!found_dependency) {
ctx.mv.upwards_update_insert_idx(up_cursor);
init_hazard_query(&hq);
init_hazard_query(ctx, &hq);
found_dependency = true;
}
}
@ -797,8 +791,8 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
/* first, check if we have instructions before current to move down */
hazard_query indep_hq;
hazard_query clause_hq;
init_hazard_query(&indep_hq);
init_hazard_query(&clause_hq);
init_hazard_query(ctx, &indep_hq);
init_hazard_query(ctx, &clause_hq);
add_to_hazard_query(&indep_hq, current);
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
@ -923,7 +917,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
if (is_dependency) {
if (!found_dependency) {
ctx.mv.upwards_update_insert_idx(up_cursor);
init_hazard_query(&indep_hq);
init_hazard_query(ctx, &indep_hq);
found_dependency = true;
}
} else if (is_vmem) {
@ -967,7 +961,7 @@ schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDeman
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
hazard_query hq;
init_hazard_query(&hq);
init_hazard_query(ctx, &hq);
add_to_hazard_query(&hq, current);
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
@ -1054,6 +1048,7 @@ schedule_program(Program* program, live& live_vars)
demand.vgpr += program->config->num_shared_vgprs / 2;
sched_ctx ctx;
ctx.gfx_level = program->gfx_level;
ctx.mv.depends_on.resize(program->peekAllocationId());
ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());