aco/gfx12: implement subgroup shader clock

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29330>
This commit is contained in:
Rhys Perry 2024-04-12 12:33:56 +01:00 committed by Marge Bot
parent 872dda2bc5
commit fae2a85d57
4 changed files with 23 additions and 0 deletions

View file

@ -8977,6 +8977,15 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_shader_clock: {
Temp dst = get_ssa_temp(ctx, &instr->def);
if (nir_intrinsic_memory_scope(instr) == SCOPE_SUBGROUP &&
ctx->options->gfx_level >= GFX12) {
Temp hi0 = bld.tmp(s1);
Temp hi1 = bld.tmp(s1);
Temp lo = bld.tmp(s1);
bld.pseudo(aco_opcode::p_shader_cycles_hi_lo_hi, Definition(hi0), Definition(lo), Definition(hi1));
Temp hi_eq = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), hi0, hi1);
lo = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), lo, Operand::zero(), bld.scc(hi_eq));
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi1);
} else if (nir_intrinsic_memory_scope(instr) == SCOPE_SUBGROUP &&
ctx->options->gfx_level >= GFX10_3) {
/* "((size - 1) << 11) | register" (SHADER_CYCLES is encoded as register 29) */
Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);

View file

@ -2726,6 +2726,17 @@ lower_to_hw_instr(Program* program)
end_with_regs_block_index = block->index;
break;
}
/* Expand the p_shader_cycles_hi_lo_hi pseudo-instruction into three
 * s_getreg_b32 reads, emitted in the order hi, lo, hi and written to
 * definitions[0], definitions[1] and definitions[2] respectively.
 * The high half is sampled on both sides of the low half so that the code
 * which emitted this pseudo can compare the two high values and detect that
 * the high half changed while the low half was being read.
 */
case aco_opcode::p_shader_cycles_hi_lo_hi: {
/* Register ids placed in the low bits of the s_getreg_b32 immediates below. */
unsigned shader_cycles_lo = 29;
unsigned shader_cycles_hi = 30;
/* The immediate is built as "((size - 1) << 11) | register"; size is 32 for
 * all three reads.
 */
/* First sample of the high half. */
bld.sopk(aco_opcode::s_getreg_b32, instr->definitions[0],
((32 - 1) << 11) | shader_cycles_hi);
/* Low half, read between the two high-half samples. */
bld.sopk(aco_opcode::s_getreg_b32, instr->definitions[1],
((32 - 1) << 11) | shader_cycles_lo);
/* Second sample of the high half. */
bld.sopk(aco_opcode::s_getreg_b32, instr->definitions[2],
((32 - 1) << 11) | shader_cycles_hi);
break;
}
default: break;
}
} else if (instr->isBranch()) {

View file

@ -445,6 +445,8 @@ insn("p_dual_src_export_gfx11")
# shader to pass arguments to next part.
insn("p_end_with_regs")
# Pseudo-instruction with three definitions (hi, lo, hi); it is lowered to
# three s_getreg_b32 reads of the shader-cycles registers in that order.
insn("p_shader_cycles_hi_lo_hi")
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
SOP2 = {
("s_add_u32", dst(1, SCC), src(1, 1), op(0x00)),

View file

@ -596,6 +596,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
/* don't move non-reorderable instructions */
if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
instr->opcode == aco_opcode::p_shader_cycles_hi_lo_hi ||
instr->opcode == aco_opcode::p_init_scratch ||
instr->opcode == aco_opcode::p_jump_to_epilog ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||