mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
aco: Add p_reload_preserved pseudo instruction
These are helper instructions for the spill_preserved pass to insert reloads for registers that are preserved by the ABI, yet clobbered by the callee shader.

There is one p_reload_preserved instruction at the end of each block. This allows us to insert reloads early, to alleviate the high latency of scratch reloads.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37381>
This commit is contained in:
parent
9dbf49de2c
commit
f0c613765c
12 changed files with 39 additions and 22 deletions
|
|
@ -1998,7 +1998,8 @@ is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
|
|||
{
|
||||
if (instr->definitions.empty() || instr->isBranch() || instr->isCall() ||
|
||||
instr->opcode == aco_opcode::p_startpgm || instr->opcode == aco_opcode::p_init_scratch ||
|
||||
instr->opcode == aco_opcode::p_dual_src_export_gfx11)
|
||||
instr->opcode == aco_opcode::p_dual_src_export_gfx11 ||
|
||||
instr->opcode == aco_opcode::p_reload_preserved)
|
||||
return false;
|
||||
|
||||
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
|
|
|
|||
|
|
@ -262,7 +262,8 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
|
|||
insn->operands[5].setLateKill(true); /* we re-use the destination reg in the middle */
|
||||
} else if (insn->opcode == aco_opcode::v_interp_p1_f32 && ctx.program->dev.has_16bank_lds) {
|
||||
insn->operands[0].setLateKill(true);
|
||||
} else if (insn->opcode == aco_opcode::p_init_scratch) {
|
||||
} else if (insn->opcode == aco_opcode::p_init_scratch ||
|
||||
insn->opcode == aco_opcode::p_reload_preserved) {
|
||||
insn->operands.back().setLateKill(true);
|
||||
} else if (instr_info.classes[(int)insn->opcode] == instr_class::wmma) {
|
||||
insn->operands[0].setLateKill(true);
|
||||
|
|
|
|||
|
|
@ -356,6 +356,8 @@ insn("p_unit_test")
|
|||
|
||||
insn("p_callee_stack_ptr")
|
||||
|
||||
insn("p_reload_preserved")
|
||||
|
||||
insn("p_create_vector")
|
||||
insn("p_extract_vector")
|
||||
insn("p_split_vector")
|
||||
|
|
|
|||
|
|
@ -316,8 +316,8 @@ can_eliminate(aco_ptr<Instruction>& instr)
|
|||
if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi ||
|
||||
instr->opcode == aco_opcode::p_linear_phi ||
|
||||
instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
|
||||
instr->opcode == aco_opcode::p_shader_cycles_hi_lo_hi ||
|
||||
instr->definitions[0].isNoCSE())
|
||||
instr->opcode == aco_opcode::p_shader_cycles_hi_lo_hi || instr->definitions[0].isNoCSE() ||
|
||||
instr->opcode == aco_opcode::p_reload_preserved)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -488,7 +488,8 @@ is_reorderable(const Instruction* instr)
|
|||
instr->opcode != aco_opcode::p_end_with_regs && instr->opcode != aco_opcode::s_nop &&
|
||||
instr->opcode != aco_opcode::s_sleep && instr->opcode != aco_opcode::s_trap &&
|
||||
instr->opcode != aco_opcode::p_call && instr->opcode != aco_opcode::p_logical_start &&
|
||||
instr->opcode != aco_opcode::p_logical_end;
|
||||
instr->opcode != aco_opcode::p_logical_end &&
|
||||
instr->opcode != aco_opcode::p_reload_preserved;
|
||||
}
|
||||
|
||||
struct memory_event_set {
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ void end_empty_exec_skip(isel_context* ctx);
|
|||
|
||||
/* aco_isel_helpers.cpp */
|
||||
void append_logical_start(Block* b);
|
||||
void append_logical_end(Block* b);
|
||||
void append_logical_end(isel_context* ctx, bool append_reload_preserved = true);
|
||||
Temp get_ssa_temp_tex(struct isel_context* ctx, nir_def* def, bool is_16bit);
|
||||
Temp bool_to_vector_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s2));
|
||||
Temp bool_to_scalar_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s1));
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
|
|||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Block* logical_target;
|
||||
append_logical_end(ctx->block);
|
||||
append_logical_end(ctx);
|
||||
unsigned idx = ctx->block->index;
|
||||
|
||||
if (is_break) {
|
||||
|
|
@ -119,7 +119,7 @@ update_exec_info(isel_context* ctx)
|
|||
void
|
||||
begin_loop(isel_context* ctx, loop_context* lc)
|
||||
{
|
||||
append_logical_end(ctx->block);
|
||||
append_logical_end(ctx);
|
||||
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
bld.branch(aco_opcode::p_branch);
|
||||
|
|
@ -158,7 +158,7 @@ end_loop(isel_context* ctx, loop_context* lc)
|
|||
if (!ctx->cf_info.has_branch) {
|
||||
unsigned loop_header_idx = ctx->cf_info.parent_loop.header_idx;
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
append_logical_end(ctx->block);
|
||||
append_logical_end(ctx);
|
||||
|
||||
ctx->block->kind |= (block_kind_continue | block_kind_uniform);
|
||||
if (!ctx->cf_info.has_divergent_branch)
|
||||
|
|
@ -201,7 +201,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
|||
|
||||
ic->cond = cond;
|
||||
|
||||
append_logical_end(ctx->block);
|
||||
append_logical_end(ctx);
|
||||
ctx->block->kind |= block_kind_uniform;
|
||||
|
||||
aco_ptr<Instruction> branch;
|
||||
|
|
@ -239,7 +239,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else)
|
|||
Block* BB_then = ctx->block;
|
||||
|
||||
if (!ctx->cf_info.has_branch) {
|
||||
append_logical_end(BB_then);
|
||||
append_logical_end(ctx);
|
||||
/* branch from then block to endif block */
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
|
||||
|
|
@ -272,7 +272,7 @@ end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else)
|
|||
|
||||
if (!ctx->cf_info.has_branch) {
|
||||
if (logical_else)
|
||||
append_logical_end(BB_else);
|
||||
append_logical_end(ctx);
|
||||
/* branch from then block to endif block */
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
|
||||
|
|
@ -306,7 +306,7 @@ void
|
|||
begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
|
||||
nir_selection_control sel_ctrl)
|
||||
{
|
||||
append_logical_end(ctx->block);
|
||||
append_logical_end(ctx);
|
||||
ctx->block->kind |= block_kind_branch;
|
||||
|
||||
/* branch to linear then block */
|
||||
|
|
@ -346,7 +346,7 @@ void
|
|||
begin_divergent_if_else(isel_context* ctx, if_context* ic, nir_selection_control sel_ctrl)
|
||||
{
|
||||
Block* BB_then_logical = ctx->block;
|
||||
append_logical_end(BB_then_logical);
|
||||
append_logical_end(ctx);
|
||||
/* branch from logical then block to invert block */
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
|
||||
|
|
@ -398,7 +398,7 @@ void
|
|||
end_divergent_if(isel_context* ctx, if_context* ic)
|
||||
{
|
||||
Block* BB_else_logical = ctx->block;
|
||||
append_logical_end(BB_else_logical);
|
||||
append_logical_end(ctx);
|
||||
|
||||
/* branch from logical else block to endif block */
|
||||
aco_ptr<Instruction> branch;
|
||||
|
|
|
|||
|
|
@ -41,9 +41,21 @@ append_logical_start(Block* b)
|
|||
}
|
||||
|
||||
void
|
||||
append_logical_end(Block* b)
|
||||
append_logical_end(isel_context* ctx, bool append_reload_preserved)
|
||||
{
|
||||
Builder(NULL, b).pseudo(aco_opcode::p_logical_end);
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
if (append_reload_preserved && ctx->program->is_callee) {
|
||||
Operand stack_ptr_op;
|
||||
if (ctx->program->gfx_level >= GFX9)
|
||||
stack_ptr_op = Operand(ctx->callee_info.stack_ptr.def.getTemp());
|
||||
else
|
||||
stack_ptr_op = Operand(load_scratch_resource(ctx->program, bld, -1u, false));
|
||||
bld.pseudo(aco_opcode::p_reload_preserved, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
|
||||
stack_ptr_op);
|
||||
}
|
||||
|
||||
bld.pseudo(aco_opcode::p_logical_end);
|
||||
}
|
||||
|
||||
Temp
|
||||
|
|
|
|||
|
|
@ -1200,7 +1200,7 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
|
|||
append_logical_start(ctx.block);
|
||||
split_arguments(&ctx, startpgm);
|
||||
visit_cf_list(&ctx, &nir_shader_get_entrypoint(nir)->body);
|
||||
append_logical_end(ctx.block);
|
||||
append_logical_end(&ctx);
|
||||
ctx.block->kind |= block_kind_uniform;
|
||||
|
||||
/* Fix output registers and jump to next shader. We can skip this when dealing with a raygen
|
||||
|
|
@ -1359,7 +1359,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
|
|||
if (need_endpgm) {
|
||||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
|
||||
append_logical_end(ctx.block);
|
||||
append_logical_end(&ctx);
|
||||
ctx.block->kind |= block_kind_uniform;
|
||||
|
||||
if ((!program->info.ps.has_epilog && !is_first_stage_of_merged_shader) ||
|
||||
|
|
|
|||
|
|
@ -470,7 +470,7 @@ select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
|
|||
|
||||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
|
||||
append_logical_end(ctx.block);
|
||||
append_logical_end(&ctx);
|
||||
ctx.block->kind |= block_kind_export_end;
|
||||
bld.reset(ctx.block);
|
||||
bld.sopp(aco_opcode::s_endpgm);
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
|
|||
|
||||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
|
||||
append_logical_end(ctx.block);
|
||||
append_logical_end(&ctx);
|
||||
|
||||
build_end_with_regs(&ctx, regs);
|
||||
|
||||
|
|
|
|||
|
|
@ -499,7 +499,7 @@ select_trap_handler_shader(Program* program, ac_shader_config* config,
|
|||
|
||||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
|
||||
append_logical_end(ctx.block);
|
||||
append_logical_end(&ctx);
|
||||
ctx.block->kind |= block_kind_uniform;
|
||||
bld.sopp(aco_opcode::s_endpgm);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue