mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
aco: implement nir_intrinsic_load_resume_shader_address_amd
Similar to p_constaddr but targeting BBs. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22096>
This commit is contained in:
parent
be9f4a80b8
commit
f66f274304
5 changed files with 62 additions and 3 deletions
|
|
@ -46,6 +46,7 @@ struct asm_context {
|
|||
enum amd_gfx_level gfx_level;
|
||||
std::vector<std::pair<int, SOPP_instruction*>> branches;
|
||||
std::map<unsigned, constaddr_info> constaddrs;
|
||||
std::map<unsigned, constaddr_info> resumeaddrs;
|
||||
std::vector<struct aco_symbol>* symbols;
|
||||
const int16_t* opcode;
|
||||
// TODO: keep track of branch instructions referring blocks
|
||||
|
|
@ -133,6 +134,19 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
} else if (instr->opcode == aco_opcode::p_constaddr_addlo) {
|
||||
ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1;
|
||||
|
||||
instr->opcode = aco_opcode::s_add_u32;
|
||||
instr->operands.pop_back();
|
||||
assert(instr->operands[1].isConstant());
|
||||
/* in case it's an inline constant, make it a literal */
|
||||
instr->operands[1] = Operand::literal32(instr->operands[1].constantValue());
|
||||
} else if (instr->opcode == aco_opcode::p_resumeaddr_getpc) {
|
||||
ctx.resumeaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;
|
||||
|
||||
instr->opcode = aco_opcode::s_getpc_b64;
|
||||
instr->operands.pop_back();
|
||||
} else if (instr->opcode == aco_opcode::p_resumeaddr_addlo) {
|
||||
ctx.resumeaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1;
|
||||
|
||||
instr->opcode = aco_opcode::s_add_u32;
|
||||
instr->operands.pop_back();
|
||||
assert(instr->operands[1].isConstant());
|
||||
|
|
@ -1049,6 +1063,13 @@ insert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned insert_before
|
|||
if (info.add_literal >= insert_before)
|
||||
info.add_literal += insert_count;
|
||||
}
|
||||
for (auto& constaddr : ctx.resumeaddrs) {
|
||||
constaddr_info& info = constaddr.second;
|
||||
if (info.getpc_end >= insert_before)
|
||||
info.getpc_end += insert_count;
|
||||
if (info.add_literal >= insert_before)
|
||||
info.add_literal += insert_count;
|
||||
}
|
||||
|
||||
if (ctx.symbols) {
|
||||
for (auto& symbol : *ctx.symbols) {
|
||||
|
|
@ -1188,6 +1209,12 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
|
|||
constaddr_info& info = constaddr.second;
|
||||
out[info.add_literal] += (out.size() - info.getpc_end) * 4u;
|
||||
}
|
||||
for (auto& addr : ctx.resumeaddrs) {
|
||||
constaddr_info& info = addr.second;
|
||||
const Block& block = ctx.program->blocks[out[info.add_literal]];
|
||||
assert(block.kind & block_kind_resume);
|
||||
out[info.add_literal] = (block.offset - info.getpc_end) * 4u;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
|
|||
|
|
@ -9004,6 +9004,12 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
|
||||
get_arg(ctx, ctx->args->rt.dynamic_callable_stack_base));
|
||||
break;
|
||||
case nir_intrinsic_load_resume_shader_address_amd: {
|
||||
bld.pseudo(aco_opcode::p_resume_shader_address,
|
||||
Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc),
|
||||
Operand::c32(nir_intrinsic_call_idx(instr)));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_overwrite_vs_arguments_amd: {
|
||||
ctx->arg_temps[ctx->args->vertex_id.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
ctx->arg_temps[ctx->args->instance_id.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
|
||||
|
|
|
|||
|
|
@ -1873,6 +1873,7 @@ enum block_kind {
|
|||
block_kind_discard_early_exit = 1 << 11,
|
||||
block_kind_uses_discard = 1 << 12,
|
||||
block_kind_needs_lowering = 1 << 13,
|
||||
block_kind_resume = 1 << 14,
|
||||
block_kind_export_end = 1 << 15,
|
||||
};
|
||||
|
||||
|
|
@ -1960,12 +1961,12 @@ struct Block {
|
|||
std::vector<unsigned> logical_succs;
|
||||
std::vector<unsigned> linear_succs;
|
||||
RegisterDemand register_demand = RegisterDemand();
|
||||
uint32_t kind = 0;
|
||||
int32_t logical_idom = -1;
|
||||
int32_t linear_idom = -1;
|
||||
uint16_t loop_nest_depth = 0;
|
||||
uint16_t divergent_if_logical_depth = 0;
|
||||
uint16_t uniform_if_depth = 0;
|
||||
uint16_t kind = 0;
|
||||
int logical_idom = -1;
|
||||
int linear_idom = -1;
|
||||
|
||||
/* this information is needed for predecessors to blocks with phis when
|
||||
* moving out of ssa */
|
||||
|
|
|
|||
|
|
@ -2423,6 +2423,28 @@ lower_to_hw_instr(Program* program)
|
|||
/* s_addc_u32 not needed because the program is in a 32-bit VA range */
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_resume_shader_address: {
|
||||
/* Find index of resume block. */
|
||||
unsigned resume_idx = instr->operands[0].constantValue();
|
||||
unsigned resume_block_idx = 0;
|
||||
for (Block& resume_block : program->blocks) {
|
||||
if (resume_block.kind & block_kind_resume) {
|
||||
if (resume_idx == 0) {
|
||||
resume_block_idx = resume_block.index;
|
||||
break;
|
||||
}
|
||||
resume_idx--;
|
||||
}
|
||||
}
|
||||
assert(resume_block_idx != 0);
|
||||
unsigned id = instr->definitions[0].tempId();
|
||||
PhysReg reg = instr->definitions[0].physReg();
|
||||
bld.sop1(aco_opcode::p_resumeaddr_getpc, instr->definitions[0], Operand::c32(id));
|
||||
bld.sop2(aco_opcode::p_resumeaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
|
||||
Operand(reg, s1), Operand::c32(resume_block_idx), Operand::c32(id));
|
||||
/* s_addc_u32 not needed because the program is in a 32-bit VA range */
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_extract: {
|
||||
assert(instr->operands[1].isConstant());
|
||||
assert(instr->operands[2].isConstant());
|
||||
|
|
|
|||
|
|
@ -334,6 +334,7 @@ opcode("p_bpermute_gfx11w64")
|
|||
opcode("p_elect")
|
||||
|
||||
opcode("p_constaddr")
|
||||
opcode("p_resume_shader_address")
|
||||
|
||||
# These don't have to be pseudo-ops, but it makes optimization easier to only
|
||||
# have to consider two instructions.
|
||||
|
|
@ -414,6 +415,7 @@ SOP2 = {
|
|||
( -1, -1, -1, 0x2d, 0x36, 0x2e, "s_mul_hi_i32"),
|
||||
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
|
||||
( -1, -1, -1, -1, -1, -1, "p_constaddr_addlo"),
|
||||
( -1, -1, -1, -1, -1, -1, "p_resumeaddr_addlo"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP2, InstrClass.Salu):
|
||||
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOP2, cls)
|
||||
|
|
@ -530,6 +532,7 @@ SOP1 = {
|
|||
( -1, -1, -1, -1, -1, 0x4d, "s_sendmsg_rtn_b64"),
|
||||
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
|
||||
( -1, -1, -1, -1, -1, -1, "p_constaddr_getpc"),
|
||||
( -1, -1, -1, -1, -1, -1, "p_resumeaddr_getpc"),
|
||||
( -1, -1, -1, -1, -1, -1, "p_load_symbol"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP1, InstrClass.Salu):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue