aco: get scratch addr from symbol for radeonsi

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22727>
This commit is contained in:
Qiang Yu 2023-04-26 14:58:58 +08:00
parent 360176b671
commit 3c59df7318
5 changed files with 46 additions and 18 deletions

View file

@ -7519,9 +7519,16 @@ get_scratch_resource(isel_context* ctx)
{
Builder bld(ctx->program, ctx->block);
Temp scratch_addr = ctx->program->private_segment_buffer;
if (ctx->stage.hw != HWStage::CS)
if (!scratch_addr.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_hi));
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx->stage.hw != HWStage::CS) {
scratch_addr =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
}
uint32_t rsrc_conf =
S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2);
@ -11138,22 +11145,25 @@ add_startpgm(struct isel_context* ctx)
}
}
if (ctx->args->ring_offsets.used) {
if (ctx->program->gfx_level < GFX9) {
/* Stash these in the program so that they can be accessed later when
* handling spilling.
*/
if (ctx->program->gfx_level < GFX9) {
/* Stash these in the program so that they can be accessed later when
* handling spilling.
*/
if (ctx->args->ring_offsets.used)
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
} else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
/* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
scratch_offset.setLateKill(true);
Builder bld(ctx->program, ctx->block);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
get_arg(ctx, ctx->args->ring_offsets), scratch_offset);
}
ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
} else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
/* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
scratch_offset.setLateKill(true);
Operand scratch_addr = ctx->args->ring_offsets.used ?
Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
Builder bld(ctx->program, ctx->block);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
scratch_addr, scratch_offset);
}
return startpgm;

View file

@ -2489,7 +2489,14 @@ lower_to_hw_instr(Program* program)
break;
Operand scratch_addr = instr->operands[0];
if (program->stage.hw != HWStage::CS) {
if (scratch_addr.isUndefined()) {
PhysReg reg = instr->definitions[0].physReg();
bld.sop1(aco_opcode::p_load_symbol, Definition(reg, s1),
Operand::c32(aco_symbol_scratch_addr_lo));
bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
Operand::c32(aco_symbol_scratch_addr_hi));
scratch_addr.setFixed(reg);
} else if (program->stage.hw != HWStage::CS) {
bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
Operand::zero());
scratch_addr.setFixed(instr->definitions[0].physReg());

View file

@ -162,6 +162,8 @@ enum aco_statistic {
enum aco_symbol_id {
aco_symbol_invalid,
aco_symbol_scratch_addr_lo,
aco_symbol_scratch_addr_hi,
};
struct aco_symbol {

View file

@ -1416,9 +1416,17 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
return bld.copy(bld.def(s1), Operand::c32(offset));
Temp private_segment_buffer = ctx.program->private_segment_buffer;
if (ctx.program->stage.hw != HWStage::CS)
if (!private_segment_buffer.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_hi));
private_segment_buffer =
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx.program->stage.hw != HWStage::CS) {
private_segment_buffer =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
}
if (offset)
scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),

View file

@ -270,7 +270,8 @@ validate_ir(Program* program)
(instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
(instr->isScratch() && i == 0);
(instr->isScratch() && i == 0) ||
(instr->opcode == aco_opcode::p_init_scratch && i == 0);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
} else {
check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||