mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
aco: Add common utility to load scratch descriptor
Also modifies the scratch descriptor to take the stack pointer into account. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35031>
This commit is contained in:
parent
cd2caa5e2b
commit
4a62b342f3
4 changed files with 66 additions and 94 deletions
|
|
@@ -12,6 +12,9 @@
|
|||
|
||||
#include "c11/threads.h"
|
||||
|
||||
#include "ac_descriptors.h"
|
||||
#include "amdgfxregs.h"
|
||||
|
||||
namespace aco {
|
||||
|
||||
thread_local aco::monotonic_buffer_resource* instruction_buffer = nullptr;
|
||||
|
|
@@ -1661,4 +1664,59 @@ create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
|
|||
return inst;
|
||||
}
|
||||
|
||||
/* Emits, through bld, the instructions that build the 4-dword (s4) buffer
 * resource used for scratch accesses and returns the resulting Temp.
 *
 * program:              supplies private_segment_buffers, scratch_offsets,
 *                       stage.hw, gfx_level and wave_size.
 * bld:                  builder that receives every emitted instruction.
 * resume_idx:           index into program->private_segment_buffers and
 *                       program->scratch_offsets. It is not range-checked
 *                       here; only emptiness of the vectors is tested.
 * apply_scratch_offset: when true (and scratch_offsets is non-empty), the
 *                       scratch offset is added into the 64-bit base address.
 *
 * The first two dwords of the result are the base address computed below; the
 * last two are constants taken from ac_build_buffer_descriptor().
 */
Temp
|
||||
load_scratch_resource(Program* program, Builder& bld, unsigned resume_idx,
|
||||
bool apply_scratch_offset)
|
||||
{
|
||||
/* Stays a default-constructed Temp when the program has no private segment buffers. */
Temp private_segment_buffer;
|
||||
if (!program->private_segment_buffers.empty())
|
||||
private_segment_buffer = program->private_segment_buffers[resume_idx];
|
||||
/* A zero-sized Temp means no buffer was provided: build the address from the
 * aco_symbol_scratch_addr_lo/hi symbols instead. */
if (!private_segment_buffer.bytes()) {
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
/* For non-compute HW stages the provided value is used as the base of a
 * 2-dword SMEM load at offset zero; for compute it is used as-is.
 * NOTE(review): presumably it is a pointer to the descriptor - confirm. */
} else if (program->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
private_segment_buffer =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
|
||||
}
|
||||
|
||||
/* Optionally fold scratch_offsets[resume_idx] into the 64-bit address. */
if (apply_scratch_offset && !program->scratch_offsets.empty()) {
|
||||
Temp addr_lo = bld.tmp(s1);
|
||||
Temp addr_hi = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
|
||||
private_segment_buffer);
|
||||
|
||||
Temp carry = bld.tmp(s1);
|
||||
Temp scratch_offset = program->scratch_offsets[resume_idx];
|
||||
/* 64-bit add done as two 32-bit adds: the low add writes its carry to SCC,
 * and the high add consumes it (adding constant zero plus carry). */
addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
|
||||
scratch_offset);
|
||||
addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
|
||||
Operand::c32(0), bld.scc(carry));
|
||||
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
}
|
||||
|
||||
/* Describe the buffer and let the common helper compute the descriptor
 * dwords; only desc[2] and desc[3] are consumed below. */
struct ac_buffer_state ac_state = {0};
|
||||
uint32_t desc[4];
|
||||
|
||||
ac_state.size = 0xffffffff;
|
||||
ac_state.format = PIPE_FORMAT_R32_FLOAT;
|
||||
for (int i = 0; i < 4; i++)
|
||||
ac_state.swizzle[i] = PIPE_SWIZZLE_0;
|
||||
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||
ac_state.element_size = program->gfx_level <= GFX8 ? 1u : 0u;
|
||||
/* NOTE(review): 3 vs 2 are encoded stride values selected by wave size;
 * confirm the encoding against ac_descriptors. */
ac_state.index_stride = program->wave_size == 64 ? 3u : 2u;
|
||||
ac_state.add_tid = true;
|
||||
ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;
|
||||
|
||||
ac_build_buffer_descriptor(program->gfx_level, &ac_state, desc);
|
||||
|
||||
/* Final resource: {address (2 dwords), desc[2], desc[3]}. */
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
|
||||
Operand::c32(desc[2]), Operand::c32(desc[3]));
|
||||
}
|
||||
|
||||
} // namespace aco
|
||||
|
|
|
|||
|
|
@@ -2356,6 +2356,9 @@ RegisterDemand get_addr_regs_from_waves(Program* program, uint16_t waves);
|
|||
|
||||
bool uses_scratch(Program* program);
|
||||
|
||||
Temp load_scratch_resource(Program* program, Builder& bld, unsigned resume_idx,
|
||||
bool apply_scratch_offset);
|
||||
|
||||
inline bool
|
||||
dominates_logical(const Block& parent, const Block& child)
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -1136,60 +1136,6 @@ spill_block(spill_ctx& ctx, unsigned block_idx)
|
|||
}
|
||||
}
|
||||
|
||||
/* Spiller variant: emits, through bld, the instructions that build the 4-dword
 * (s4) scratch buffer resource and returns it.
 *
 * ctx:                  supplies ctx.program and ctx.resume_idx, which index
 *                       private_segment_buffers and scratch_offsets.
 * bld:                  builder that receives every emitted instruction.
 * apply_scratch_offset: when true, scratch_offsets[ctx.resume_idx] is added to
 *                       the 64-bit base address. Unlike the buffer lookup, this
 *                       path does not check that scratch_offsets is non-empty.
 */
Temp
|
||||
load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
|
||||
{
|
||||
/* Stays a default-constructed Temp when the program has no private segment buffers. */
Temp private_segment_buffer;
|
||||
if (!ctx.program->private_segment_buffers.empty())
|
||||
private_segment_buffer = ctx.program->private_segment_buffers[ctx.resume_idx];
|
||||
|
||||
/* A zero-sized Temp means no buffer was provided: build the address from the
 * aco_symbol_scratch_addr_lo/hi symbols instead. */
if (!private_segment_buffer.bytes()) {
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
/* For non-compute HW stages the provided value is used as the base of a
 * 2-dword SMEM load at offset zero; for compute it is used as-is. */
} else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
private_segment_buffer =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
|
||||
}
|
||||
|
||||
if (apply_scratch_offset) {
|
||||
Temp addr_lo = bld.tmp(s1);
|
||||
Temp addr_hi = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
|
||||
private_segment_buffer);
|
||||
|
||||
Temp carry = bld.tmp(s1);
|
||||
/* 64-bit add done as two 32-bit adds: the low add writes its carry to SCC,
 * and the high add consumes it (adding constant zero plus carry). */
addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
|
||||
ctx.program->scratch_offsets[ctx.resume_idx]);
|
||||
addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
|
||||
Operand::c32(0), bld.scc(carry));
|
||||
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
}
|
||||
|
||||
/* Describe the buffer and let the common helper compute the descriptor
 * dwords; only desc[2] and desc[3] are consumed below. */
struct ac_buffer_state ac_state = {0};
|
||||
uint32_t desc[4];
|
||||
|
||||
ac_state.size = 0xffffffff;
|
||||
ac_state.format = PIPE_FORMAT_R32_FLOAT;
|
||||
for (int i = 0; i < 4; i++)
|
||||
ac_state.swizzle[i] = PIPE_SWIZZLE_0;
|
||||
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||
ac_state.element_size = ctx.program->gfx_level <= GFX8 ? 1u : 0u;
|
||||
ac_state.index_stride = ctx.program->wave_size == 64 ? 3u : 2u;
|
||||
ac_state.add_tid = true;
|
||||
ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;
|
||||
|
||||
ac_build_buffer_descriptor(ctx.program->gfx_level, &ac_state, desc);
|
||||
|
||||
/* Final resource: {address (2 dwords), desc[2], desc[3]}. */
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
|
||||
Operand::c32(desc[2]), Operand::c32(desc[3]));
|
||||
}
|
||||
|
||||
void
|
||||
setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
||||
std::vector<aco_ptr<Instruction>>& instructions, uint32_t spill_slot,
|
||||
|
|
@@ -1254,7 +1200,7 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
|||
}
|
||||
} else {
|
||||
if (ctx.scratch_rsrc == Temp())
|
||||
ctx.scratch_rsrc = load_scratch_resource(ctx, rsrc_bld, overflow);
|
||||
ctx.scratch_rsrc = load_scratch_resource(ctx.program, rsrc_bld, ctx.resume_idx, overflow);
|
||||
|
||||
if (overflow) {
|
||||
uint32_t soffset =
|
||||
|
|
|
|||
|
|
@@ -3235,43 +3235,6 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Instruction-selection variant: builds the 4-dword (s4) scratch buffer
 * resource in ctx->block and returns it.
 *
 * The base address comes from the last entry of
 * ctx->program->private_segment_buffers when that vector is non-empty. No
 * scratch offset is added to the address here.
 */
Temp
|
||||
get_scratch_resource(isel_context* ctx)
|
||||
{
|
||||
/* Instructions are emitted through a builder created on the current block. */
Builder bld(ctx->program, ctx->block);
|
||||
Temp scratch_addr;
|
||||
if (!ctx->program->private_segment_buffers.empty())
|
||||
scratch_addr = ctx->program->private_segment_buffers.back();
|
||||
/* A zero-sized Temp means no buffer was provided: build the address from the
 * aco_symbol_scratch_addr_lo/hi symbols instead. */
if (!scratch_addr.bytes()) {
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
/* For non-compute HW stages the provided value is used as the base of a
 * 2-dword SMEM load at offset zero; for compute it is used as-is. */
} else if (ctx->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
scratch_addr =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
|
||||
}
|
||||
|
||||
/* Describe the buffer and let the common helper compute the descriptor
 * dwords; only desc[2] and desc[3] are consumed below. */
struct ac_buffer_state ac_state = {0};
|
||||
uint32_t desc[4];
|
||||
|
||||
ac_state.size = 0xffffffff;
|
||||
ac_state.format = PIPE_FORMAT_R32_FLOAT;
|
||||
for (int i = 0; i < 4; i++)
|
||||
ac_state.swizzle[i] = PIPE_SWIZZLE_0;
|
||||
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||
ac_state.element_size = ctx->program->gfx_level <= GFX8 ? 1u : 0u;
|
||||
ac_state.index_stride = ctx->program->wave_size == 64 ? 3u : 2u;
|
||||
ac_state.add_tid = true;
|
||||
ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;
|
||||
|
||||
ac_build_buffer_descriptor(ctx->program->gfx_level, &ac_state, desc);
|
||||
|
||||
/* Final resource: {address (2 dwords), desc[2], desc[3]}. */
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand::c32(desc[2]),
|
||||
Operand::c32(desc[3]));
|
||||
}
|
||||
|
||||
void
|
||||
visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
|
|
@@ -3297,7 +3260,8 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
params.max_const_offset = ctx->program->dev.scratch_global_offset_max;
|
||||
emit_load(ctx, bld, info, params);
|
||||
} else {
|
||||
info.resource = get_scratch_resource(ctx);
|
||||
info.resource = load_scratch_resource(
|
||||
ctx->program, bld, ctx->program->private_segment_buffers.size() - 1, false);
|
||||
info.offset = Operand(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)));
|
||||
info.soffset = ctx->program->scratch_offsets.back();
|
||||
emit_load(ctx, bld, info, scratch_mubuf_load_params);
|
||||
|
|
@@ -3351,7 +3315,8 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
memory_sync_info(storage_scratch, semantic_private));
|
||||
}
|
||||
} else {
|
||||
Temp rsrc = get_scratch_resource(ctx);
|
||||
Temp rsrc = load_scratch_resource(ctx->program, bld,
|
||||
ctx->program->private_segment_buffers.size() - 1, false);
|
||||
offset = as_vgpr(ctx, offset);
|
||||
for (unsigned i = 0; i < write_count; i++) {
|
||||
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue