mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco/gfx12: increase maximum vbuffer offset
fossil-db (gfx1201):
Totals from 301 (0.38% of 79377) affected shaders:
Instrs: 2734478 -> 2728816 (-0.21%); split: -0.21%, +0.00%
CodeSize: 14347476 -> 14306568 (-0.29%)
Latency: 15508055 -> 15502202 (-0.04%); split: -0.04%, +0.00%
InvThroughput: 2846419 -> 2842387 (-0.14%); split: -0.14%, +0.00%
VClause: 68286 -> 68101 (-0.27%); split: -0.30%, +0.03%
SClause: 49487 -> 49500 (+0.03%)
Copies: 207179 -> 206093 (-0.52%); split: -0.57%, +0.04%
Branches: 72941 -> 72942 (+0.00%); split: -0.00%, +0.00%
VALU: 1549156 -> 1544727 (-0.29%); split: -0.29%, +0.00%
SALU: 339620 -> 338989 (-0.19%); split: -0.19%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34730>
This commit is contained in:
parent
d987d5e341
commit
6338ed44c5
5 changed files with 45 additions and 27 deletions
|
|
@@ -4783,7 +4783,7 @@ lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
|
|||
if (bld.program->gfx_level >= GFX9)
|
||||
max_const_offset_plus_one = bld.program->dev.scratch_global_offset_max;
|
||||
else if (bld.program->gfx_level == GFX6)
|
||||
max_const_offset_plus_one = 4096; /* MUBUF has a 12-bit unsigned offset field */
|
||||
max_const_offset_plus_one = bld.program->dev.buf_offset_max + 1;
|
||||
uint64_t excess_offset = const_offset - (const_offset % max_const_offset_plus_one);
|
||||
const_offset %= max_const_offset_plus_one;
|
||||
|
||||
|
|
@@ -5297,9 +5297,10 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t
|
|||
inline unsigned
|
||||
resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_offset)
|
||||
{
|
||||
if (const_offset >= 4096) {
|
||||
unsigned excess_const_offset = const_offset / 4096u * 4096u;
|
||||
const_offset %= 4096u;
|
||||
uint32_t limit = bld.program->dev.buf_offset_max + 1;
|
||||
if (const_offset >= limit) {
|
||||
unsigned excess_const_offset = const_offset / limit * limit;
|
||||
const_offset %= limit;
|
||||
|
||||
if (!voffset.id())
|
||||
voffset = bld.copy(bld.def(v1), Operand::c32(excess_const_offset));
|
||||
|
|
@@ -6990,14 +6991,18 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||
info.component_stride = can_split ? vtx_info->chan_byte_size : 0;
|
||||
info.split_by_component_stride = false;
|
||||
|
||||
emit_load(ctx, bld, info, mtbuf_load_params);
|
||||
EmitLoadParameters params = mtbuf_load_params;
|
||||
params.max_const_offset_plus_one = ctx->program->dev.buf_offset_max + 1;
|
||||
emit_load(ctx, bld, info, params);
|
||||
} else {
|
||||
assert(intrin->intrinsic == nir_intrinsic_load_buffer_amd);
|
||||
|
||||
if (nir_intrinsic_access(intrin) & ACCESS_USES_FORMAT_AMD) {
|
||||
assert(!swizzled);
|
||||
|
||||
emit_load(ctx, bld, info, mubuf_load_format_params);
|
||||
EmitLoadParameters params = mubuf_load_format_params;
|
||||
params.max_const_offset_plus_one = ctx->program->dev.buf_offset_max + 1;
|
||||
emit_load(ctx, bld, info, params);
|
||||
} else {
|
||||
const unsigned swizzle_element_size =
|
||||
swizzled ? (ctx->program->gfx_level <= GFX8 ? 4 : 16) : 0;
|
||||
|
|
@@ -7007,7 +7012,9 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||
info.align_mul = align_mul;
|
||||
info.align_offset = align_offset;
|
||||
|
||||
emit_load(ctx, bld, info, mubuf_load_params);
|
||||
EmitLoadParameters params = mubuf_load_params;
|
||||
params.max_const_offset_plus_one = ctx->program->dev.buf_offset_max + 1;
|
||||
emit_load(ctx, bld, info, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -183,6 +183,11 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.scratch_global_offset_max = 4095;
|
||||
}
|
||||
|
||||
if (program->gfx_level >= GFX12)
|
||||
program->dev.buf_offset_max = 0x7fffff;
|
||||
else
|
||||
program->dev.buf_offset_max = 0xfff;
|
||||
|
||||
if (program->gfx_level >= GFX12)
|
||||
program->dev.smem_offset_max = 0x7fffff;
|
||||
else if (program->gfx_level >= GFX8)
|
||||
|
|
|
|||
|
|
@@ -1627,15 +1627,14 @@ static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected
|
|||
struct MUBUF_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
ac_hw_cache_flags cache;
|
||||
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
|
||||
bool tfe : 1; /* texture fail enable */
|
||||
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding0 : 2;
|
||||
uint8_t padding1;
|
||||
uint16_t offset; /* Unsigned byte offset - 12 bit */
|
||||
uint32_t offset : 23; /* Unsigned byte offset */
|
||||
uint32_t offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||
uint32_t idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
uint32_t addr64 : 1; /* SI, CIK: Address size is 64-bit */
|
||||
uint32_t tfe : 1; /* texture fail enable */
|
||||
uint32_t lds : 1; /* Return read-data to LDS instead of VGPRs */
|
||||
uint32_t disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint32_t padding : 3;
|
||||
};
|
||||
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
|
|
@@ -1656,10 +1655,12 @@ struct MTBUF_instruction : public Instruction {
|
|||
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
bool tfe : 1; /* texture fail enable */
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding : 5;
|
||||
uint16_t offset; /* Unsigned byte offset - 12 bit */
|
||||
uint8_t padding0 : 5;
|
||||
uint16_t padding1;
|
||||
uint32_t offset : 23; /* Unsigned byte offset */
|
||||
uint32_t padding2 : 9;
|
||||
};
|
||||
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 12, "Unexpected padding");
|
||||
|
||||
/**
|
||||
* Vector Memory Image Instructions
|
||||
|
|
@@ -2105,6 +2106,7 @@ struct DeviceInfo {
|
|||
int32_t scratch_global_offset_max;
|
||||
unsigned max_nsa_vgprs;
|
||||
|
||||
uint32_t buf_offset_max;
|
||||
/* Note that GFX6/7 ignore the low 2 bits and this is only for positive offsets. */
|
||||
uint32_t smem_offset_max;
|
||||
};
|
||||
|
|
|
|||
|
|
@@ -1499,35 +1499,38 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
* MUBUF accesses. */
|
||||
bool vaddr_prevent_overflow = swizzled && ctx.program->gfx_level < GFX9;
|
||||
|
||||
uint32_t const_max = ctx.program->dev.buf_offset_max;
|
||||
|
||||
if (mubuf.offen && mubuf.idxen && i == 1 && info.is_vec() &&
|
||||
info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
|
||||
info.instr->operands[0].regClass() == v1 && info.instr->operands[1].isConstant() &&
|
||||
mubuf.offset + info.instr->operands[1].constantValue() < 4096) {
|
||||
mubuf.offset + info.instr->operands[1].constantValue() <= const_max) {
|
||||
instr->operands[1] = info.instr->operands[0];
|
||||
mubuf.offset += info.instr->operands[1].constantValue();
|
||||
mubuf.offen = false;
|
||||
continue;
|
||||
} else if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) &&
|
||||
mubuf.offset + info.val < 4096) {
|
||||
mubuf.offset + info.val <= const_max) {
|
||||
assert(!mubuf.idxen);
|
||||
instr->operands[1] = Operand(v1);
|
||||
mubuf.offset += info.val;
|
||||
mubuf.offen = false;
|
||||
continue;
|
||||
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
|
||||
} else if (i == 2 && info.is_constant_or_literal(32) &&
|
||||
mubuf.offset + info.val <= const_max) {
|
||||
instr->operands[2] = Operand::c32(0);
|
||||
mubuf.offset += info.val;
|
||||
continue;
|
||||
} else if (mubuf.offen && i == 1 &&
|
||||
parse_base_offset(ctx, instr.get(), i, &base, &offset,
|
||||
vaddr_prevent_overflow) &&
|
||||
base.regClass() == v1 && mubuf.offset + offset < 4096) {
|
||||
base.regClass() == v1 && mubuf.offset + offset <= const_max) {
|
||||
assert(!mubuf.idxen);
|
||||
instr->operands[1].setTemp(base);
|
||||
mubuf.offset += offset;
|
||||
continue;
|
||||
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
|
||||
base.regClass() == s1 && mubuf.offset + offset < 4096 && !swizzled) {
|
||||
base.regClass() == s1 && mubuf.offset + offset <= const_max && !swizzled) {
|
||||
instr->operands[i].setTemp(base);
|
||||
mubuf.offset += offset;
|
||||
continue;
|
||||
|
|
@@ -1542,7 +1545,8 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (mtbuf.offen && mtbuf.idxen && i == 1 && info.is_vec() &&
|
||||
info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
|
||||
info.instr->operands[0].regClass() == v1 && info.instr->operands[1].isConstant() &&
|
||||
mtbuf.offset + info.instr->operands[1].constantValue() < 4096) {
|
||||
mtbuf.offset + info.instr->operands[1].constantValue() <=
|
||||
ctx.program->dev.buf_offset_max) {
|
||||
instr->operands[1] = info.instr->operands[0];
|
||||
mtbuf.offset += info.instr->operands[1].constantValue();
|
||||
mtbuf.offen = false;
|
||||
|
|
|
|||
|
|
@@ -1202,8 +1202,8 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
|||
offset_range =
|
||||
ctx.program->dev.scratch_global_offset_max - ctx.program->dev.scratch_global_offset_min;
|
||||
} else {
|
||||
if (scratch_size < 4095)
|
||||
offset_range = 4095 - scratch_size;
|
||||
if (scratch_size < ctx.program->dev.buf_offset_max)
|
||||
offset_range = ctx.program->dev.buf_offset_max - scratch_size;
|
||||
else
|
||||
offset_range = 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue