mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
aco/gfx6: allow both constant and gpr offset for global with sgpr address
fossil-db (pitcairn):
Totals from 81 (0.13% of 62069) affected shaders:
MaxWaves: 332 -> 335 (+0.90%)
Instrs: 150087 -> 149737 (-0.23%); split: -0.30%, +0.06%
CodeSize: 754636 -> 752712 (-0.25%); split: -0.31%, +0.06%
SGPRs: 6128 -> 6184 (+0.91%)
VGPRs: 7220 -> 7208 (-0.17%); split: -0.28%, +0.11%
SpillSGPRs: 288 -> 287 (-0.35%)
Latency: 2199197 -> 2198338 (-0.04%); split: -0.20%, +0.17%
InvThroughput: 1613474 -> 1614303 (+0.05%); split: -0.07%, +0.12%
VClause: 2905 -> 2862 (-1.48%); split: -2.34%, +0.86%
SClause: 2366 -> 2378 (+0.51%); split: -0.17%, +0.68%
Copies: 17312 -> 17264 (-0.28%); split: -1.03%, +0.76%
PreSGPRs: 5080 -> 5004 (-1.50%)
PreVGPRs: 5656 -> 5640 (-0.28%)
VALU: 114097 -> 113831 (-0.23%); split: -0.31%, +0.07%
SALU: 16004 -> 15944 (-0.37%); split: -0.41%, +0.04%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35465>
This commit is contained in:
parent
684943bd1f
commit
d7dcd81c77
1 changed file with 11 additions and 7 deletions
|
|
@ -261,6 +261,7 @@ struct LoadEmitInfo {
|
|||
unsigned align_offset = 0;
|
||||
pipe_format format;
|
||||
nir_src* offset_src = NULL; /* should be equal to offset or NULL */
|
||||
isel_context* ctx;
|
||||
|
||||
ac_hw_cache_flags cache = {{0, 0, 0, 0, 0}};
|
||||
bool split_by_component_stride = true;
|
||||
|
|
@ -315,6 +316,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
|||
|
||||
/* reduce constant offset */
|
||||
LoadEmitInfo new_info = info;
|
||||
new_info.ctx = ctx;
|
||||
Operand offset = info.offset;
|
||||
unsigned reduced_const_offset = const_offset;
|
||||
if (const_offset > params.max_const_offset) {
|
||||
|
|
@ -817,8 +819,8 @@ get_gfx6_global_rsrc(Builder& bld, Temp addr)
|
|||
}
|
||||
|
||||
void
|
||||
lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
|
||||
uint32_t* const_offset_inout, Temp* offset_inout)
|
||||
lower_global_address(isel_context* ctx, Builder& bld, uint32_t offset_in, Temp* address_inout,
|
||||
uint32_t* const_offset_inout, Temp* offset_inout, nir_src* offset_src)
|
||||
{
|
||||
Temp address = *address_inout;
|
||||
uint64_t const_offset = *const_offset_inout + offset_in;
|
||||
|
|
@ -857,9 +859,10 @@ lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
|
|||
if (bld.program->gfx_level == GFX6) {
|
||||
/* GFX6 (MUBUF): (SGPR address, SGPR offset) or (SGPR address, VGPR offset) */
|
||||
/* GFX6 (MUBUF-addr64): (VGPR address, SGPR offset) */
|
||||
/* Disallow SGPR address with both a const_offset and offset because of possible overflow. */
|
||||
/* Disallow SGPR address with both a const_offset and offset in case of possible overflow. */
|
||||
if (offset.id() &&
|
||||
(address.type() == RegType::vgpr ? offset.type() != RegType::sgpr : const_offset > 0)) {
|
||||
(address.type() == RegType::vgpr ? offset.type() != RegType::sgpr
|
||||
: add_might_overflow(ctx, offset_src, const_offset))) {
|
||||
address = add64_32(bld, address, Operand(offset));
|
||||
offset = Temp();
|
||||
}
|
||||
|
|
@ -898,7 +901,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_need
|
|||
offset = Temp();
|
||||
}
|
||||
uint32_t const_offset = info.const_offset;
|
||||
lower_global_address(bld, 0, &addr, &const_offset, &offset);
|
||||
lower_global_address(info.ctx, bld, 0, &addr, &const_offset, &offset, info.offset_src);
|
||||
|
||||
unsigned bytes_size = 0;
|
||||
bool use_mubuf = bld.program->gfx_level == GFX6;
|
||||
|
|
@ -2518,7 +2521,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp write_address = addr;
|
||||
uint32_t write_const_offset = const_offset;
|
||||
Temp write_offset = offset;
|
||||
lower_global_address(bld, offsets[i], &write_address, &write_const_offset, &write_offset);
|
||||
lower_global_address(ctx, bld, offsets[i], &write_address, &write_const_offset, &write_offset,
|
||||
&instr->src[2]);
|
||||
|
||||
unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
|
||||
if (write_datas[i].bytes() < 4)
|
||||
|
|
@ -2613,7 +2617,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp addr, offset;
|
||||
uint32_t const_offset;
|
||||
parse_global(ctx, instr, &addr, &const_offset, &offset);
|
||||
lower_global_address(bld, 0, &addr, &const_offset, &offset);
|
||||
lower_global_address(ctx, bld, 0, &addr, &const_offset, &offset, &instr->src[2]);
|
||||
|
||||
if (ctx->options->gfx_level >= GFX7) {
|
||||
bool global = ctx->options->gfx_level >= GFX9;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue