mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
aco: avoid unaligned offsets when selecting load_global_amd
SMEM instructions mask off the low bits for the base and offset sources both before and after they're added. However, NIR expects ACO to only care about the alignment of the final address.

fossil-db (gfx1201):
Totals from 21 (0.03% of 79839) affected shaders:
Instrs: 229780 -> 229876 (+0.04%)
CodeSize: 1267724 -> 1268080 (+0.03%)
Latency: 2800924 -> 2800978 (+0.00%)
InvThroughput: 520250 -> 520256 (+0.00%)
Copies: 27878 -> 27876 (-0.01%); split: -0.01%, +0.00%
SALU: 29591 -> 29643 (+0.18%)

fossil-db (polaris10):
Totals from 3 (0.00% of 62201) affected shaders:
Latency: 2651 -> 2652 (+0.04%)
InvThroughput: 662 -> 663 (+0.15%)
PreSGPRs: 51 -> 54 (+5.88%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37301>
This commit is contained in:
parent
6d71521ecd
commit
81df517553
3 changed files with 39 additions and 1 deletions
|
|
@ -138,6 +138,7 @@ struct isel_context {
|
|||
|
||||
/* NIR range analysis. */
|
||||
struct hash_table* range_ht;
|
||||
struct hash_table* numlsb_ht;
|
||||
|
||||
Temp arg_temps[AC_MAX_ARGS];
|
||||
Operand workgroup_id[3];
|
||||
|
|
|
|||
|
|
@ -375,6 +375,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
|
||||
/* Init NIR range analysis. */
|
||||
ctx->range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
ctx->numlsb_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
uint32_t options =
|
||||
shader->options->divergence_analysis_options | nir_divergence_ignore_undef_if_phi_srcs;
|
||||
|
|
@ -728,6 +729,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
/* Tears down the NIR range-analysis state held by the selection context:
 * destroys ctx->numlsb_ht and ctx->range_ht, the two hash tables that
 * init_context() creates with _mesa_pointer_hash_table_create(NULL).
 * NULL is passed as the second argument of each destroy call
 * (presumably the per-entry delete callback -- confirm against util/hash_table.h).
 */
void
|
||||
cleanup_context(isel_context* ctx)
|
||||
{
|
||||
_mesa_hash_table_destroy(ctx->numlsb_ht, NULL);
|
||||
_mesa_hash_table_destroy(ctx->range_ht, NULL);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@
|
|||
#include "aco_instruction_selection.h"
|
||||
#include "aco_ir.h"
|
||||
|
||||
#include "nir_range_analysis.h"
|
||||
|
||||
#include "ac_descriptors.h"
|
||||
#include "ac_nir.h"
|
||||
#include "amdgfxregs.h"
|
||||
|
|
@ -281,6 +283,7 @@ struct LoadEmitInfo {
|
|||
unsigned align_mul = 0;
|
||||
unsigned align_offset = 0;
|
||||
pipe_format format;
|
||||
nir_src* resource_src = NULL; /* should be equal to resource or NULL */
|
||||
nir_src* offset_src = NULL; /* should be equal to offset or NULL */
|
||||
isel_context* ctx;
|
||||
|
||||
|
|
@ -350,6 +353,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
|||
if (new_info.resource.id() && new_info.resource.size() == 2 &&
|
||||
add_might_overflow(ctx, info.offset_src, to_add)) {
|
||||
new_info.resource = add64_32(bld, new_info.resource, Operand::c32(to_add));
|
||||
new_info.resource_src = NULL;
|
||||
offset_changed = false;
|
||||
} else if (offset.isConstant()) {
|
||||
offset = Operand::c32(offset.constantValue() + to_add);
|
||||
|
|
@ -502,6 +506,12 @@ get_smem_opcode(amd_gfx_level level, unsigned bytes, bool buffer, bool round_dow
|
|||
return {buffer ? aco_opcode::s_buffer_load_dwordx16 : aco_opcode::s_load_dwordx16, 64};
|
||||
}
|
||||
|
||||
/* Tests whether a NIR source satisfies a low-bit requirement.
 *
 * Evaluates to non-zero only when `src` is non-NULL and
 * nir_def_num_lsb_zero(), queried with the context's numlsb_ht table for
 * nir_get_scalar(src->ssa, 0), returns a value >= `req`. A NULL `src` yields 0
 * without dereferencing it.
 *
 * `req` is presumably the number of least-significant bits that must be known
 * to be zero (callers pass req_lsb_zero) -- confirm against nir_range_analysis.h.
 *
 * NOTE(review): the returned expression is boolean, yet the declared return
 * type is `unsigned`; the visible callers store the result in `bool`. Consider
 * declaring the function `bool`.
 */
unsigned
|
||||
src_has_req_lsb(isel_context* ctx, nir_src* src, unsigned req)
|
||||
{
|
||||
return src && nir_def_num_lsb_zero(ctx->numlsb_ht, nir_get_scalar(src->ssa, 0)) >= req;
|
||||
}
|
||||
|
||||
Temp
|
||||
smem_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_needed, unsigned align)
|
||||
{
|
||||
|
|
@ -540,6 +550,30 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_needed
|
|||
*/
|
||||
RegClass rc(RegType::sgpr, DIV_ROUND_UP(util_next_power_of_two(bytes_needed), 4u));
|
||||
|
||||
unsigned req_lsb_zero = bytes_needed == 1 ? 0 : (bytes_needed == 2 ? 1 : 2);
|
||||
if (!buffer) {
|
||||
/* We require each offset source and the final address to be aligned, so ensure at least
|
||||
* two sources are aligned. The remaining one can then be assumed to be aligned, otherwise the
|
||||
* final address is unaligned. */
|
||||
// TODO: lower in NIR
|
||||
bool addr_aligned = src_has_req_lsb(info.ctx, info.resource_src, req_lsb_zero);
|
||||
bool offset_aligned =
|
||||
!offset.id() || src_has_req_lsb(info.ctx, info.offset_src, req_lsb_zero);
|
||||
bool const_aligned = !const_offset || ffs(const_offset) > req_lsb_zero;
|
||||
|
||||
if (!offset_aligned && (!addr_aligned || !const_aligned)) {
|
||||
addr = add64_32(bld, addr, Operand(offset));
|
||||
offset = Temp();
|
||||
}
|
||||
if (!const_aligned && (!addr_aligned || !offset_aligned)) {
|
||||
addr = add64_32(bld, addr, Operand::c32(const_offset));
|
||||
const_offset = 0;
|
||||
}
|
||||
} else {
|
||||
/* We assume the buffer resource is also aligned. */
|
||||
assert(!const_offset || ffs(const_offset) > req_lsb_zero);
|
||||
}
|
||||
|
||||
bool soe = !buffer && offset.id() && const_offset && bld.program->gfx_level >= GFX9;
|
||||
aco_ptr<Instruction> load{create_instruction(op, Format::SMEM, 2 + soe, 1)};
|
||||
if (buffer) {
|
||||
|
|
@ -2334,7 +2368,8 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
info.align_mul = nir_intrinsic_align_mul(instr);
|
||||
info.align_offset = nir_intrinsic_align_offset(instr);
|
||||
info.sync = get_memory_sync_info(instr, storage_buffer, 0);
|
||||
info.offset_src = &instr->src[1];
|
||||
info.resource_src = &instr->src[0];
|
||||
info.offset_src = offset.id() ? &instr->src[1] : NULL;
|
||||
info.cache = get_cache_flags(ctx, access, ac_access_type_load);
|
||||
info.disable_wqm = access & ACCESS_SKIP_HELPERS;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue