mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
ac/nir_lower_mem_access_bit_sizes: Split unsupported shared memory instructions
Totals from 1400 (1.75% of 79839) affected shaders: (Navi48)
MaxWaves: 38313 -> 38317 (+0.01%); split: +0.06%, -0.05%
Instrs: 1162521 -> 1199627 (+3.19%); split: -0.01%, +3.20%
CodeSize: 5874288 -> 6146832 (+4.64%); split: -0.01%, +4.65%
VGPRs: 79948 -> 79984 (+0.05%); split: -0.12%, +0.17%
Latency: 3703961 -> 3741457 (+1.01%); split: -0.02%, +1.04%
InvThroughput: 589594 -> 590597 (+0.17%); split: -0.06%, +0.23%
VClause: 22561 -> 22564 (+0.01%)
SClause: 19615 -> 19611 (-0.02%); split: -0.03%, +0.01%
Copies: 70721 -> 71678 (+1.35%); split: -0.25%, +1.60%
PreVGPRs: 61068 -> 61101 (+0.05%); split: -0.00%, +0.06%
VALU: 651754 -> 651785 (+0.00%); split: -0.07%, +0.07%
SALU: 141953 -> 141955 (+0.00%)
VOPD: 489 -> 485 (-0.82%); split: +0.41%, -1.23%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36133>
This commit is contained in:
parent
63f7a03dd1
commit
52cd5f7e69
1 changed file with 15 additions and 7 deletions
|
|
@@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/blake3/blake3_impl.h"
|
||||
#include "ac_nir.h"
|
||||
#include "ac_nir_helpers.h"
|
||||
|
||||
|
|
@@ -62,6 +63,20 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
const bool is_load = nir_intrinsic_infos[intrin].has_dest;
|
||||
const bool is_smem = intrin == nir_intrinsic_load_push_constant || (access & ACCESS_SMEM_AMD);
|
||||
const uint32_t combined_align = nir_combined_align(align_mul, align_offset);
|
||||
nir_mem_access_size_align res;
|
||||
|
||||
if (intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared) {
|
||||
/* Split unsupported shared access. */
|
||||
res.bit_size = MIN2(bit_size, combined_align * 8ull);
|
||||
res.align = res.bit_size / 8;
|
||||
/* Don't use >64-bit LDS loads for performance reasons. */
|
||||
unsigned max_bytes = intrin == nir_intrinsic_store_shared && cb_data->gfx_level >= GFX7 ? 16 : 8;
|
||||
bytes = MIN3(bytes, combined_align, max_bytes);
|
||||
bytes = bytes == 12 ? bytes : round_down_to_power_of_2(bytes);
|
||||
res.num_components = bytes / res.align;
|
||||
res.shift = nir_mem_access_shift_method_bytealign_amd;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Make 8-bit accesses 16-bit if possible */
|
||||
if (is_load && bit_size == 8 && combined_align >= 2 && bytes % 2 == 0)
|
||||
|
|
@@ -79,18 +94,11 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
else if (is_smem)
|
||||
max_components = MIN2(512 / bit_size, 16);
|
||||
|
||||
nir_mem_access_size_align res;
|
||||
res.num_components = MIN2(DIV_ROUND_UP(bytes, bit_size / 8), max_components);
|
||||
res.bit_size = bit_size;
|
||||
res.align = MIN2(bit_size / 8, 4); /* 64-bit access only requires 4 byte alignment. */
|
||||
res.shift = nir_mem_access_shift_method_shift64;
|
||||
|
||||
if ((intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared)) {
|
||||
/* Split unaligned shared access to create more read2/write2. */
|
||||
if (combined_align < 16 && bytes < 16)
|
||||
res.num_components = MIN2(res.num_components, 64 / bit_size);
|
||||
}
|
||||
|
||||
if (!is_load)
|
||||
return res;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue