mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 13:50:11 +01:00
ac/nir_lower_mem_access_bit_sizes: Split unsupported shared memory instructions
Totals from 1400 (1.75% of 79839) affected shaders: (Navi48)
MaxWaves: 38313 -> 38317 (+0.01%); split: +0.06%, -0.05%
Instrs: 1162521 -> 1199627 (+3.19%); split: -0.01%, +3.20%
CodeSize: 5874288 -> 6146832 (+4.64%); split: -0.01%, +4.65%
VGPRs: 79948 -> 79984 (+0.05%); split: -0.12%, +0.17%
Latency: 3703961 -> 3741457 (+1.01%); split: -0.02%, +1.04%
InvThroughput: 589594 -> 590597 (+0.17%); split: -0.06%, +0.23%
VClause: 22561 -> 22564 (+0.01%)
SClause: 19615 -> 19611 (-0.02%); split: -0.03%, +0.01%
Copies: 70721 -> 71678 (+1.35%); split: -0.25%, +1.60%
PreVGPRs: 61068 -> 61101 (+0.05%); split: -0.00%, +0.06%
VALU: 651754 -> 651785 (+0.00%); split: -0.07%, +0.07%
SALU: 141953 -> 141955 (+0.00%)
VOPD: 489 -> 485 (-0.82%); split: +0.41%, -1.23%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36133>
This commit is contained in:
parent
63f7a03dd1
commit
52cd5f7e69
1 changed file with 15 additions and 7 deletions
|
|
@@ -4,6 +4,7 @@
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "util/blake3/blake3_impl.h"
|
||||||
#include "ac_nir.h"
|
#include "ac_nir.h"
|
||||||
#include "ac_nir_helpers.h"
|
#include "ac_nir_helpers.h"
|
||||||
|
|
||||||
|
|
@@ -62,6 +63,20 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
||||||
const bool is_load = nir_intrinsic_infos[intrin].has_dest;
|
const bool is_load = nir_intrinsic_infos[intrin].has_dest;
|
||||||
const bool is_smem = intrin == nir_intrinsic_load_push_constant || (access & ACCESS_SMEM_AMD);
|
const bool is_smem = intrin == nir_intrinsic_load_push_constant || (access & ACCESS_SMEM_AMD);
|
||||||
const uint32_t combined_align = nir_combined_align(align_mul, align_offset);
|
const uint32_t combined_align = nir_combined_align(align_mul, align_offset);
|
||||||
|
nir_mem_access_size_align res;
|
||||||
|
|
||||||
|
if (intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared) {
|
||||||
|
/* Split unsupported shared access. */
|
||||||
|
res.bit_size = MIN2(bit_size, combined_align * 8ull);
|
||||||
|
res.align = res.bit_size / 8;
|
||||||
|
/* Don't use >64-bit LDS loads for performance reasons. */
|
||||||
|
unsigned max_bytes = intrin == nir_intrinsic_store_shared && cb_data->gfx_level >= GFX7 ? 16 : 8;
|
||||||
|
bytes = MIN3(bytes, combined_align, max_bytes);
|
||||||
|
bytes = bytes == 12 ? bytes : round_down_to_power_of_2(bytes);
|
||||||
|
res.num_components = bytes / res.align;
|
||||||
|
res.shift = nir_mem_access_shift_method_bytealign_amd;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
/* Make 8-bit accesses 16-bit if possible */
|
/* Make 8-bit accesses 16-bit if possible */
|
||||||
if (is_load && bit_size == 8 && combined_align >= 2 && bytes % 2 == 0)
|
if (is_load && bit_size == 8 && combined_align >= 2 && bytes % 2 == 0)
|
||||||
|
|
@@ -79,18 +94,11 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
||||||
else if (is_smem)
|
else if (is_smem)
|
||||||
max_components = MIN2(512 / bit_size, 16);
|
max_components = MIN2(512 / bit_size, 16);
|
||||||
|
|
||||||
nir_mem_access_size_align res;
|
|
||||||
res.num_components = MIN2(DIV_ROUND_UP(bytes, bit_size / 8), max_components);
|
res.num_components = MIN2(DIV_ROUND_UP(bytes, bit_size / 8), max_components);
|
||||||
res.bit_size = bit_size;
|
res.bit_size = bit_size;
|
||||||
res.align = MIN2(bit_size / 8, 4); /* 64-bit access only requires 4 byte alignment. */
|
res.align = MIN2(bit_size / 8, 4); /* 64-bit access only requires 4 byte alignment. */
|
||||||
res.shift = nir_mem_access_shift_method_shift64;
|
res.shift = nir_mem_access_shift_method_shift64;
|
||||||
|
|
||||||
if ((intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared)) {
|
|
||||||
/* Split unaligned shared access to create more read2/write2. */
|
|
||||||
if (combined_align < 16 && bytes < 16)
|
|
||||||
res.num_components = MIN2(res.num_components, 64 / bit_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!is_load)
|
if (!is_load)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue