mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
ac/nir: allow less than one register of overfetch
This is to allow vectorization of 8/16-bit loads, which can later be
cheaply lowered to a 32-bit load.
fossil-db (gfx1201):
Totals from 178 (0.22% of 79377) affected shaders:
MaxWaves: 4138 -> 4102 (-0.87%)
Instrs: 619714 -> 617917 (-0.29%); split: -0.32%, +0.03%
CodeSize: 3364396 -> 3352724 (-0.35%); split: -0.38%, +0.03%
VGPRs: 12896 -> 12980 (+0.65%); split: -0.19%, +0.84%
SpillSGPRs: 546 -> 545 (-0.18%)
Latency: 7589585 -> 7406076 (-2.42%); split: -2.45%, +0.04%
InvThroughput: 1926356 -> 1879866 (-2.41%); split: -2.42%, +0.00%
VClause: 12301 -> 11750 (-4.48%)
SClause: 13614 -> 13583 (-0.23%); split: -0.45%, +0.22%
Copies: 82207 -> 82265 (+0.07%); split: -0.10%, +0.17%
Branches: 19284 -> 19266 (-0.09%)
PreSGPRs: 9525 -> 9457 (-0.71%)
PreVGPRs: 12366 -> 12421 (+0.44%)
VALU: 347928 -> 348020 (+0.03%); split: -0.01%, +0.04%
SALU: 82620 -> 82519 (-0.12%); split: -0.19%, +0.07%
VMEM: 22248 -> 21430 (-3.68%)
SMEM: 17951 -> 17843 (-0.60%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34162>
This commit is contained in:
parent
ddef4bddf8
commit
6dbf44ad9c
1 changed file with 6 additions and 5 deletions
|
|
@@ -575,13 +575,14 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
|
|||
if (config->uses_aco && uses_smem && aligned_new_size >= 128)
|
||||
overfetch_size = 32;
|
||||
|
||||
+ /* Allow overfetching from 8/16 bits to 32 bits. */
|
||||
int64_t aligned_unvectorized_size =
|
||||
- align_load_store_size(config->gfx_level, low->num_components * low->def.bit_size,
|
||||
- uses_smem, is_shared) +
|
||||
- align_load_store_size(config->gfx_level, high->num_components * high->def.bit_size,
|
||||
- uses_smem, is_shared);
|
||||
+ ALIGN_POT(align_load_store_size(config->gfx_level, low->num_components * low->def.bit_size,
|
||||
+ uses_smem, is_shared), 32) +
|
||||
+ ALIGN_POT(align_load_store_size(config->gfx_level, high->num_components * high->def.bit_size,
|
||||
+ uses_smem, is_shared), 32);
|
||||
|
||||
- if (aligned_new_size > aligned_unvectorized_size + overfetch_size)
|
||||
+ if (ALIGN_POT(aligned_new_size, 32) > aligned_unvectorized_size + overfetch_size)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue