mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 08:40:11 +01:00
ac/nir: allow 8/16-bit smem loads
fossil-db (gfx1201):
Totals from 295 (0.37% of 79377) affected shaders:
Instrs: 314018 -> 313355 (-0.21%); split: -0.22%, +0.00%
CodeSize: 1697996 -> 1696528 (-0.09%); split: -0.11%, +0.02%
Latency: 4197719 -> 4197106 (-0.01%)
InvThroughput: 1258891 -> 1258744 (-0.01%)
PreSGPRs: 12232 -> 12230 (-0.02%)
SALU: 66762 -> 66269 (-0.74%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34162>
This commit is contained in:
parent
5b116c4de9
commit
3b42626973
1 changed files with 14 additions and 6 deletions
|
|
@@ -35,7 +35,13 @@ use_smem_for_load(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data_)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (intrin->def.divergent || (cb_data->after_lowering && intrin->def.bit_size < 32))
|
||||
if (intrin->def.divergent)
|
||||
return false;
|
||||
|
||||
/* ACO doesn't support instruction selection for multi-component 8/16-bit SMEM loads. */
|
||||
const bool supports_scalar_subdword = cb_data->gfx_level >= GFX12 && !cb_data->use_llvm;
|
||||
if (cb_data->after_lowering && intrin->def.bit_size < 32 &&
|
||||
(intrin->def.num_components > 1 || !supports_scalar_subdword))
|
||||
return false;
|
||||
|
||||
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
|
||||
|
|
@@ -83,12 +89,14 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
if (!is_load)
|
||||
return res;
|
||||
|
||||
/* Lower 8/16-bit loads to 32-bit, unless it's a VMEM scalar load. */
|
||||
/* Lower 8/16-bit loads to 32-bit, unless it's a VMEM (or SMEM on GFX12+) scalar load. */
|
||||
|
||||
const bool support_subdword = res.num_components == 1 && !is_smem &&
|
||||
(!cb_data->use_llvm || intrin != nir_intrinsic_load_ubo);
|
||||
const bool supports_scalar_subdword =
|
||||
!is_smem || (cb_data->gfx_level >= GFX12 && intrin != nir_intrinsic_load_push_constant);
|
||||
const bool supported_subdword = res.num_components == 1 && supports_scalar_subdword &&
|
||||
(!cb_data->use_llvm || intrin != nir_intrinsic_load_ubo);
|
||||
|
||||
if (res.bit_size >= 32 || support_subdword)
|
||||
if (res.bit_size >= 32 || supported_subdword)
|
||||
return res;
|
||||
|
||||
const uint32_t max_pad = 4 - MIN2(combined_align, 4);
|
||||
|
|
@@ -143,4 +151,4 @@ ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_lev
|
|||
.cb_data = &cb_data,
|
||||
};
|
||||
return nir_lower_mem_access_bit_sizes(shader, &lower_mem_access_options);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue