mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
amd/lower_mem_access_bit_sizes: also use SMEM for subdword loads
We can simply extract the subdword values from the loaded dwords, as per the nir_lower_mem_access_bit_sizes() lowering. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37843>
This commit is contained in:
parent
fbf0399517
commit
8ff44f17ef
5 changed files with 5 additions and 13 deletions
|
|
@ -413,7 +413,7 @@ bool
|
||||||
ac_nir_opt_shared_append(nir_shader *shader);
|
ac_nir_opt_shared_append(nir_shader *shader);
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering);
|
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
enum amd_gfx_level gfx_level;
|
enum amd_gfx_level gfx_level;
|
||||||
bool use_llvm;
|
bool use_llvm;
|
||||||
bool after_lowering;
|
|
||||||
} mem_access_cb_data;
|
} mem_access_cb_data;
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
|
@ -39,12 +38,6 @@ use_smem_for_load(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data_)
|
||||||
if (intrin->def.divergent)
|
if (intrin->def.divergent)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* ACO doesn't support instruction selection for multi-component 8/16-bit SMEM loads. */
|
|
||||||
const bool supports_scalar_subdword = cb_data->gfx_level >= GFX12 && !cb_data->use_llvm;
|
|
||||||
if (cb_data->after_lowering && intrin->def.bit_size < 32 &&
|
|
||||||
(intrin->def.num_components > 1 || !supports_scalar_subdword))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
|
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
|
||||||
bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
||||||
bool reorder = nir_intrinsic_can_reorder(intrin) || ((access & ACCESS_NON_WRITEABLE) && !(access & ACCESS_VOLATILE));
|
bool reorder = nir_intrinsic_can_reorder(intrin) || ((access & ACCESS_NON_WRITEABLE) && !(access & ACCESS_VOLATILE));
|
||||||
|
|
@ -169,12 +162,11 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering)
|
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm)
|
||||||
{
|
{
|
||||||
mem_access_cb_data cb_data = {
|
mem_access_cb_data cb_data = {
|
||||||
.gfx_level = gfx_level,
|
.gfx_level = gfx_level,
|
||||||
.use_llvm = use_llvm,
|
.use_llvm = use_llvm,
|
||||||
.after_lowering = after_lowering,
|
|
||||||
};
|
};
|
||||||
return nir_shader_intrinsics_pass(shader, &use_smem_for_load, nir_metadata_all, &cb_data);
|
return nir_shader_intrinsics_pass(shader, &use_smem_for_load, nir_metadata_all, &cb_data);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -381,7 +381,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||||
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
||||||
|
|
||||||
apply_nuw_to_offsets(ctx, impl);
|
apply_nuw_to_offsets(ctx, impl);
|
||||||
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true);
|
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false);
|
||||||
|
|
||||||
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
nir_opt_load_skip_helpers_options skip_helper_options = {};
|
nir_opt_load_skip_helpers_options skip_helper_options = {};
|
||||||
|
|
|
||||||
|
|
@ -354,7 +354,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||||
NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
|
NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
|
||||||
|
|
||||||
nir_divergence_analysis(stage->nir);
|
nir_divergence_analysis(stage->nir);
|
||||||
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm, false);
|
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
|
||||||
NIR_PASS(_, stage->nir, radv_nir_opt_access_can_speculate);
|
NIR_PASS(_, stage->nir, radv_nir_opt_access_can_speculate);
|
||||||
|
|
||||||
NIR_PASS(_, stage->nir, nir_lower_memory_model);
|
NIR_PASS(_, stage->nir, nir_lower_memory_model);
|
||||||
|
|
|
||||||
|
|
@ -1627,7 +1627,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
||||||
nir_divergence_analysis(nir); /* required by ac_nir_flag_smem_for_loads */
|
nir_divergence_analysis(nir); /* required by ac_nir_flag_smem_for_loads */
|
||||||
/* This is required by ac_nir_scalarize_overfetching_loads_callback. */
|
/* This is required by ac_nir_scalarize_overfetching_loads_callback. */
|
||||||
NIR_PASS(progress, nir, ac_nir_flag_smem_for_loads, sel->screen->info.gfx_level,
|
NIR_PASS(progress, nir, ac_nir_flag_smem_for_loads, sel->screen->info.gfx_level,
|
||||||
!sel->info.base.use_aco_amd, false);
|
!sel->info.base.use_aco_amd);
|
||||||
/* Scalarize overfetching loads, so that we don't load more components than necessary.
|
/* Scalarize overfetching loads, so that we don't load more components than necessary.
|
||||||
* Adjacent loads will be re-vectorized with a conservative overfetching limit.
|
* Adjacent loads will be re-vectorized with a conservative overfetching limit.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue