diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index c4606738d80..581c97a4041 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -218,6 +218,39 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a return ac_nir_unpack_value(b, value, rshift, bitwidth); } +/* This lowers small indirect array derefs to if-else trees. We might want to do this before + * ac_nir_lower_indirect_derefs() to lower small array derefs to if-else trees earlier than + * lowering large array derefs to scratch. This is because we want to do the scratch lowering + * as late as possible (because scratch access isn't very optimizable), but the if-else tree + * lowering can be optimized. For example, an indirect access where we can know that all + * elements that might be accessed are equal could be replaced with a use of that element, or + * nir_opt_peephole_select() can flatten some of the if-else tree. */ +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader) +{ + struct set vars; + _mesa_pointer_set_init(&vars, NULL); + nir_foreach_function_impl(impl, shader) { + nir_foreach_function_temp_variable(var, impl) { + unsigned var_size, var_align; + glsl_get_natural_size_align_bytes(var->type, &var_size, &var_align); + if (var_size < 256) + _mesa_set_add(&vars, var); + } + } + + bool progress = false; + if (vars.entries) + NIR_PASS(progress, shader, nir_lower_indirect_var_derefs_to_if_else_trees, &vars); + + _mesa_set_fini(&vars, NULL); + + return progress; +} + +/* This lowers all indirect array derefs to either scratch access or if-else trees, ensuring + that none remains. 
+ */ bool ac_nir_lower_indirect_derefs(nir_shader *shader) { diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index eb785c3c84a..b7e6c313e2a 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -176,6 +176,9 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, enum amd_gfx_level gfx_level, bool triangle_strip_adjacency_fix); +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader); + bool ac_nir_lower_indirect_derefs(nir_shader *shader); diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 93223d811bd..d1b67a0c3e1 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -106,6 +106,9 @@ radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shad /* Compile SPIR-V shader to NIR. */ cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal); + NIR_PASS(_, cs_stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, cs_stage->nir, nir_lower_vars_to_ssa); + radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled); /* Run the shader info pass. 
*/ diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 7a61709e6ff..54ec910224d 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2611,6 +2611,9 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st radv_foreach_stage (i, active_nir_stages) { int64_t stage_start = os_time_get_nano(); + ac_nir_lower_indirect_derefs_early(stages[i].nir); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled); stages[i].feedback.duration += os_time_get_nano() - stage_start; @@ -2635,6 +2638,28 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st stages[i].feedback.duration += os_time_get_nano() - stage_start; } + radv_foreach_stage (i, active_nir_stages) { + int64_t stage_start = os_time_get_nano(); + + /* Indirect lowering must be called after the radv_optimize_nir() loop + * has been called at least once. Otherwise indirect lowering can + * bloat the instruction count of the loop and cause it to be + * considered too large for unrolling. + * + * We want to do this as late as possible because scratch access isn't + * very optimizable. We lower smaller arrays to SSA earlier with + * ac_nir_lower_indirect_derefs_early, because that can actually enable + * optimizations. 
+ */ + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stages[i].nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stages[i].key.optimisations_disabled) + radv_optimize_nir(stages[i].nir, false); + + stages[i].feedback.duration += os_time_get_nano() - stage_start; + } + radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages); radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug); diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index d1ab88c6aaf..4a849e688ce 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -657,6 +657,12 @@ radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv { stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false); + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stage->nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stage->key.optimisations_disabled) + radv_optimize_nir(stage->nir, false); + nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) { unsigned size, alignment; glsl_get_natural_size_align_bytes(var->type, &size, &alignment); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 707d167862f..533120f9b12 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -877,21 +877,6 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct } } - /* Indirect lowering must be called after the radv_optimize_nir() loop - * has been called at least once. Otherwise indirect lowering can - * bloat the instruction count of the loop and cause it to be - * considered too large for unrolling. 
- */ - bool indirect_derefs_lowered = false; - NIR_PASS(indirect_derefs_lowered, nir, ac_nir_lower_indirect_derefs); - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - - if (indirect_derefs_lowered && !stage->key.optimisations_disabled && - nir->info.stage != MESA_SHADER_COMPUTE) { - /* Optimize the lowered code before the linking optimizations. */ - radv_optimize_nir(nir, false); - } - return nir; } diff --git a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c index 2f65315c571..57b148fb1e0 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c +++ b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c @@ -250,7 +250,7 @@ nir_lower_indirect_var_derefs_to_if_else_trees(nir_shader *shader, bool progress = false; nir_foreach_function_impl(impl, shader) { - progress = lower_indirects_impl(impl, nir_var_uniform, vars, UINT_MAX) || + progress = lower_indirects_impl(impl, nir_var_all, vars, UINT_MAX) || progress; }