diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources
index 881eb17e1f8..fc378929f6f 100644
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -44,6 +44,7 @@ AMD_COMMON_FILES = \
 	common/ac_gpu_info.h \
 	common/ac_msgpack.c \
 	common/ac_msgpack.h \
+	common/ac_nir.c \
 	common/ac_nir.h \
 	common/ac_nir_lower_esgs_io_to_mem.c \
 	common/ac_nir_lower_tess_io_to_mem.c \
diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
new file mode 100644
index 00000000000..8a8e2020008
--- /dev/null
+++ b/src/amd/common/ac_nir.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "ac_nir.h"
+
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+                             enum chip_class chip_class)
+{
+   bool progress = false;
+
+   /* Lower large variables to scratch first so that we won't bloat the
+    * shader by generating large if ladders for them. We later lower
+    * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+    */
+   NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
+            glsl_get_natural_size_align_bytes);
+
+   /* LLVM doesn't support VGPR indexing on GFX9. */
+   bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
+
+   /* TODO: Indirect indexing of GS inputs is unimplemented.
+    *
+    * TCS and TES load inputs directly from LDS or offchip memory, so
+    * indirect indexing is trivial.
+    */
+   nir_variable_mode indirect_mask = 0;
+   if (shader->info.stage == MESA_SHADER_GEOMETRY ||
+       (shader->info.stage != MESA_SHADER_TESS_CTRL && shader->info.stage != MESA_SHADER_TESS_EVAL &&
+        !llvm_has_working_vgpr_indexing)) {
+      indirect_mask |= nir_var_shader_in;
+   }
+   if (!llvm_has_working_vgpr_indexing && shader->info.stage != MESA_SHADER_TESS_CTRL)
+      indirect_mask |= nir_var_shader_out;
+
+   /* TODO: We shouldn't need to do this, however LLVM isn't currently
+    * smart enough to handle indirects without causing excess spilling
+    * causing the gpu to hang.
+    *
+    * See the following thread for more details of the problem:
+    * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+    */
+   indirect_mask |= nir_var_function_temp;
+
+   progress |= nir_lower_indirect_derefs(shader, indirect_mask, UINT32_MAX);
+   return progress;
+}
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index a6847bf8a3c..b3d4c6b56f7 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -82,6 +82,10 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
                               enum chip_class chip_class,
                               unsigned num_reserved_es_outputs);
 
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+                             enum chip_class chip_class);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index 885f88ebd4f..fc8d0fe7e49 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -87,6 +87,7 @@ amd_common_files = files(
   'ac_msgpack.c',
   'ac_msgpack.h',
   'ac_rgp_elf_object_pack.c',
+  'ac_nir.c',
   'ac_nir.h',
   'ac_nir_lower_esgs_io_to_mem.c',
   'ac_nir_lower_tess_io_to_mem.c',
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index cd1a612985f..359500cd2bb 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -5127,47 +5127,6 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
    ralloc_free(ctx.verified_interp);
 }
 
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
-{
-   bool progress = false;
-
-   /* Lower large variables to scratch first so that we won't bloat the
-    * shader by generating large if ladders for them. We later lower
-    * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
-    */
-   NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-            glsl_get_natural_size_align_bytes);
-
-   /* LLVM doesn't support VGPR indexing on GFX9. */
-   bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
-
-   /* TODO: Indirect indexing of GS inputs is unimplemented.
-    *
-    * TCS and TES load inputs directly from LDS or offchip memory, so
-    * indirect indexing is trivial.
-    */
-   nir_variable_mode indirect_mask = 0;
-   if (nir->info.stage == MESA_SHADER_GEOMETRY ||
-       (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
-        !llvm_has_working_vgpr_indexing)) {
-      indirect_mask |= nir_var_shader_in;
-   }
-   if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
-      indirect_mask |= nir_var_shader_out;
-
-   /* TODO: We shouldn't need to do this, however LLVM isn't currently
-    * smart enough to handle indirects without causing excess spilling
-    * causing the gpu to hang.
-    *
-    * See the following thread for more details of the problem:
-    * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
-    */
-   indirect_mask |= nir_var_function_temp;
-
-   progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
-   return progress;
-}
-
 static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
 {
    if (intrin->intrinsic != nir_intrinsic_store_output)
diff --git a/src/amd/llvm/ac_nir_to_llvm.h b/src/amd/llvm/ac_nir_to_llvm.h
index eab16252174..2b7d825f846 100644
--- a/src/amd/llvm/ac_nir_to_llvm.h
+++ b/src/amd/llvm/ac_nir_to_llvm.h
@@ -47,8 +47,6 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
    return (index * 4) + chan;
 }
 
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
-
 bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
 
 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 9f4289c598b..ab2149f9ec5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -43,6 +43,7 @@
 #include "ac_exp_param.h"
 #include "ac_llvm_util.h"
 #include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
 #include "ac_shader_util.h"
 #include "aco_interface.h"
 #include "sid.h"
@@ -2339,16 +2340,16 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
 
       if (progress) {
          if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
-            ac_lower_indirect_derefs(ordered_shaders[i],
-                                     pipeline->device->physical_device->rad_info.chip_class);
+            ac_nir_lower_indirect_derefs(ordered_shaders[i],
+                                         pipeline->device->physical_device->rad_info.chip_class);
             /* remove dead writes, which can remove input loads */
             nir_lower_vars_to_ssa(ordered_shaders[i]);
             nir_opt_dce(ordered_shaders[i]);
          }
 
         if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
-            ac_lower_indirect_derefs(ordered_shaders[i - 1],
-                                     pipeline->device->physical_device->rad_info.chip_class);
+            ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
+                                         pipeline->device->physical_device->rad_info.chip_class);
         }
      }
   }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3d0e8acddaf..ab1c6e9d156 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -644,7 +644,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module
     * bloat the instruction count of the loop and cause it to be
    * considered too large for unrolling.
     */
-   if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
+   if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
       !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) && nir->info.stage != MESA_SHADER_COMPUTE) {
      /* Optimize the lowered code before the linking optimizations.
       */
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index c941fff6ee8..06df038598a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -23,6 +23,7 @@
  */
 
 #include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_deref.h"
@@ -871,7 +872,7 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
       NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
    }
 
-   changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
+   changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
 
    if (changed)
      si_nir_opts(sscreen, nir, false);
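Usage sketch (not part of the patch): after this move, a driver includes "ac_nir.h" instead of the LLVM-specific ac_nir_to_llvm.h and calls ac_nir_lower_indirect_derefs() on its NIR shader, as the radv and radeonsi hunks above do. The helper below is hypothetical and only illustrates the calling convention; the include paths, the chip_class argument, and the follow-up cleanup passes are placeholders mirroring the radv_link_shaders() call site.

#include "compiler/nir/nir.h" /* nir_shader, nir_lower_vars_to_ssa, nir_opt_dce */
#include "ac_nir.h"           /* ac_nir_lower_indirect_derefs(), enum chip_class */

/* Hypothetical driver helper: run the shared indirect-deref lowering and,
 * when it reports progress, re-run the usual NIR cleanup passes so dead
 * writes and loads left behind by the lowering are removed. */
static void
lower_indirects_example(nir_shader *shader, enum chip_class chip_class)
{
   if (ac_nir_lower_indirect_derefs(shader, chip_class)) {
      nir_lower_vars_to_ssa(shader);
      nir_opt_dce(shader);
   }
}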