diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7400f42d533..be91829f21d 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12201,8 +12201,6 @@ void select_program(Program *program, bld.barrier(aco_opcode::p_barrier, memory_sync_info(storage_vmem_output, semantic_release, scope_device)); bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx.gs_wave_id), -1, sendmsg_gs_done(false, false, 0)); - } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - write_tcs_tess_factors(&ctx); } if (ctx.stage == fragment_fs) { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 06c6b85525b..4e0e1bbb303 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -860,6 +860,35 @@ load_patch_vertices_in(struct ac_shader_abi *abi) return LLVMConstInt(ctx->ac.i32, ctx->args->options->key.tcs.input_vertices, false); } +static LLVMValueRef +load_ring_tess_factors(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + assert(ctx->stage == MESA_SHADER_TESS_CTRL); + + return ctx->hs_ring_tess_factor; +} + +static LLVMValueRef +load_ring_tess_offchip(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + assert(ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL); + + return ctx->hs_ring_tess_offchip; +} + +static LLVMValueRef +load_ring_esgs(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + assert(ctx->stage == MESA_SHADER_VERTEX || + ctx->stage == MESA_SHADER_TESS_EVAL || + ctx->stage == MESA_SHADER_GEOMETRY); + + return ctx->esgs_ring; +} static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero) { @@ -3498,7 +3527,6 @@ write_tess_factors(struct radv_shader_context *ctx) static void handle_tcs_outputs_post(struct radv_shader_context *ctx) { - write_tess_factors(ctx); } static bool @@ -3902,6 +3930,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.load_ssbo = radv_load_ssbo; ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; + ctx.abi.load_ring_tess_factors = load_ring_tess_factors; + ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip; + ctx.abi.load_ring_esgs = load_ring_esgs; ctx.abi.clamp_shadow_reference = false; ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z; ctx.abi.robust_buffer_access = args->options->robust_buffer_access; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index df8568bcc50..19cad8b981f 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3454,6 +3454,9 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline, nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies); nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies); + /* Lower I/O intrinsics to memory instructions. */ + bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key); + /* optimize the lowered ALU operations */ bool more_algebraic = true; while (more_algebraic) { @@ -3465,7 +3468,7 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline, NIR_PASS(more_algebraic, nir[i], nir_opt_algebraic); } - if (i == MESA_SHADER_COMPUTE) + if (io_to_mem || i == MESA_SHADER_COMPUTE) NIR_PASS_V(nir[i], nir_opt_offsets); /* Do late algebraic optimization to turn add(a, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 680532d47df..b94e89acc7c 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -40,6 +40,7 @@ #include "sid.h" #include "ac_binary.h" #include "ac_llvm_util.h" +#include "ac_nir.h" #include "ac_nir_to_llvm.h" #include "ac_rtld.h" #include "vk_format.h" @@ -759,6 +760,77 @@ radv_lower_io(struct radv_device *device, nir_shader *nir) nir_var_shader_in | nir_var_shader_out); } +bool +radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir, + struct radv_shader_info *info, const struct radv_pipeline_key *pl_key) +{ + bool llvm = radv_use_llvm_for_stage(device, nir->info.stage); + + if (nir->info.stage == MESA_SHADER_VERTEX) { + if (info->vs.as_ls) { + ac_nir_lower_ls_outputs_to_mem( + nir, + info->vs.tcs_in_out_eq, + info->vs.tcs_temp_only_input_mask, + info->vs.num_linked_outputs); + return true; + } else if (info->vs.as_es) { + ac_nir_lower_es_outputs_to_mem( + nir, + device->physical_device->rad_info.chip_class, + info->vs.num_linked_outputs); + return true; + } + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + ac_nir_lower_hs_inputs_to_mem( + nir, + info->vs.tcs_in_out_eq, + info->tcs.num_linked_inputs); + ac_nir_lower_hs_outputs_to_mem( + nir, device->physical_device->rad_info.chip_class, + info->tcs.tes_reads_tess_factors, + llvm ? UINT64_MAX : info->tcs.tes_inputs_read, + llvm ? UINT64_MAX : info->tcs.tes_patch_inputs_read, + info->tcs.num_linked_inputs, + info->tcs.num_linked_outputs, + info->tcs.num_linked_patch_outputs, + true); + ac_nir_lower_tess_to_const( + nir, + pl_key->tess_input_vertices, + info->num_tess_patches, + ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches); + + return true; + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + ac_nir_lower_tes_inputs_to_mem( + nir, + info->tes.num_linked_inputs, + info->tes.num_linked_patch_inputs); + ac_nir_lower_tess_to_const( + nir, + nir->info.tess.tcs_vertices_out, + info->num_tess_patches, + ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches); + + if (info->tes.as_es) { + ac_nir_lower_es_outputs_to_mem( + nir, + device->physical_device->rad_info.chip_class, + info->tes.num_linked_outputs); + } + + return true; + } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { + ac_nir_lower_gs_inputs_to_mem( + nir, + device->physical_device->rad_info.chip_class, + info->gs.num_linked_inputs); + return true; + } + + return false; +} static void * radv_alloc_shader_memory(struct radv_device *device, diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index a7ebdb82e74..f59cf9ff9e2 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -594,4 +594,8 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, void radv_lower_io(struct radv_device *device, nir_shader *nir); +bool +radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir, + struct radv_shader_info *info, const struct radv_pipeline_key *pl_key); + #endif