diff --git a/.pick_status.json b/.pick_status.json index a91cd5bd113..b5c310ab598 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1704,7 +1704,7 @@ "description": "radv,aco: wait for all VMEM loads when the prolog loads large 64-bit attributes", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp index 3638fbc03a5..25d763df1d6 100644 --- a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp @@ -643,6 +643,14 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh continue_pc = Operand(prolog_input, s2); } + /* Wait for all pending VMEM loads when the prolog loads large 64-bit + * attributes because the vertex shader isn't required to consume all of + * them and they might be overwritten. This isn't the optimal solution, + * but 64-bit vertex attributes are rarely used. + */ + if (is_last_attr_large) + wait_for_vmem_loads(bld); + bld.sop1(aco_opcode::s_setpc_b64, continue_pc); program->config->float_mode = program->blocks[0].fp_mode.val; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index f0189e34206..b65cf086979 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -191,6 +191,11 @@ declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_in unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask); for (unsigned i = 0; i < num_attributes; i++) { ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_VALUE, &args->vs_inputs[i]); + + /* The vertex shader isn't required to consume all components that are loaded by the prolog + * and it's possible that more VGPRs are written. 
This specific case is handled at the end + * of the prolog, which waits for all pending VMEM loads if needed. + */ args->ac.args[args->vs_inputs[i].arg_index].pending_vmem = true; } }