diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 603d260f9db..5fbaa58dbc4 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -1550,6 +1550,8 @@ tu6_emit_gmem_resolves(struct tu_cmd_buffer *cmd, */ perf_debug(cmd->device, "TODO: missing GMEM->GMEM resolve path\n"); + if (CHIP >= A7XX) + tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE); tu_load_gmem_attachment(cmd, cs, resolve_group, a, false, true); } } @@ -5583,6 +5585,19 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM); + /* This appears to be necessary when stores are followed by loads to the + * same memory in GMEM, to prevent the loads from starting before the + * stores have completed. See + * dEQP-VK.pipeline.monolithic.multisample.multisampled_render_to_single_sampled.input_attachments.initialize.r8g8b8a8_unorm_r16g16b16a16_sfloat_r16g16b16a16_sint_d16_unorm.2x.ds_resolve_sample_zero.whole_framebuffer + * for a testcase. + * + * TODO: why is this not necessary between the end of one tile and the + * start of another? + */ + if (subpass_idx != 0) { + tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE); + } + /* Emit gmem loads that are first used in this subpass. */ bool emitted_scissor = false; for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {