From 44bdac984990ff61569075cffa8f73b31609e3d2 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 27 Oct 2021 19:23:19 +0300 Subject: [PATCH] tu: Implement VK_AMD_buffer_marker to support Graphics Flight Recorder Graphics Flight Recorder is: "The Graphics Flight Recorder (GFR) is a Vulkan layer to help trackdown and identify the cause of GPU hangs and crashes. It works by instrumenting command buffers with completion tags." This is a nice little tool which can help quickly identify the call which hung, or whether a command buffer simply takes too long to execute. The tiling nature of our GPU shouldn't be a big issue aside from lower performance. For the non-segfault case: - If the hang happens at the same place in the cmdbuf and the draw/dispatch is not finished at that point, it is likely that there is an infinite loop in one of the shaders in this draw. - If the hang always happens in a different place, it is likely that nothing is wrong and the command buffer just takes too long to execute; try increasing hangcheck_period_ms. If that doesn't help, it is likely a synchronization issue.
Signed-off-by: Danylo Piliaiev Part-of: --- docs/features.txt | 2 +- src/freedreno/vulkan/tu_cmd_buffer.c | 70 ++++++++++++++++++++++++++++ src/freedreno/vulkan/tu_device.c | 2 + 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 95498feb316..d9faddfe6bc 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -580,7 +580,7 @@ Khronos extensions that are not part of any Vulkan version: VK_NV_compute_shader_derivatives DONE (anv, radv) VK_EXT_acquire_drm_display DONE (radv, anv) VK_VALVE_mutable_descriptor_type DONE (anv, radv, tu) - VK_AMD_buffer_marker DONE (radv) + VK_AMD_buffer_marker DONE (radv, tu) VK_AMD_device_coherent_memory DONE (radv) VK_AMD_draw_indirect_count DONE (radv) VK_AMD_gcn_shader DONE (radv) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index e029794f544..3c8d0211f24 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -5007,3 +5007,73 @@ tu_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) tu_cs_emit(cs, 0); } +void +tu_CmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + uint32_t marker) +{ + /* Write the 32-bit marker into dstBuffer at dstOffset. + * + * Almost the same as write_event, but also allowed in renderpass: when + * cmd->state.pass is set the packets go to draw_cs and the renderpass + * cache state is used, otherwise the main cs and cache state are used. + */ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); + + uint64_t va = buffer->bo->iova + dstOffset; + + struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs; + struct tu_cache_state *cache = + cmd->state.pass ? &cmd->state.renderpass_cache : &cmd->state.cache; + + /* From the Vulkan 1.2.203 spec: + * + * The access scope for buffer marker writes falls under + * the VK_ACCESS_TRANSFER_WRITE_BIT, and the pipeline stages for + * identifying the synchronization scope must include both pipelineStage + * and VK_PIPELINE_STAGE_TRANSFER_BIT.
+ * + * Transfer operations use the CCU; however, here we write via the CP. + * Flush the CCU in order to make the results of previous transfer + * operations visible to the CP. + */ + tu_flush_for_access(cache, 0, TU_ACCESS_SYSMEM_WRITE); + + /* Flags that only require a top-of-pipe event. DrawIndirect parameters are + * read by the CP, so the draw indirect stage counts as top-of-pipe too. + * is_top_of_pipe is true only when pipelineStage has no bits set outside + * of these flags. + */ + VkPipelineStageFlags top_of_pipe_flags = + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + + bool is_top_of_pipe = !(pipelineStage & ~top_of_pipe_flags); + + /* We have to WFI only if we flushed the CCU here and are using CP_MEM_WRITE. + * Otherwise: + * - We do CP_EVENT_WRITE(RB_DONE_TS) which should wait for flushes; + * - There was a barrier to synchronize other writes with WriteBufferMarkerAMD + * and they had to include our pipelineStage which forces the WFI. + */ + if (cache->flush_bits != 0 && is_top_of_pipe) { + cache->flush_bits |= TU_CMD_FLAG_WAIT_FOR_IDLE; + } + + if (cmd->state.pass) { + tu_emit_cache_flush_renderpass(cmd, cs); + } else { + tu_emit_cache_flush(cmd, cs); + } + + if (is_top_of_pipe) { + tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3); + tu_cs_emit_qw(cs, va); /* ADDR_LO/HI: 64-bit destination address */ + tu_cs_emit(cs, marker); + } else { + /* Use an RB_DONE_TS event to wait for everything to complete before the marker is written. */ + tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4); + tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS)); + tu_cs_emit_qw(cs, va); + tu_cs_emit(cs, marker); + } + + /* Make sure the result of this write (done by the CP) is visible to others.
*/ + tu_flush_for_access(cache, TU_ACCESS_CP_WRITE, 0); +} diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index e11d7720dab..acca697208a 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -202,6 +202,8 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_line_rasterization = true, .EXT_subgroup_size_control = true, .EXT_image_robustness = true, + /* For Graphics Flight Recorder (GFR) */ + .AMD_buffer_marker = true, #ifdef ANDROID .ANDROID_native_buffer = true, #endif