diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index d91ea77effa..af1c52c3a5c 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -2305,6 +2305,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, trace_start_slow_clear_lrz(&cmd->trace, &cmd->cs, cmd, image->vk.format, image->vk.extent.width, image->vk.extent.height); + trace_warning_slow_clear_lrz(&cmd->trace, cs, cmd); /* Also report the slow clear as a perf warning. NOTE(review): this passes `cs` while the start tracepoint above passes `&cmd->cs` -- confirm both name the same command stream. */ /* It is assumed that LRZ cache is invalidated at this point for * the writes here to become visible to LRZ. diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 2949c06399b..9e00175d80f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -3051,6 +3051,10 @@ tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem) static void tu_renderpass_begin(struct tu_cmd_buffer *cmd) { + if (cmd->state.pass->warn_fdm_force_disabled) { /* flag is set by tu_render_pass_disable_fdm() when it force-disables FDM */ + trace_warning_fdm_force_disabled(&cmd->trace, &cmd->cs, cmd); + } + /* We need to re-emit any draw states that are patched in order for them to * be correctly added to the per-renderpass patchpoint list, even if they * are the same as before. 
diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 771956b97db..7ae3a8a5763 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -89,6 +89,7 @@ tu_lrz_disable_reason(struct tu_cmd_buffer *cmd, const char *reason) { cmd->state.rp.lrz_disabled_at_draw = cmd->state.rp.drawcall_count; perf_debug(cmd->device, "Disabling LRZ because '%s' at draw %u", reason, cmd->state.rp.lrz_disabled_at_draw); + trace_warning_lrz_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason); } void @@ -104,6 +105,7 @@ tu_lrz_disable_write_for_rp(struct tu_cmd_buffer *cmd, const char *reason) cmd->device, "Disabling LRZ write for the rest of the RP because '%s' at draw %u", reason, cmd->state.rp.lrz_write_disabled_at_draw); + trace_warning_lrz_write_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason); } template @@ -217,7 +219,9 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd, const struct tu_image_view *view) { if (!view->image->lrz_layout.lrz_total_size) { - assert(!cmd->device->use_lrz || !vk_format_has_depth(att->format)); + /* A missing LRZ buffer is expected when LRZ is off for the device or the format has no depth; only warn when LRZ could have been used. */ + if (cmd->device->use_lrz && vk_format_has_depth(att->format)) + trace_warning_depth_image_no_lrz(&cmd->rp_trace, &cmd->draw_cs, cmd); return; } @@ -243,11 +247,15 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd, */ if ((view->image->vk.create_flags & VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_EXT) && - !clears_depth) + !clears_depth) { + tu_lrz_disable_reason(cmd, "FRAGMENT_DENSITY_MAP_OFFSET_BIT attachment used without depth attachment clear"); return; + } - if (!clears_depth && !att->load) + if (!clears_depth && !att->load) { + tu_lrz_disable_reason(cmd, "Depth attachment isn't loaded or cleared"); return; + } cmd->state.lrz.valid = true; cmd->state.lrz.valid_at_start = true; diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc index 735707d8a1b..3a4193f0bf6 100644 --- a/src/freedreno/vulkan/tu_pass.cc +++ b/src/freedreno/vulkan/tu_pass.cc @@ -540,6 +540,7 @@ tu_render_pass_disable_fdm(struct tu_device *dev, struct tu_render_pass *pass) if 
(att->samples > 1 && (att->load || att->load_stencil || att->store || att->store_stencil)) { + pass->warn_fdm_force_disabled = true; /* remembered on the pass; tu_renderpass_begin() emits the perf warning from it */ perf_debug(dev, "Disabling fragment density map due to %s of multisample attachment", (att->load || att->load_stencil) ? "load" : "store"); return true; diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h index e2f0f112a72..2a2a80c1be9 100644 --- a/src/freedreno/vulkan/tu_pass.h +++ b/src/freedreno/vulkan/tu_pass.h @@ -162,6 +162,8 @@ struct tu_render_pass bool allow_ib2_skipping; bool has_layered_fdm; + bool warn_fdm_force_disabled; /* true when FDM was force-disabled because a multisample attachment is loaded or stored */ + struct tu_subpass_barrier end_barrier; struct tu_subpass subpasses[0]; }; diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index 2d0eef9026f..34219e15bc8 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -76,7 +76,25 @@ static const struct { [UPDATE_BUFFER_STAGE_ID] = { "Update Buffer", "" }, [SLOW_CLEAR_LRZ_STAGE_ID] = { "Slow Clear LRZ", "Perform slow clear of LRZ for this image, should be avoided" }, [DISABLE_LRZ_STAGE_ID] = { "Disable LRZ", "Disable LRZ for this image, should be avoided" }, - // TODO add the rest + [WARNING_SLOW_CLEAR_LRZ_STAGE_ID] = { + "Slow LRZ Clear", + "LRZ fast clear is not used. Possible causes:\n" + "- The depth image is too large (width x height x layers x msaa) for LRZ fast clear\n" + "- [Adreno A6XX] LRZ is being cleared with a depth clear value other than 0.0 or 1.0" + }, + [WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID] = { + "Depth Image Without LRZ", + "LRZ isn't used because the depth image width x height x layers x msaa is too large" + }, + [WARNING_LRZ_DISABLED_STAGE_ID] = { + "LRZ Read/Write Disabled", + "LRZ read/write is disabled for the rest of the RP. This should be avoided near the start of the RP, but is OK near the end" }, + [WARNING_LRZ_WRITE_DISABLED_STAGE_ID] = { + "LRZ Write Disabled", + "LRZ write is disabled for the rest of the RP. 
Avoid this near the start of the RP, it is OK near the end" }, + [WARNING_FDM_FORCE_DISABLED_STAGE_ID] = { + "FDM Force Disabled", + "FDM is disabled due to the presence of LOAD_OP_LOAD or STORE_OP_STORE on a multisample attachment" }, }; static uint32_t gpu_clock_id; @@ -245,6 +263,12 @@ get_stack(struct tu_device *dev, enum tu_stage_id stage_id) case CMD_BUFFER_ANNOTATION_STAGE_ID: case CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID: return &dev->perfetto.annotations_stack; + case WARNING_SLOW_CLEAR_LRZ_STAGE_ID: + case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID: + case WARNING_LRZ_DISABLED_STAGE_ID: + case WARNING_LRZ_WRITE_DISABLED_STAGE_ID: + case WARNING_FDM_FORCE_DISABLED_STAGE_ID: + return &dev->perfetto.sticky_warnings_stack; /* every perf-warning stage shares this one stack */ default: return &dev->perfetto.render_stack; } @@ -359,6 +383,14 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id, case CONCURRENT_BINNING_STAGE_ID: case CONCURRENT_BINNING_BARRIER_STAGE_ID: queue_id = BV_HW_QUEUE_ID; + break; /* keep the BV stages from falling through into the warning cases below */ + case WARNING_SLOW_CLEAR_LRZ_STAGE_ID: + case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID: + case WARNING_LRZ_DISABLED_STAGE_ID: + case WARNING_LRZ_WRITE_DISABLED_STAGE_ID: + case WARNING_FDM_FORCE_DISABLED_STAGE_ID: + queue_id = PERF_WARNINGS_QUEUE_ID; + break; default: break; } @@ -644,6 +676,11 @@ CREATE_EVENT_CALLBACK(update_buffer, UPDATE_BUFFER_STAGE_ID) CREATE_EVENT_CALLBACK(resolve_image, RESOLVE_IMAGE_STAGE_ID) CREATE_EVENT_CALLBACK(slow_clear_lrz, SLOW_CLEAR_LRZ_STAGE_ID) CREATE_EVENT_CALLBACK(disable_lrz, DISABLE_LRZ_STAGE_ID) +CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_slow_clear_lrz, WARNING_SLOW_CLEAR_LRZ_STAGE_ID) +CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_depth_image_no_lrz, WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID) +CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_disabled, WARNING_LRZ_DISABLED_STAGE_ID) +CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_write_disabled, WARNING_LRZ_WRITE_DISABLED_STAGE_ID) +CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_fdm_force_disabled, WARNING_FDM_FORCE_DISABLED_STAGE_ID) void 
tu_perfetto_start_cmd_buffer_annotation( diff --git a/src/freedreno/vulkan/tu_perfetto.h b/src/freedreno/vulkan/tu_perfetto.h index 72dcc7acd0d..ca672e142ee 100644 --- a/src/freedreno/vulkan/tu_perfetto.h +++ b/src/freedreno/vulkan/tu_perfetto.h @@ -81,6 +81,11 @@ enum tu_stage_id { UPDATE_BUFFER_STAGE_ID, SLOW_CLEAR_LRZ_STAGE_ID, DISABLE_LRZ_STAGE_ID, + WARNING_SLOW_CLEAR_LRZ_STAGE_ID, /* WARNING_* stages are perf warnings: stage_end() files them under PERF_WARNINGS_QUEUE_ID */ + WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID, + WARNING_LRZ_DISABLED_STAGE_ID, + WARNING_LRZ_WRITE_DISABLED_STAGE_ID, + WARNING_FDM_FORCE_DISABLED_STAGE_ID, TU_STAGE_ID_COUNT, }; diff --git a/src/freedreno/vulkan/tu_tracepoints.py b/src/freedreno/vulkan/tu_tracepoints.py index 7ab20642af4..7a3b21038bf 100644 --- a/src/freedreno/vulkan/tu_tracepoints.py +++ b/src/freedreno/vulkan/tu_tracepoints.py @@ -286,6 +286,18 @@ begin_end_tp('compute_indirect', is_indirect=True, c_format="%ux%ux%u", fields=['x', 'y', 'z'])]) +# Performance warnings: singular tracepoints, all gated by the 'perf_warnings' toggle + +singular_tp('warning_slow_clear_lrz', toggle_name='perf_warnings') +singular_tp('warning_depth_image_no_lrz', toggle_name='perf_warnings') +singular_tp('warning_lrz_disabled', + toggle_name='perf_warnings', + args=[Arg(type='const char *', var='reason', c_format='%s')]) +singular_tp('warning_lrz_write_disabled', + toggle_name='perf_warnings', + args=[Arg(type='const char *', var='reason', c_format='%s')]) +singular_tp('warning_fdm_force_disabled', toggle_name='perf_warnings') + # Annotations for Cmd(Begin|End)DebugUtilsLabelEXT for suffix in ["", "_rp"]: begin_end_tp('cmd_buffer_annotation' + suffix,