tu/perfetto: Add performance warning tracepoints

LRZ and FDM have a few major performance pitfalls. Unless they are
clearly surfaced in a perfetto trace, they are easy to miss.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40935>
This commit is contained in:
Danylo Piliaiev 2026-04-13 15:23:10 +02:00 committed by Marge Bot
parent 109d98b4cf
commit 8a146a1be9
8 changed files with 72 additions and 4 deletions

View file

@ -2305,6 +2305,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
trace_start_slow_clear_lrz(&cmd->trace, &cmd->cs, cmd, image->vk.format,
image->vk.extent.width, image->vk.extent.height);
trace_warning_slow_clear_lrz(&cmd->trace, cs, cmd);
/* It is assumed that LRZ cache is invalidated at this point for
* the writes here to become visible to LRZ.

View file

@ -3051,6 +3051,10 @@ tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
static void
tu_renderpass_begin(struct tu_cmd_buffer *cmd)
{
if (cmd->state.pass->warn_fdm_force_disabled) {
trace_warning_fdm_force_disabled(&cmd->trace, &cmd->cs, cmd);
}
/* We need to re-emit any draw states that are patched in order for them to
* be correctly added to the per-renderpass patchpoint list, even if they
* are the same as before.

View file

@ -89,6 +89,7 @@ tu_lrz_disable_reason(struct tu_cmd_buffer *cmd, const char *reason) {
cmd->state.rp.lrz_disabled_at_draw = cmd->state.rp.drawcall_count;
perf_debug(cmd->device, "Disabling LRZ because '%s' at draw %u", reason,
cmd->state.rp.lrz_disabled_at_draw);
trace_warning_lrz_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason);
}
void
@ -104,6 +105,7 @@ tu_lrz_disable_write_for_rp(struct tu_cmd_buffer *cmd, const char *reason)
cmd->device,
"Disabling LRZ write for the rest of the RP because '%s' at draw %u",
reason, cmd->state.rp.lrz_write_disabled_at_draw);
trace_warning_lrz_write_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason);
}
template <chip CHIP>
@ -217,7 +219,7 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
const struct tu_image_view *view)
{
if (!view->image->lrz_layout.lrz_total_size) {
assert(!cmd->device->use_lrz || !vk_format_has_depth(att->format));
trace_warning_depth_image_no_lrz(&cmd->trace, &cmd->draw_cs, cmd);
return;
}
@ -243,11 +245,15 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
*/
if ((view->image->vk.create_flags &
VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_EXT) &&
!clears_depth)
!clears_depth) {
tu_lrz_disable_reason(cmd, "FRAGMENT_DENSITY_MAP_OFFSET_BIT attachment used without depth attachment clear");
return;
}
if (!clears_depth && !att->load)
if (!clears_depth && !att->load) {
tu_lrz_disable_reason(cmd, "Depth attachment isn't loaded or cleared");
return;
}
cmd->state.lrz.valid = true;
cmd->state.lrz.valid_at_start = true;

View file

@ -540,6 +540,7 @@ tu_render_pass_disable_fdm(struct tu_device *dev, struct tu_render_pass *pass)
if (att->samples > 1 &&
(att->load || att->load_stencil ||
att->store || att->store_stencil)) {
pass->warn_fdm_force_disabled = true;
perf_debug(dev, "Disabling fragment density map due to %s of multisample attachment",
(att->load || att->load_stencil) ? "load" : "store");
return true;

View file

@ -162,6 +162,8 @@ struct tu_render_pass
bool allow_ib2_skipping;
bool has_layered_fdm;
bool warn_fdm_force_disabled;
struct tu_subpass_barrier end_barrier;
struct tu_subpass subpasses[0];
};

View file

@ -76,7 +76,25 @@ static const struct {
[UPDATE_BUFFER_STAGE_ID] = { "Update Buffer", "" },
[SLOW_CLEAR_LRZ_STAGE_ID] = { "Slow Clear LRZ", "Perform slow clear of LRZ for this image, should be avoided" },
[DISABLE_LRZ_STAGE_ID] = { "Disable LRZ", "Disable LRZ for this image, should be avoided" },
// TODO add the rest
[WARNING_SLOW_CLEAR_LRZ_STAGE_ID] = {
"Slow LRZ Clear",
"LRZ fast clear is not used. Possible causes:\n"
"- The depth image is too large (width x height x layers x msaa) for LRZ fast clear\n"
"- [Adreno A6XX] LRZ is being cleared with a depth clear value other than 0.0 or 1.0"
},
[WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID] = {
"Depth Image Without LRZ",
"LRZ isn't used because the depth image width x height x layers x msaa is too large"
},
[WARNING_LRZ_DISABLED_STAGE_ID] = {
"LRZ Read/Write Disabled",
"LRZ read/write is disabled for the rest of the RP. This should be avoided near the start of the RP, but is OK near the end" },
[WARNING_LRZ_WRITE_DISABLED_STAGE_ID] = {
"LRZ Write Disabled",
"LRZ write is disabled for the rest of the RP. Avoid this near the start of the RP, it is OK near the end" },
/* Shown when the render pass had FDM force-disabled: a multisample
 * attachment (samples > 1) is loaded or stored by the pass.
 */
[WARNING_FDM_FORCE_DISABLED_STAGE_ID] = {
"FDM Force Disabled",
"FDM is disabled because a multisample attachment uses LOAD_OP_LOAD or STORE_OP_STORE" },
};
static uint32_t gpu_clock_id;
@ -245,6 +263,12 @@ get_stack(struct tu_device *dev, enum tu_stage_id stage_id)
case CMD_BUFFER_ANNOTATION_STAGE_ID:
case CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID:
return &dev->perfetto.annotations_stack;
case WARNING_SLOW_CLEAR_LRZ_STAGE_ID:
case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID:
case WARNING_LRZ_DISABLED_STAGE_ID:
case WARNING_LRZ_WRITE_DISABLED_STAGE_ID:
case WARNING_FDM_FORCE_DISABLED_STAGE_ID:
return &dev->perfetto.sticky_warnings_stack;
default:
return &dev->perfetto.render_stack;
}
@ -359,6 +383,14 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
case CONCURRENT_BINNING_STAGE_ID:
case CONCURRENT_BINNING_BARRIER_STAGE_ID:
queue_id = BV_HW_QUEUE_ID;
break;
case WARNING_SLOW_CLEAR_LRZ_STAGE_ID:
case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID:
case WARNING_LRZ_DISABLED_STAGE_ID:
case WARNING_LRZ_WRITE_DISABLED_STAGE_ID:
case WARNING_FDM_FORCE_DISABLED_STAGE_ID:
queue_id = PERF_WARNINGS_QUEUE_ID;
break;
default:
break;
}
@ -644,6 +676,11 @@ CREATE_EVENT_CALLBACK(update_buffer, UPDATE_BUFFER_STAGE_ID)
CREATE_EVENT_CALLBACK(resolve_image, RESOLVE_IMAGE_STAGE_ID)
CREATE_EVENT_CALLBACK(slow_clear_lrz, SLOW_CLEAR_LRZ_STAGE_ID)
CREATE_EVENT_CALLBACK(disable_lrz, DISABLE_LRZ_STAGE_ID)
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_slow_clear_lrz, WARNING_SLOW_CLEAR_LRZ_STAGE_ID)
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_depth_image_no_lrz, WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID)
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_disabled, WARNING_LRZ_DISABLED_STAGE_ID)
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_write_disabled, WARNING_LRZ_WRITE_DISABLED_STAGE_ID)
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_fdm_force_disabled, WARNING_FDM_FORCE_DISABLED_STAGE_ID)
void
tu_perfetto_start_cmd_buffer_annotation(

View file

@ -81,6 +81,11 @@ enum tu_stage_id {
UPDATE_BUFFER_STAGE_ID,
SLOW_CLEAR_LRZ_STAGE_ID,
DISABLE_LRZ_STAGE_ID,
WARNING_SLOW_CLEAR_LRZ_STAGE_ID,
WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID,
WARNING_LRZ_DISABLED_STAGE_ID,
WARNING_LRZ_WRITE_DISABLED_STAGE_ID,
WARNING_FDM_FORCE_DISABLED_STAGE_ID,
TU_STAGE_ID_COUNT,
};

View file

@ -286,6 +286,18 @@ begin_end_tp('compute_indirect',
is_indirect=True, c_format="%ux%ux%u",
fields=['x', 'y', 'z'])])
# Performance warnings
# Every tracepoint in this group is registered under the same
# 'perf_warnings' toggle name.
singular_tp('warning_slow_clear_lrz', toggle_name='perf_warnings')
singular_tp('warning_depth_image_no_lrz', toggle_name='perf_warnings')
# The two LRZ-disable warnings additionally carry a `reason` C string,
# formatted with '%s'.
singular_tp('warning_lrz_disabled',
toggle_name='perf_warnings',
args=[Arg(type='const char *', var='reason', c_format='%s')])
singular_tp('warning_lrz_write_disabled',
toggle_name='perf_warnings',
args=[Arg(type='const char *', var='reason', c_format='%s')])
singular_tp('warning_fdm_force_disabled', toggle_name='perf_warnings')
# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT
for suffix in ["", "_rp"]:
begin_end_tp('cmd_buffer_annotation' + suffix,