mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
tu/perfetto: Add performance warning tracepoints
LRZ and FDM have a few major performance pitfalls that are easy to miss unless they are clearly surfaced in a perfetto trace. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40935>
This commit is contained in:
parent
109d98b4cf
commit
8a146a1be9
8 changed files with 72 additions and 4 deletions
|
|
@ -2305,6 +2305,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
|
|||
|
||||
trace_start_slow_clear_lrz(&cmd->trace, &cmd->cs, cmd, image->vk.format,
|
||||
image->vk.extent.width, image->vk.extent.height);
|
||||
trace_warning_slow_clear_lrz(&cmd->trace, cs, cmd);
|
||||
|
||||
/* It is assumed that LRZ cache is invalidated at this point for
|
||||
* the writes here to become visible to LRZ.
|
||||
|
|
|
|||
|
|
@ -3051,6 +3051,10 @@ tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
|
|||
static void
|
||||
tu_renderpass_begin(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
if (cmd->state.pass->warn_fdm_force_disabled) {
|
||||
trace_warning_fdm_force_disabled(&cmd->trace, &cmd->cs, cmd);
|
||||
}
|
||||
|
||||
/* We need to re-emit any draw states that are patched in order for them to
|
||||
* be correctly added to the per-renderpass patchpoint list, even if they
|
||||
* are the same as before.
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ tu_lrz_disable_reason(struct tu_cmd_buffer *cmd, const char *reason) {
|
|||
cmd->state.rp.lrz_disabled_at_draw = cmd->state.rp.drawcall_count;
|
||||
perf_debug(cmd->device, "Disabling LRZ because '%s' at draw %u", reason,
|
||||
cmd->state.rp.lrz_disabled_at_draw);
|
||||
trace_warning_lrz_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -104,6 +105,7 @@ tu_lrz_disable_write_for_rp(struct tu_cmd_buffer *cmd, const char *reason)
|
|||
cmd->device,
|
||||
"Disabling LRZ write for the rest of the RP because '%s' at draw %u",
|
||||
reason, cmd->state.rp.lrz_write_disabled_at_draw);
|
||||
trace_warning_lrz_write_disabled(&cmd->rp_trace, &cmd->draw_cs, cmd, reason);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -217,7 +219,7 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
|
|||
const struct tu_image_view *view)
|
||||
{
|
||||
if (!view->image->lrz_layout.lrz_total_size) {
|
||||
assert(!cmd->device->use_lrz || !vk_format_has_depth(att->format));
|
||||
trace_warning_depth_image_no_lrz(&cmd->trace, &cmd->draw_cs, cmd);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -243,11 +245,15 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
|
|||
*/
|
||||
if ((view->image->vk.create_flags &
|
||||
VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_EXT) &&
|
||||
!clears_depth)
|
||||
!clears_depth) {
|
||||
tu_lrz_disable_reason(cmd, "FRAGMENT_DENSITY_MAP_OFFSET_BIT attachment used without depth attachment clear");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!clears_depth && !att->load)
|
||||
if (!clears_depth && !att->load) {
|
||||
tu_lrz_disable_reason(cmd, "Depth attachment isn't loaded or cleared");
|
||||
return;
|
||||
}
|
||||
|
||||
cmd->state.lrz.valid = true;
|
||||
cmd->state.lrz.valid_at_start = true;
|
||||
|
|
|
|||
|
|
@ -540,6 +540,7 @@ tu_render_pass_disable_fdm(struct tu_device *dev, struct tu_render_pass *pass)
|
|||
if (att->samples > 1 &&
|
||||
(att->load || att->load_stencil ||
|
||||
att->store || att->store_stencil)) {
|
||||
pass->warn_fdm_force_disabled = true;
|
||||
perf_debug(dev, "Disabling fragment density map due to %s of multisample attachment",
|
||||
(att->load || att->load_stencil) ? "load" : "store");
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -162,6 +162,8 @@ struct tu_render_pass
|
|||
bool allow_ib2_skipping;
|
||||
bool has_layered_fdm;
|
||||
|
||||
bool warn_fdm_force_disabled;
|
||||
|
||||
struct tu_subpass_barrier end_barrier;
|
||||
struct tu_subpass subpasses[0];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -76,7 +76,25 @@ static const struct {
|
|||
[UPDATE_BUFFER_STAGE_ID] = { "Update Buffer", "" },
|
||||
[SLOW_CLEAR_LRZ_STAGE_ID] = { "Slow Clear LRZ", "Perform slow clear of LRZ for this image, should be avoided" },
|
||||
[DISABLE_LRZ_STAGE_ID] = { "Disable LRZ", "Disable LRZ for this image, should be avoided" },
|
||||
// TODO add the rest
|
||||
[WARNING_SLOW_CLEAR_LRZ_STAGE_ID] = {
|
||||
"Slow LRZ Clear",
|
||||
"LRZ fast clear is not used. Possible causes:\n"
|
||||
"- The depth image is too large (width x height x layers x msaa) for LRZ fast clear\n"
|
||||
"- [Adreno A6XX] LRZ is being cleared with a depth clear value other than 0.0 or 1.0"
|
||||
},
|
||||
[WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID] = {
|
||||
"Depth Image Without LRZ",
|
||||
"LRZ isn't used because the depth image width x height x layers x msaa is too large"
|
||||
},
|
||||
[WARNING_LRZ_DISABLED_STAGE_ID] = {
|
||||
"LRZ Read/Write Disabled",
|
||||
"LRZ read/write is disabled for the rest of the RP. This should be avoided near the start of the RP, but is OK near the end" },
|
||||
[WARNING_LRZ_WRITE_DISABLED_STAGE_ID] = {
|
||||
"LRZ Write Disabled",
|
||||
"LRZ write is disabled for the rest of the RP. Avoid this near the start of the RP, it is OK near the end" },
|
||||
[WARNING_FDM_FORCE_DISABLED_STAGE_ID] = {
|
||||
"FDM Force Disabled",
|
||||
"FDM is disabled due to the presence of LOAD_OP_LOAD or STORE_OP_STORE on a multisample attachment" },
|
||||
};
|
||||
|
||||
static uint32_t gpu_clock_id;
|
||||
|
|
@ -245,6 +263,12 @@ get_stack(struct tu_device *dev, enum tu_stage_id stage_id)
|
|||
case CMD_BUFFER_ANNOTATION_STAGE_ID:
|
||||
case CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID:
|
||||
return &dev->perfetto.annotations_stack;
|
||||
case WARNING_SLOW_CLEAR_LRZ_STAGE_ID:
|
||||
case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID:
|
||||
case WARNING_LRZ_DISABLED_STAGE_ID:
|
||||
case WARNING_LRZ_WRITE_DISABLED_STAGE_ID:
|
||||
case WARNING_FDM_FORCE_DISABLED_STAGE_ID:
|
||||
return &dev->perfetto.sticky_warnings_stack;
|
||||
default:
|
||||
return &dev->perfetto.render_stack;
|
||||
}
|
||||
|
|
@ -359,6 +383,14 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
|
|||
case CONCURRENT_BINNING_STAGE_ID:
|
||||
case CONCURRENT_BINNING_BARRIER_STAGE_ID:
|
||||
queue_id = BV_HW_QUEUE_ID;
|
||||
break;
|
||||
case WARNING_SLOW_CLEAR_LRZ_STAGE_ID:
|
||||
case WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID:
|
||||
case WARNING_LRZ_DISABLED_STAGE_ID:
|
||||
case WARNING_LRZ_WRITE_DISABLED_STAGE_ID:
|
||||
case WARNING_FDM_FORCE_DISABLED_STAGE_ID:
|
||||
queue_id = PERF_WARNINGS_QUEUE_ID;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -644,6 +676,11 @@ CREATE_EVENT_CALLBACK(update_buffer, UPDATE_BUFFER_STAGE_ID)
|
|||
CREATE_EVENT_CALLBACK(resolve_image, RESOLVE_IMAGE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(slow_clear_lrz, SLOW_CLEAR_LRZ_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(disable_lrz, DISABLE_LRZ_STAGE_ID)
|
||||
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_slow_clear_lrz, WARNING_SLOW_CLEAR_LRZ_STAGE_ID)
|
||||
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_depth_image_no_lrz, WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID)
|
||||
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_disabled, WARNING_LRZ_DISABLED_STAGE_ID)
|
||||
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_lrz_write_disabled, WARNING_LRZ_WRITE_DISABLED_STAGE_ID)
|
||||
CREATE_STICKY_WARNING_EVENT_CALLBACK(warning_fdm_force_disabled, WARNING_FDM_FORCE_DISABLED_STAGE_ID)
|
||||
|
||||
void
|
||||
tu_perfetto_start_cmd_buffer_annotation(
|
||||
|
|
|
|||
|
|
@ -81,6 +81,11 @@ enum tu_stage_id {
|
|||
UPDATE_BUFFER_STAGE_ID,
|
||||
SLOW_CLEAR_LRZ_STAGE_ID,
|
||||
DISABLE_LRZ_STAGE_ID,
|
||||
WARNING_SLOW_CLEAR_LRZ_STAGE_ID,
|
||||
WARNING_DEPTH_IMAGE_NO_LRZ_STAGE_ID,
|
||||
WARNING_LRZ_DISABLED_STAGE_ID,
|
||||
WARNING_LRZ_WRITE_DISABLED_STAGE_ID,
|
||||
WARNING_FDM_FORCE_DISABLED_STAGE_ID,
|
||||
|
||||
TU_STAGE_ID_COUNT,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -286,6 +286,18 @@ begin_end_tp('compute_indirect',
|
|||
is_indirect=True, c_format="%ux%ux%u",
|
||||
fields=['x', 'y', 'z'])])
|
||||
|
||||
# Performance warnings
|
||||
|
||||
singular_tp('warning_slow_clear_lrz', toggle_name='perf_warnings')
|
||||
singular_tp('warning_depth_image_no_lrz', toggle_name='perf_warnings')
|
||||
singular_tp('warning_lrz_disabled',
|
||||
toggle_name='perf_warnings',
|
||||
args=[Arg(type='const char *', var='reason', c_format='%s')])
|
||||
singular_tp('warning_lrz_write_disabled',
|
||||
toggle_name='perf_warnings',
|
||||
args=[Arg(type='const char *', var='reason', c_format='%s')])
|
||||
singular_tp('warning_fdm_force_disabled', toggle_name='perf_warnings')
|
||||
|
||||
# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT
|
||||
for suffix in ["", "_rp"]:
|
||||
begin_end_tp('cmd_buffer_annotation' + suffix,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue