tu/autotune: Prefer SYSMEM when only SW binning is possible

When only SW binning is possible and not using HW binning would hurt
performance (i.e. more than 2 tiles), it is preferable to default to
SYSMEM, as the cost of GMEM rendering without HW binning is almost
certainly not going to be worth it.

Signed-off-by: Dhruv Mark Collins <mark@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37802>
This commit is contained in:
Dhruv Mark Collins 2025-10-09 13:56:54 +00:00
parent dde478ce98
commit 8e1fe9da20
3 changed files with 21 additions and 14 deletions

View file

@ -1310,7 +1310,7 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
return true;
}
return vsc->binning;
return vsc->binning_possible && vsc->binning_useful;
}
static bool
@ -1373,8 +1373,16 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
return true;
}
if (TU_DEBUG(GMEM))
if (TU_DEBUG(GMEM)) {
cmd->state.rp.gmem_disable_reason = "TU_DEBUG(GMEM)";
return false;
}
/* It's better to avoid GMEM in this case: there are too many tiles, but HW binning is not possible. */
if (!vsc->binning_possible && vsc->binning_useful) {
cmd->state.rp.gmem_disable_reason = "Too many tiles and HW binning is not possible";
return true;
}
bool use_sysmem = cmd->device->autotune->get_optimal_mode(cmd, rp_ctx) == tu_autotune::render_mode::SYSMEM;
if (use_sysmem)
@ -6413,7 +6421,7 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r
* (perf queries), then we can't do this optimization since the
* start-of-the-CS geometry condition will have been overwritten.
*/
bool cond_load_allowed = vsc->binning &&
bool cond_load_allowed = vsc->binning_possible &&
cmd->state.pass->has_cond_load_store &&
!cmd->state.rp.draw_cs_writes_to_cond_pred;

View file

@ -564,8 +564,11 @@ struct tu_vsc_config {
/* Whether binning could be used for gmem rendering using this framebuffer. */
bool binning_possible;
/* Whether binning should be used for gmem rendering using this framebuffer. */
bool binning;
/* Whether binning is useful for GMEM rendering performance using this framebuffer. This is independent of whether
* binning is possible, and is determined by the tile count. Not binning when it's useful would be a performance
* hazard, and GMEM rendering should be avoided in the case where it's useful to bin but not possible to do so.
*/
bool binning_useful;
/* pipe register values */
uint32_t pipe_config[MAX_VSC_PIPES];

View file

@ -460,16 +460,12 @@ tu_tiling_config_update_pipes(struct tu_vsc_config *vsc,
static void
tu_tiling_config_update_binning(struct tu_vsc_config *vsc, const struct tu_device *device)
{
if (vsc->binning_possible) {
vsc->binning = (vsc->tile_count.width * vsc->tile_count.height) > 2;
vsc->binning_useful = (vsc->tile_count.width * vsc->tile_count.height) > 2;
if (TU_DEBUG(FORCEBIN))
vsc->binning = true;
if (TU_DEBUG(NOBIN))
vsc->binning = false;
} else {
vsc->binning = false;
}
if (TU_DEBUG(FORCEBIN))
vsc->binning_useful = true;
if (TU_DEBUG(NOBIN))
vsc->binning_useful = false;
}
void