turnip: skip unnecessary CP_REG_TEST for cond load/store

When no attachment allows conditional load/store, skip the unnecessary
CP_REG_TEST.

This avoids a performance trap on a618 (gen1): CP_REG_TEST, like any
command that reads a register, is slow on a618.

The glmark2 score improves from 830 to 1001.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8162
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21206>
This commit is contained in:
Chia-I Wu 2023-02-08 12:25:21 -08:00 committed by Marge Bot
parent 37f6714fe7
commit 0278462575
3 changed files with 14 additions and 3 deletions

View file

@ -736,7 +736,8 @@ static void
tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
uint32_t pipe, uint32_t slot, bool skip_wfm)
{
if (cmd->state.tiling->binning_possible) {
if (cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store) {
tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(pipe)) |
A6XX_CP_REG_TEST_0_BIT(slot) |
@ -855,8 +856,10 @@ tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
tu6_emit_blit_scissor(cmd, cs, true);
const bool cond_exec_allowed = cmd->state.tiling->binning &&
cmd->state.pass->has_cond_load_store;
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
tu_load_gmem_attachment(cmd, cs, i, cmd->state.tiling->binning, false);
tu_load_gmem_attachment(cmd, cs, i, cond_exec_allowed, false);
}
static void
@ -873,9 +876,11 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem) {
const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store;
tu_store_gmem_attachment(cmd, cs, a, a,
fb->layers, subpass->multiview_mask,
cmd->state.tiling->binning_possible);
cond_exec_allowed);
}
}

View file

@ -547,6 +547,9 @@ tu_render_pass_cond_config(struct tu_render_pass *pass)
(att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
att->cond_store_allowed =
(att->store || att->store_stencil) && !att->clear_mask;
pass->has_cond_load_store |=
att->cond_load_allowed | att->cond_store_allowed;
}
}

View file

@ -108,7 +108,10 @@ struct tu_render_pass
uint32_t sysmem_bandwidth_per_pixel;
struct tu_subpass_attachment *subpass_attachments;
struct tu_render_pass_attachment *attachments;
bool has_cond_load_store;
struct tu_subpass_barrier end_barrier;
struct tu_subpass subpasses[0];
};