tu/a750: invalidate vertex state before CP_DRAW_INDIRECT_MULTI

For devices that load shader consts through the preamble, HLSQ_INVALIDATE_CMD
should be used to invalidate VS state before CP_DRAW_INDIRECT_MULTI. This
prevents consts previously loaded through CP_LOAD_STATE6_GEOM for non-indirect
draws from affecting the consts needed for the current indirect draw.

Fixes two failing vkd3d-proton test cases on a750:
  test_vertex_id_dxbc
  test_vertex_id_dxil

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32924>
This commit is contained in:
Zan Dobersek 2025-01-07 11:46:36 +01:00 committed by Marge Bot
parent f887ae2f3c
commit 30a3d567c8
3 changed files with 26 additions and 9 deletions

View file

@ -5,8 +5,6 @@ test_sampler_rounding,Fail
test_shader_instructions,Fail
test_shader_sm66_quad_op_semantics,Fail
test_suballocate_small_textures_size,Fail
test_vertex_id_dxbc,Fail
test_vertex_id_dxil,Fail
# msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 67.5.20.1: hangcheck detected gpu lockup rb 0!
test_fence_wait_robustness,Crash

View file

@ -6120,13 +6120,30 @@ vs_params_offset(struct tu_cmd_buffer *cmd)
return param_offset;
}
template <chip CHIP>
static void
tu6_emit_empty_vs_params(struct tu_cmd_buffer *cmd)
{
if (cmd->state.vs_params.iova) {
if (cmd->state.last_vs_params.empty)
return;
if (cmd->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
struct tu_cs cs;
cmd->state.vs_params = tu_cs_draw_state(&cmd->sub_cs, &cs, 2);
/* CP_LOAD_STATE6_GEOM from previous draws can override consts loaded for
* indirect draws, causing problems like incorrect vertex index computation.
* VS state invalidation avoids that.
*/
tu_cs_emit_regs(&cs, HLSQ_INVALIDATE_CMD(CHIP,
.vs_state = true));
assert(cs.cur == cs.end);
} else {
cmd->state.vs_params = (struct tu_draw_state) {};
cmd->state.dirty |= TU_CMD_DIRTY_VS_PARAMS;
}
cmd->state.dirty |= TU_CMD_DIRTY_VS_PARAMS;
cmd->state.last_vs_params.empty = true;
}
static void
@ -6143,7 +6160,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
*/
if (!(cmd->state.dirty & (TU_CMD_DIRTY_DRAW_STATE | TU_CMD_DIRTY_VS_PARAMS |
TU_CMD_DIRTY_PROGRAM)) &&
cmd->state.vs_params.iova &&
!cmd->state.last_vs_params.empty &&
(offset == 0 || draw_id == cmd->state.last_vs_params.draw_id) &&
vertex_offset == cmd->state.last_vs_params.vertex_offset &&
first_instance == cmd->state.last_vs_params.first_instance) {
@ -6195,6 +6212,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
cmd->state.last_vs_params.vertex_offset = vertex_offset;
cmd->state.last_vs_params.first_instance = first_instance;
cmd->state.last_vs_params.draw_id = draw_id;
cmd->state.last_vs_params.empty = false;
struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
cmd->state.vs_params = (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
@ -6373,7 +6391,7 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(tu_buffer, buf, _buffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_empty_vs_params(cmd);
tu6_emit_empty_vs_params<CHIP>(cmd);
if (cmd->device->physical_device->info->a6xx.indirect_draw_wfm_quirk)
draw_wfm(cmd);
@ -6402,7 +6420,7 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(tu_buffer, buf, _buffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_empty_vs_params(cmd);
tu6_emit_empty_vs_params<CHIP>(cmd);
if (cmd->device->physical_device->info->a6xx.indirect_draw_wfm_quirk)
draw_wfm(cmd);
@ -6436,7 +6454,7 @@ tu_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(tu_buffer, count_buf, countBuffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_empty_vs_params(cmd);
tu6_emit_empty_vs_params<CHIP>(cmd);
/* It turns out that the firmware we have for a650 only partially fixed the
* problem with CP_DRAW_INDIRECT_MULTI not waiting for WFI's to complete
@ -6473,7 +6491,7 @@ tu_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(tu_buffer, count_buf, countBuffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_empty_vs_params(cmd);
tu6_emit_empty_vs_params<CHIP>(cmd);
draw_wfm(cmd);

View file

@ -266,6 +266,7 @@ struct tu_vs_params {
uint32_t vertex_offset;
uint32_t first_instance;
uint32_t draw_id;
bool empty;
};
struct tu_tess_params {