tu: Update RP state depending on pipeline in first RP draw

The pipeline used in a render pass (RP) may have been bound in an earlier RP,
so we have to save the relevant state and re-apply it on the first draw.

Fixes GPU hang in the following test with forced binning + reg stomping:
 dEQP-VK.transform_feedback.primitives_generated_query.get.queue_reset.32bit.tese.xfb.color_write_disable_static.patch_list.pgq_default_xfb_default.two_draws.pqg_first.none_2_queries

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28326>
This commit is contained in:
Danylo Piliaiev 2024-03-21 16:44:32 +01:00 committed by Marge Bot
parent a19c511818
commit 5acdb22ba2
2 changed files with 50 additions and 37 deletions

View file

@ -3140,6 +3140,47 @@ tu_bind_fs(struct tu_cmd_buffer *cmd, struct tu_shader *fs)
}
}
/* Fold the pipeline-derived flags cached in tu_cmd_state
 * (pipeline_disable_gmem, pipeline_sysmem_single_prim_mode,
 * pipeline_has_tess) into the per-renderpass state in cmd_state->rp.
 *
 * Doing this at pipeline bind time alone is not sufficient: the pipeline
 * could have been bound at any point before the current renderpass, e.g.
 * in the previous renderpass.
 *
 * This function only ever raises the rp flags; it never clears them.
 */
static void
tu_pipeline_update_rp_state(struct tu_cmd_state *cmd_state)
{
   const bool wants_sysmem = cmd_state->pipeline_disable_gmem;
   const bool wants_single_prim = cmd_state->pipeline_sysmem_single_prim_mode;
   const bool uses_tess = cmd_state->pipeline_has_tess;

   /* NOTE(review): `cmd` is not a parameter of this function; the
    * perf_debug() calls below presumably rely on the macro discarding its
    * first argument -- confirm against the perf_debug definition.
    */

   if (wants_sysmem) {
      if (!cmd_state->rp.disable_gmem) {
         /* With VK_EXT_attachment_feedback_loop_layout a feedback loop may
          * involve sampled images or image resources, not just input
          * attachments, and we cannot simply patch gmem for an image in
          * the descriptors.
          *
          * For now, in the context of DXVK, the expectation is that only
          * a few drawcalls per frame use a feedback loop and that they are
          * wrapped in their own renderpasses, so forcing sysmem should be
          * acceptable.
          *
          * Should another translation layer need more, two further
          * optimizations are possible:
          *  - enable tiling when we can ensure there is no barrier in the
          *    renderpass;
          *  - check that both the pipeline and the attachments agree that
          *    a feedback loop is needed.
          */
         perf_debug(
            cmd->device,
            "Disabling gmem due to VK_EXT_attachment_feedback_loop_layout");
         cmd_state->rp.disable_gmem = true;
      }
   }

   if (wants_single_prim) {
      if (!cmd_state->rp.sysmem_single_prim_mode) {
         perf_debug(cmd->device, "single_prim_mode due to pipeline settings");
         cmd_state->rp.sysmem_single_prim_mode = true;
      }
   }

   /* No log message for tessellation; just record it on the renderpass. */
   if (uses_tess)
      cmd_state->rp.has_tess = true;
}
VKAPI_ATTR void VKAPI_CALL
tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@ -3187,40 +3228,11 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
cmd->state.load_state = pipeline->load_state;
cmd->state.prim_order_sysmem = pipeline->prim_order.state_sysmem;
cmd->state.prim_order_gmem = pipeline->prim_order.state_gmem;
cmd->state.pipeline_sysmem_single_prim_mode = pipeline->prim_order.sysmem_single_prim_mode;
cmd->state.pipeline_has_tess = pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
cmd->state.pipeline_disable_gmem = gfx_pipeline->feedback_loop_may_involve_textures;
if (gfx_pipeline->feedback_loop_may_involve_textures &&
!cmd->state.rp.disable_gmem) {
/* VK_EXT_attachment_feedback_loop_layout allows feedback loop to involve
* not only input attachments but also sampled images or image resources.
* But we cannot just patch gmem for image in the descriptors.
*
* At the moment, in context of DXVK, it is expected that only a few
* drawcalls in a frame would use feedback loop and they would be wrapped
* in their own renderpasses, so it should be ok to force sysmem.
*
* However, there are two further possible optimizations if need would
* arise for other translation layer:
* - Tiling could be enabled if we ensure that there is no barrier in
* the renderpass;
* - Check that both pipeline and attachments agree that feedback loop
* is needed.
*/
perf_debug(
cmd->device,
"Disabling gmem due to VK_EXT_attachment_feedback_loop_layout");
cmd->state.rp.disable_gmem = true;
}
if (pipeline->prim_order.sysmem_single_prim_mode &&
!cmd->state.rp.sysmem_single_prim_mode) {
if (gfx_pipeline->feedback_loop_color ||
gfx_pipeline->feedback_loop_ds) {
perf_debug(cmd->device, "single_prim_mode due to feedback loop");
} else {
perf_debug(cmd->device, "single_prim_mode due to rast order access");
}
cmd->state.rp.sysmem_single_prim_mode = true;
}
tu_pipeline_update_rp_state(&cmd->state);
if (pipeline->lrz_blend.valid) {
if (cmd->state.blend_reads_dest != pipeline->lrz_blend.reads_dest) {
@ -3263,10 +3275,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
u_foreach_bit(i, pipeline->set_state_mask)
cmd->state.dynamic_state[i] = pipeline->dynamic_state[i];
if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
cmd->state.rp.has_tess = true;
}
if (pipeline->program.per_view_viewport != cmd->state.per_view_viewport) {
cmd->state.per_view_viewport = pipeline->program.per_view_viewport;
cmd->state.dirty |= TU_CMD_DIRTY_PER_VIEW_VIEWPORT;
@ -5072,6 +5080,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
* is OK since CmdClearAttachments won't disable/overwrite them
*/
if (dirty & TU_CMD_DIRTY_DRAW_STATE) {
tu_pipeline_update_rp_state(&cmd->state);
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, program->config_state);

View file

@ -492,6 +492,9 @@ struct tu_cmd_state
bool stencil_front_write;
bool stencil_back_write;
bool pipeline_feedback_loop_ds;
bool pipeline_sysmem_single_prim_mode;
bool pipeline_has_tess;
bool pipeline_disable_gmem;
bool pipeline_blend_lrz, pipeline_bandwidth;
uint32_t pipeline_draw_states;