turnip: emit VPC_SO_DISABLE in xfb begin/end

SO (stream-out) was always enabled before this change.  That meant that,
after a call to tu_CmdBindTransformFeedbackBuffersEXT to emit
VPC_SO_BUFFER_SIZE, any draw call (from the same render pass, in a
different render pass, or in a different cmdbuf) could potentially cause
writes to the SO buffers regardless of whether the draw was inside xfb
begin/end or not.

I chose to emit VPC_SO_DISABLE instead of using stateobjs, like
freedreno does, only because it is simpler.  It is not clear to me which
is more efficient for the HW.

This also fixes double SO writes for gmem rendering.  While
tu6_tile_render_begin was careful to disable SO for the draw pass,
tu6_emit_tile_select re-enabled it.

dEQP-VK.transform_feedback.* still passes.  This change also fixes
dEQP-GLES3.functional.transform_feedback.* on ANGLE.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16502>
This commit is contained in:
Chia-I Wu 2022-05-13 10:41:30 -07:00 committed by Marge Bot
parent 0b7751babf
commit d3d34ad476

View file

@ -670,8 +670,6 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu6_emit_window_offset(cs, x1, y1);
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(false));
if (use_hw_binning(cmd)) {
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
@ -1290,9 +1288,6 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
/* enable stream-out, with sysmem there is only one pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(false));
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x1);
@ -1340,9 +1335,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
const struct tu_framebuffer *fb = cmd->state.framebuffer;
if (use_hw_binning(cmd)) {
/* enable stream-out during binning pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(false));
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
@ -1351,9 +1343,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu6_emit_binning_pass(cmd, cs);
/* and disable stream-out for draw pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(true));
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_FORCE_LRZ_WRITE_DIS |
A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
@ -1370,9 +1359,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
tu_cs_emit(cs, 0x1);
} else {
/* no binning pass, so enable stream-out for draw pass:: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(false));
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
}
@ -2151,6 +2137,8 @@ tu_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer,
CP_COND_REG_EXEC_0_SYSMEM |
CP_COND_REG_EXEC_0_BINNING);
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(false));
/* TODO: only update offset for active buffers */
for (uint32_t i = 0; i < IR3_MAX_SO_BUFFERS; i++)
tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_OFFSET(i, cmd->state.streamout_offset[i]));
@ -2197,6 +2185,8 @@ tu_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer,
CP_COND_REG_EXEC_0_SYSMEM |
CP_COND_REG_EXEC_0_BINNING);
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(true));
/* TODO: only flush buffers that need to be flushed */
for (uint32_t i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
/* note: FLUSH_BASE is always the same, so it could go in init_hw()? */