mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
turnip: Lazily call tu6_emit_descriptor_sets() at draw time.
This lets us batch up the state changes from multiple vkCmdBindDescriptorSets calls, which ANGLE and zink will both issue for a single draw. Improves ANGLE (sysmem) driver_overhead perf by 5.18806% +/- 1.03444% (n=5). Improves ANGLE aztec_ruins_high perf by ~0.3% (a clear result in the graph, but the screen went to sleep midway through the run, so it was high variance). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20084>
This commit is contained in:
parent
73db82c816
commit
c1968deec2
2 changed files with 35 additions and 14 deletions
|
|
@ -2085,7 +2085,6 @@ tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_draw_state(&cmd->sub_cs, &state_cs,
|
||||
4 + 4 * descriptors_state->max_sets_bound +
|
||||
(descriptors_state->dynamic_bound ? 6 : 0));
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
|
||||
cs = &state_cs;
|
||||
} else {
|
||||
assert(bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
|
|
@ -2094,7 +2093,6 @@ tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
|
|||
hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
|
||||
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
|
||||
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
|
||||
cs = &cmd->cs;
|
||||
}
|
||||
|
||||
|
|
@ -2125,6 +2123,22 @@ tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
}
|
||||
|
||||
/* We lazily emit the draw state for descriptor sets at draw time, so that we can
|
||||
* batch together multiple tu_CmdBindDescriptorSets() calls. ANGLE and zink
|
||||
* will often emit multiple bind calls in a draw.
|
||||
*/
|
||||
static void
|
||||
tu_dirty_desc_sets(struct tu_cmd_buffer *cmd,
|
||||
VkPipelineBindPoint pipelineBindPoint)
|
||||
{
|
||||
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) {
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS;
|
||||
} else {
|
||||
assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS;
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
||||
VkPipelineBindPoint pipelineBindPoint,
|
||||
|
|
@ -2239,7 +2253,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
descriptors_state->dynamic_bound = true;
|
||||
}
|
||||
|
||||
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
|
||||
tu_dirty_desc_sets(cmd, pipelineBindPoint);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -2284,7 +2298,7 @@ tu_CmdSetDescriptorBufferOffsetsEXT(
|
|||
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
|
||||
}
|
||||
|
||||
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
|
||||
tu_dirty_desc_sets(cmd, pipelineBindPoint);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -2307,7 +2321,7 @@ tu_CmdBindDescriptorBufferEmbeddedSamplersEXT(
|
|||
|
||||
descriptors_state->set_iova[set] = set_layout->embedded_samplers->iova | 3;
|
||||
|
||||
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
|
||||
tu_dirty_desc_sets(cmd, pipelineBindPoint);
|
||||
}
|
||||
|
||||
static enum VkResult
|
||||
|
|
@ -2645,7 +2659,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
|
||||
cmd->state.pipeline = pipeline;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS |
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS | TU_CMD_DIRTY_SHADER_CONSTS |
|
||||
TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS;
|
||||
|
||||
if (pipeline->output.feedback_loop_may_involve_textures &&
|
||||
|
|
@ -4924,7 +4938,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
|
||||
/* Early exit if there is nothing to emit, saves CPU cycles */
|
||||
uint32_t dirty = cmd->state.dirty;
|
||||
if (!(dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD))
|
||||
if (!(dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS))
|
||||
return VK_SUCCESS;
|
||||
|
||||
bool dirty_lrz =
|
||||
|
|
@ -5021,6 +5035,9 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
cmd->state.patch_control_points);
|
||||
}
|
||||
|
||||
if (dirty & TU_CMD_DIRTY_DESC_SETS)
|
||||
tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
|
||||
/* for the first draw in a renderpass, re-emit all the draw states
|
||||
*
|
||||
* and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
|
||||
|
|
@ -5061,7 +5078,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
emit_patch_control_points = false;
|
||||
uint32_t draw_state_count =
|
||||
((dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 1 : 0) +
|
||||
((dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
|
||||
((dirty & TU_CMD_DIRTY_DESC_SETS) ? 1 : 0) +
|
||||
((dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
|
||||
((dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
|
||||
(dirty_lrz ? 1 : 0);
|
||||
|
|
@ -5090,8 +5107,10 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
|
||||
if (dirty & TU_CMD_DIRTY_SHADER_CONSTS)
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
|
||||
if (dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
|
||||
if (dirty & TU_CMD_DIRTY_DESC_SETS) {
|
||||
/* tu6_emit_descriptor_sets emitted the cmd->state.desc_sets draw state. */
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
|
||||
}
|
||||
if (dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
|
||||
if (emit_binding_stride) {
|
||||
|
|
@ -5120,7 +5139,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
* bits to preserve instead. The only things not emitted here are
|
||||
* compute-related state.
|
||||
*/
|
||||
cmd->state.dirty &= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
|
||||
cmd->state.dirty &= TU_CMD_DIRTY_COMPUTE_DESC_SETS;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -5730,10 +5749,12 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_emit_compute_driver_params(cmd, cs, pipeline, info);
|
||||
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD)
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESC_SETS) {
|
||||
tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
tu_cs_emit_state_ib(cs, pipeline->load_state);
|
||||
}
|
||||
|
||||
cmd->state.dirty &= ~TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
|
||||
cmd->state.dirty &= ~TU_CMD_DIRTY_COMPUTE_DESC_SETS;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
|
||||
|
|
|
|||
|
|
@ -58,8 +58,8 @@ enum tu_cmd_dirty_bits
|
|||
TU_CMD_DIRTY_RAST = BIT(2),
|
||||
TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
|
||||
TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
|
||||
TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
|
||||
TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
|
||||
TU_CMD_DIRTY_DESC_SETS = BIT(5),
|
||||
TU_CMD_DIRTY_COMPUTE_DESC_SETS = BIT(6),
|
||||
TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
|
||||
TU_CMD_DIRTY_LRZ = BIT(8),
|
||||
TU_CMD_DIRTY_VS_PARAMS = BIT(9),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue