v3dv: skip binning sync if binning shaders don't access external resources
Until now we have been enabling binning sync whenever we found a barrier involving geometry stages (a bcl barrier). However, if the binning shaders involved with the job don't access any external buffers or images, there is no reason to sync at the binning stage.

With this patch we no longer consume the bcl barrier flag from the command buffer state immediately when we create a new job. Instead, we check that state when we are about to emit a draw call, by testing whether the shaders involved in binning may access external resources such as vertex buffers, UBOs, or textures. If none of the draw calls in the job use binning shaders that access external resources, we never enable binning sync for the job.

It is possible that a binning shader uses resources that are not synchronized through a barrier, though, so we track the access masks used with barriers separately for buffers and images, to better identify whether the binning shader is affected by a given barrier.

If a serialized job never consumes the bcl barrier flag because none of its draw calls required a bcl sync, the flag is cleared when the job is finished.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16322>
parent 46dd903cc6
commit a981ac0539
3 changed files with 181 additions and 32 deletions
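Distilled from the diff below, here is a minimal sketch of the new flow in plain C. It is illustrative only: `barrier_state`, `record_bcl_barrier`, `pre_draw`, and `binning_uses_external_resources` are hypothetical stand-ins for the driver's actual state and checks, not v3dv identifiers.

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t access_flags;     /* stand-in for VkAccessFlags */

struct barrier_state {
   access_flags bcl_buffer_access; /* dstAccessMask bits from buffer/memory barriers */
   access_flags bcl_image_access;  /* dstAccessMask bits from image/memory barriers */
};

struct job {
   bool serialize;                 /* job consumed a barrier */
   bool needs_bcl_sync;            /* wait at the binning stage, not just render */
};

/* Recording a barrier that involves geometry (binning) stages no longer
 * sets a boolean; it accumulates the destination access masks. */
void record_bcl_barrier(struct barrier_state *s,
                        access_flags buf_dst, access_flags img_dst)
{
   s->bcl_buffer_access |= buf_dst;
   s->bcl_image_access |= img_dst;
}

/* The decision is deferred to draw time: only a draw whose binning shaders
 * can actually observe the synchronized resources consumes the barrier. */
void pre_draw(struct barrier_state *s, struct job *job,
              bool binning_uses_external_resources)
{
   if (job->serialize &&
       (s->bcl_buffer_access || s->bcl_image_access) &&
       binning_uses_external_resources) {
      job->needs_bcl_sync = true; /* sync at binning */
      s->bcl_buffer_access = 0;   /* barrier consumed */
      s->bcl_image_access = 0;
   }
}

If no draw in the job ever trips this check, the masks survive until v3dv_cmd_buffer_finish_job(), which clears them for serialized jobs (first hunk below).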
src/broadcom/vulkan/v3dv_cmd_buffer.c

@@ -589,6 +589,18 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
    if (!job)
       return;
 
+   /* If a job is serialized it means it consumed a barrier. If the barrier
+    * had the BCL sync flag and the job actually required to apply it, then
+    * it would have done so and cleared it, but if the flag was not applied
+    * because the job's binning shaders didn't require it, we want to clear
+    * that state before we start a new job or record a new pipeline barrier
+    * (we always finish the current job before processing a pipeline barrier).
+    */
+   if (job->serialize) {
+      cmd_buffer->state.bcl_barrier_buffer_access = 0;
+      cmd_buffer->state.bcl_barrier_image_access = 0;
+   }
+
    if (cmd_buffer->state.oom) {
       v3dv_job_destroy(job);
       cmd_buffer->state.job = NULL;
@@ -676,14 +688,7 @@ cmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer,
       return;
 
    job->serialize = true;
-   if (cmd_buffer->state.has_bcl_barrier &&
-       (job->type == V3DV_JOB_TYPE_GPU_CL ||
-        job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY)) {
-      job->needs_bcl_sync = true;
-   }
-
    cmd_buffer->state.has_barrier = false;
-   cmd_buffer->state.has_bcl_barrier = false;
 }
 
 void
@@ -1657,7 +1662,8 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
                                 const VkCommandBuffer *cmd_buffers)
 {
    bool pending_barrier = false;
-   bool pending_bcl_barrier = false;
+   VkAccessFlags pending_bcl_barrier_buffer_access = 0;
+   VkAccessFlags pending_bcl_barrier_image_access = 0;
    for (uint32_t i = 0; i < cmd_buffer_count; i++) {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
 
@@ -1687,10 +1693,13 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
 
       if (pending_barrier) {
          job->serialize = true;
-         if (pending_bcl_barrier)
+         if (pending_bcl_barrier_buffer_access ||
+             pending_bcl_barrier_image_access) {
            job->needs_bcl_sync = true;
+         }
          pending_barrier = false;
-         pending_bcl_barrier = false;
+         pending_bcl_barrier_buffer_access = 0;
+         pending_bcl_barrier_image_access = 0;
      }
   }
 
@@ -1698,14 +1707,22 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
       * barrier state consumed with whatever comes after it (first job in
       * the next secondary or the primary, if this was the last secondary).
       */
-     assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
+     assert(secondary->state.has_barrier ||
+            (!secondary->state.bcl_barrier_buffer_access &&
+             !secondary->state.bcl_barrier_image_access));
      pending_barrier = secondary->state.has_barrier;
-     pending_bcl_barrier = secondary->state.has_bcl_barrier;
+     pending_bcl_barrier_buffer_access =
+        secondary->state.bcl_barrier_buffer_access;
+     pending_bcl_barrier_image_access =
+        secondary->state.bcl_barrier_image_access;
   }
 
   if (pending_barrier) {
      primary->state.has_barrier = true;
-     primary->state.has_bcl_barrier |= pending_bcl_barrier;
+     primary->state.bcl_barrier_buffer_access |=
+        pending_bcl_barrier_buffer_access;
+     primary->state.bcl_barrier_image_access |=
+        pending_bcl_barrier_image_access;
   }
 }
 
@@ -2402,8 +2419,95 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
    v3dv_job_destroy(old_job);
 }
 
+static bool
+cmd_buffer_binning_sync_required(struct v3dv_cmd_buffer *cmd_buffer,
+                                 struct v3dv_pipeline *pipeline,
+                                 bool indexed, bool indirect)
+{
+   const struct v3dv_descriptor_maps *vs_bin_maps =
+      pipeline->shared_data->maps[BROADCOM_SHADER_VERTEX_BIN];
+
+   const struct v3dv_descriptor_maps *gs_bin_maps =
+      pipeline->shared_data->maps[BROADCOM_SHADER_GEOMETRY_BIN];
+
+   if (cmd_buffer->state.bcl_barrier_buffer_access) {
+      VkAccessFlags buffer_access = cmd_buffer->state.bcl_barrier_buffer_access;
+
+      /* Index buffer read */
+      if (indexed && (buffer_access & VK_ACCESS_INDEX_READ_BIT))
+         return true;
+
+      /* Indirect buffer read */
+      if (indirect && (buffer_access & VK_ACCESS_INDIRECT_COMMAND_READ_BIT))
+         return true;
+
+      /* Attribute read */
+      if (buffer_access & VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
+         const struct v3d_vs_prog_data *prog_data =
+            pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
+
+         for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
+            if (prog_data->vattr_sizes[i] > 0)
+               return true;
+         }
+      }
+
+      /* UBO / SSBO read */
+      if (buffer_access & (VK_ACCESS_UNIFORM_READ_BIT |
+                           VK_ACCESS_SHADER_READ_BIT |
+                           VK_ACCESS_MEMORY_READ_BIT)) {
+         if (vs_bin_maps->ubo_map.num_desc > 0 ||
+             vs_bin_maps->ssbo_map.num_desc > 0) {
+            return true;
+         }
+
+         if (gs_bin_maps && (gs_bin_maps->ubo_map.num_desc > 0 ||
+                             gs_bin_maps->ssbo_map.num_desc > 0)) {
+            return true;
+         }
+      }
+
+      /* SSBO write */
+      if (buffer_access & (VK_ACCESS_SHADER_WRITE_BIT |
+                           VK_ACCESS_MEMORY_WRITE_BIT)) {
+         if (vs_bin_maps->ssbo_map.num_desc > 0)
+            return true;
+
+         if (gs_bin_maps && gs_bin_maps->ssbo_map.num_desc > 0)
+            return true;
+      }
+   }
+
+   if (cmd_buffer->state.bcl_barrier_image_access) {
+      VkAccessFlags image_access = cmd_buffer->state.bcl_barrier_image_access;
+
+      /* Image load / store */
+      if (image_access & (VK_ACCESS_SHADER_READ_BIT |
+                          VK_ACCESS_SHADER_WRITE_BIT |
+                          VK_ACCESS_MEMORY_READ_BIT |
+                          VK_ACCESS_MEMORY_WRITE_BIT)) {
+         if (vs_bin_maps->texture_map.num_desc > 0 ||
+             vs_bin_maps->sampler_map.num_desc > 0) {
+            return true;
+         }
+      }
+   }
+
+   return false;
+}
+
+static void
+consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
+{
+   job->needs_bcl_sync = true;
+   cmd_buffer->state.bcl_barrier_buffer_access = 0;
+   cmd_buffer->state.bcl_barrier_image_access = 0;
+}
+
 void
-v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
+v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
+                              bool indexed, bool indirect)
 {
    assert(cmd_buffer->state.gfx.pipeline);
    assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
@@ -2426,6 +2530,20 @@ v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
    struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
    job->draw_count++;
 
+   /* If this job is serialized (has consumed a barrier) then check if we need
+    * to sync at the binning stage by testing if the binning shaders involved
+    * with the draw call require access to external resources.
+    */
+   if (job->serialize && (cmd_buffer->state.bcl_barrier_buffer_access ||
+                          cmd_buffer->state.bcl_barrier_image_access)) {
+      assert(!job->needs_bcl_sync);
+      struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+      if (cmd_buffer_binning_sync_required(cmd_buffer, pipeline,
+                                           indexed, indirect)) {
+         consume_bcl_sync(cmd_buffer, job);
+      }
+   }
+
    /* GL shader state binds shaders, uniform and vertex attribute state. The
     * compiler injects uniforms to handle some descriptor types (such as
    * textures), so we need to regen that when descriptor state changes.
@@ -2509,7 +2627,7 @@ cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
 
    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
    if (likely(!pass->multiview_enabled)) {
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, false);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
       return;
    }
@@ -2517,7 +2635,7 @@ cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
    uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
    while (view_mask) {
       cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, false);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
    }
 }
@@ -2557,7 +2675,7 @@ v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
 
    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
    if (likely(!pass->multiview_enabled)) {
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, false);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
          (cmd_buffer, indexCount, instanceCount,
           firstIndex, vertexOffset, firstInstance);
@@ -2567,7 +2685,7 @@ v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
    uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
    while (view_mask) {
       cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, false);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
          (cmd_buffer, indexCount, instanceCount,
           firstIndex, vertexOffset, firstInstance);
@@ -2590,7 +2708,7 @@ v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
 
    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
    if (likely(!pass->multiview_enabled)) {
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, true);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
          (cmd_buffer, buffer, offset, drawCount, stride);
       return;
@@ -2599,7 +2717,7 @@ v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
    uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
    while (view_mask) {
       cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, true);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
          (cmd_buffer, buffer, offset, drawCount, stride);
    }
@@ -2621,7 +2739,7 @@ v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
 
    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
    if (likely(!pass->multiview_enabled)) {
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, true);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
          (cmd_buffer, buffer, offset, drawCount, stride);
       return;
@@ -2630,7 +2748,7 @@ v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
    uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
    while (view_mask) {
       cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
-      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, true);
       v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
          (cmd_buffer, buffer, offset, drawCount, stride);
    }
@@ -2686,7 +2804,22 @@ v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
                      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
-      cmd_buffer->state.has_bcl_barrier = true;
+      for (int i = 0; i < memoryBarrierCount; i++) {
+         cmd_buffer->state.bcl_barrier_buffer_access |=
+            pMemoryBarriers[i].dstAccessMask;
+         cmd_buffer->state.bcl_barrier_image_access |=
+            pMemoryBarriers[i].dstAccessMask;
+      }
+      for (int i = 0; i < bufferBarrierCount; i++) {
+         cmd_buffer->state.bcl_barrier_buffer_access |=
+            pBufferBarriers[i].dstAccessMask;
+      }
+      for (int i = 0; i < imageBarrierCount; i++) {
+         if (pImageBarriers[i].oldLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
+            cmd_buffer->state.bcl_barrier_image_access |=
+               pImageBarriers[i].dstAccessMask;
+         }
+      }
    }
 }
 
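To see how these masks get populated in practice, here is a hedged application-side example (assuming `cmd_buf` and `vertex_buffer` are valid handles from the application, with the buffer written by an earlier compute dispatch). The barrier's dstAccessMask lands in bcl_barrier_buffer_access, and VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT is one of the bits cmd_buffer_binning_sync_required() tests, so only draws whose binning vertex shader actually reads attributes pay the binning sync:

/* The dstAccessMask recorded here is OR'ed into
 * cmd_buffer->state.bcl_barrier_buffer_access by the hunk above. */
VkBufferMemoryBarrier barrier = {
   .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
   .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
   .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
   .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
   .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
   .buffer = vertex_buffer, /* assumed: produced by a prior dispatch */
   .offset = 0,
   .size = VK_WHOLE_SIZE,
};
vkCmdPipelineBarrier(cmd_buf,
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                     VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, /* one of the stage bits tested above */
                     0,           /* dependencyFlags */
                     0, NULL,     /* memory barriers */
                     1, &barrier, /* buffer barriers */
                     0, NULL);    /* image barriers */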
src/broadcom/vulkan/v3dv_private.h
@@ -1161,7 +1161,8 @@ v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t *alloc_count,
                                    void **ptr);
 
-void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);
+void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
+                                   bool indexed, bool indirect);
 
 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
  * cmd_buffer specific header?
@@ -1254,7 +1255,8 @@ struct v3dv_cmd_buffer_state {
     * process.
     */
    bool has_barrier;
-   bool has_bcl_barrier;
+   VkAccessFlags bcl_barrier_buffer_access;
+   VkAccessFlags bcl_barrier_image_access;
 
    /* Secondary command buffer state */
    struct {
src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -1631,7 +1631,8 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
     * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
     */
    bool pending_barrier = false;
-   bool pending_bcl_barrier = false;
+   VkAccessFlags pending_bcl_barrier_buffer_access = 0;
+   VkAccessFlags pending_bcl_barrier_image_access = 0;
    for (uint32_t i = 0; i < cmd_buffer_count; i++) {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
 
@@ -1667,7 +1668,10 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
          struct v3dv_job *primary_job = primary->state.job;
          if (!primary_job || secondary_job->serialize || pending_barrier) {
             const bool needs_bcl_barrier =
-               secondary_job->needs_bcl_sync || pending_bcl_barrier;
+               secondary_job->needs_bcl_sync ||
+               pending_bcl_barrier_buffer_access ||
+               pending_bcl_barrier_image_access;
+
             primary_job =
                cmd_buffer_subpass_split_for_barrier(primary,
                                                     needs_bcl_barrier);
@@ -1709,13 +1713,16 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
          v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
          if (pending_barrier) {
             secondary_job->serialize = true;
-            if (pending_bcl_barrier)
+            if (pending_bcl_barrier_buffer_access ||
+                pending_bcl_barrier_image_access) {
                secondary_job->needs_bcl_sync = true;
+            }
          }
       }
 
       pending_barrier = false;
-      pending_bcl_barrier = false;
+      pending_bcl_barrier_buffer_access = 0;
+      pending_bcl_barrier_image_access = 0;
    }
 
    /* If the secondary has recorded any vkCmdEndQuery commands, we need to
@@ -1727,14 +1734,21 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
       /* If this secondary had any pending barrier state we will need that
        * barrier state consumed with whatever comes next in the primary.
        */
-      assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
+      assert(secondary->state.has_barrier ||
+             (!secondary->state.bcl_barrier_buffer_access &&
+              !secondary->state.bcl_barrier_image_access));
+
       pending_barrier = secondary->state.has_barrier;
-      pending_bcl_barrier = secondary->state.has_bcl_barrier;
+      pending_bcl_barrier_buffer_access =
+         secondary->state.bcl_barrier_buffer_access;
+      pending_bcl_barrier_image_access =
+         secondary->state.bcl_barrier_image_access;
    }
 
    if (pending_barrier) {
       primary->state.has_barrier = true;
-      primary->state.has_bcl_barrier |= pending_bcl_barrier;
+      primary->state.bcl_barrier_buffer_access |= pending_bcl_barrier_buffer_access;
+      primary->state.bcl_barrier_image_access |= pending_bcl_barrier_image_access;
    }
 }