mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 22:30:12 +01:00
panvk/csf: Fix BY_REGION dependencies
When only the tile buffers are touched, it's okay to take care of the dependency at the draw level, with DCD_FLAGS_2, but as soon as one side of the dep has side effects that could impact the other side, we need to split the render pass and insert a real barrier, with a proper flush on read-only L1 caches.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Aksel Hjerpbakk <aksel.hjerpbakk@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38950>
This commit is contained in:
parent
a00f6ee033
commit
aa3c8e6fb0
3 changed files with 63 additions and 32 deletions
|
|
@ -4,16 +4,8 @@ asan-dEQP-VK.glsl.loops.special.do_while_dynamic_iterations.dowhile_trap_vertex,
|
||||||
# New failures with VKCTS 1.4.4.0
|
# New failures with VKCTS 1.4.4.0
|
||||||
dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.combined_image_sampler,Crash
|
dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.combined_image_sampler,Crash
|
||||||
dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.sampled_image,Crash
|
dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.sampled_image,Crash
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.read_write.shader_read_write,Fail
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.sampled_read_storage_write,Fail
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.shader_read_write,Fail
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.storage_read_storage_write,Fail
|
|
||||||
asan-dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.combined_image_sampler,Crash
|
asan-dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.combined_image_sampler,Crash
|
||||||
asan-dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.sampled_image,Crash
|
asan-dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.sampled_image,Crash
|
||||||
asan-dEQP-VK.image.general_layout.memory_barrier.fragment.read_write.shader_read_write,Fail
|
|
||||||
asan-dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.sampled_read_storage_write,Fail
|
|
||||||
asan-dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.shader_read_write,Fail
|
|
||||||
asan-dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.storage_read_storage_write,Fail
|
|
||||||
|
|
||||||
|
|
||||||
afbcp-spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
|
afbcp-spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
|
||||||
|
|
|
||||||
|
|
@ -198,9 +198,6 @@ dEQP-VK.glsl.swizzles.vector_swizzles.mediump_vec4_x_fragment
|
||||||
dEQP-VK.glsl.arrays.constructor.int4_fragment
|
dEQP-VK.glsl.arrays.constructor.int4_fragment
|
||||||
dEQP-VK.glsl.arrays.declaration.implicit_size_int_ivec3_fragment
|
dEQP-VK.glsl.arrays.declaration.implicit_size_int_ivec3_fragment
|
||||||
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.shader_read_write
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.sampled_read_storage_write
|
|
||||||
dEQP-VK.image.general_layout.memory_barrier.fragment.write_read.storage_read_storage_write
|
|
||||||
dEQP-VK.image.store.with_format.2d_array.r8_snorm_single_layer
|
dEQP-VK.image.store.with_format.2d_array.r8_snorm_single_layer
|
||||||
dEQP-VK.image.subresource_layout.2d.4_levels.r8g8b8a8_uint_offset
|
dEQP-VK.image.subresource_layout.2d.4_levels.r8g8b8a8_uint_offset
|
||||||
dEQP-VK.image.texel_view_compatible.graphic.extended.1d_image.texture_read.astc_5x4_unorm_block.r32g32b32a32_sint
|
dEQP-VK.image.texel_view_compatible.graphic.extended.1d_image.texture_read.astc_5x4_unorm_block.r32g32b32a32_sint
|
||||||
|
|
|
||||||
|
|
@ -386,6 +386,35 @@ add_memory_dependency(struct panvk_cache_flush_info *cache_flush,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
frag_subqueue_needs_sidefx_barrier(VkAccessFlags2 src_access,
|
||||||
|
VkAccessFlags2 dst_access)
|
||||||
|
{
|
||||||
|
bool src_reads_mem = src_access & (VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||||
|
VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
||||||
|
VK_ACCESS_2_MEMORY_READ_BIT);
|
||||||
|
bool dst_reads_mem = dst_access & (VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||||
|
VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
||||||
|
VK_ACCESS_2_MEMORY_READ_BIT);
|
||||||
|
bool src_writes_mem = src_access & (VK_ACCESS_2_MEMORY_WRITE_BIT |
|
||||||
|
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT);
|
||||||
|
bool dst_writes_mem = dst_access & (VK_ACCESS_2_MEMORY_WRITE_BIT |
|
||||||
|
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT);
|
||||||
|
|
||||||
|
/* If there's no read -> write, write -> write or write -> read
|
||||||
|
* memory dependency, we can skip, otherwise we have to split the
|
||||||
|
* render pass. We could possibly add the dependency at the draw level,
|
||||||
|
* using extra bits in the DCD2 flags to encode storage reads/writes and
|
||||||
|
* adding extra WAIT/WAIT_RESOURCE shader side, but we can't flush the
|
||||||
|
* texture cache, so it wouldn't work for SAMPLED_READ. Let's keep things
|
||||||
|
* simple and consider any side effect as requiring a split, until this
|
||||||
|
* proves to be a real bottleneck.
|
||||||
|
*/
|
||||||
|
return (src_reads_mem && dst_writes_mem) ||
|
||||||
|
(src_writes_mem && dst_writes_mem) ||
|
||||||
|
(src_writes_mem && dst_reads_mem);
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
should_split_render_pass(const uint32_t wait_masks[static PANVK_SUBQUEUE_COUNT],
|
should_split_render_pass(const uint32_t wait_masks[static PANVK_SUBQUEUE_COUNT],
|
||||||
VkAccessFlags2 src_access, VkAccessFlags2 dst_access)
|
VkAccessFlags2 src_access, VkAccessFlags2 dst_access)
|
||||||
|
|
@ -406,15 +435,19 @@ should_split_render_pass(const uint32_t wait_masks[static PANVK_SUBQUEUE_COUNT],
|
||||||
BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT))
|
BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* split if the fragment subqueue self-waits with a feedback loop, because
|
if (wait_masks[PANVK_SUBQUEUE_FRAGMENT] &
|
||||||
* we lower subpassLoad to texelFetch
|
BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) {
|
||||||
*/
|
/* split if the fragment subqueue self-waits with a feedback loop, because
|
||||||
if ((wait_masks[PANVK_SUBQUEUE_FRAGMENT] &
|
* we lower subpassLoad to texelFetch
|
||||||
BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) &&
|
*/
|
||||||
(src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
|
if ((src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
|
||||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) &&
|
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) &&
|
||||||
(dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT))
|
(dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
if (frag_subqueue_needs_sidefx_barrier(src_access, dst_access))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -433,13 +466,29 @@ collect_cache_flush_info(enum panvk_subqueue_id subqueue,
|
||||||
add_memory_dependency(cache_flush, src_access, dst_access);
|
add_memory_dependency(cache_flush, src_access, dst_access);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
can_skip_barrier(struct panvk_cmd_buffer *cmdbuf, const VkDependencyInfo *info,
|
||||||
|
struct panvk_sync_scope src, struct panvk_sync_scope dst)
|
||||||
|
{
|
||||||
|
bool inside_rp = cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf);
|
||||||
|
bool by_region = info->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
|
|
||||||
|
if (inside_rp && by_region &&
|
||||||
|
!frag_subqueue_needs_sidefx_barrier(src.access, dst.access))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
collect_cs_deps(struct panvk_cmd_buffer *cmdbuf,
|
collect_cs_deps(struct panvk_cmd_buffer *cmdbuf, const VkDependencyInfo *info,
|
||||||
struct panvk_sync_scope src, struct panvk_sync_scope dst,
|
struct panvk_sync_scope src, struct panvk_sync_scope dst,
|
||||||
struct panvk_cs_deps *deps)
|
struct panvk_cs_deps *deps)
|
||||||
{
|
{
|
||||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
if (can_skip_barrier(cmdbuf, info, src, dst))
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||||
uint32_t wait_masks[PANVK_SUBQUEUE_COUNT] = {0};
|
uint32_t wait_masks[PANVK_SUBQUEUE_COUNT] = {0};
|
||||||
add_execution_dependency(wait_masks, src.stages, dst.stages);
|
add_execution_dependency(wait_masks, src.stages, dst.stages);
|
||||||
|
|
||||||
|
|
@ -581,7 +630,7 @@ panvk_per_arch(add_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
|
||||||
VK_QUEUE_FAMILY_IGNORED,
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
barrier_stage);
|
barrier_stage);
|
||||||
|
|
||||||
collect_cs_deps(cmdbuf, src, dst, out);
|
collect_cs_deps(cmdbuf, in, src, dst, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < in->bufferMemoryBarrierCount; i++) {
|
for (uint32_t i = 0; i < in->bufferMemoryBarrierCount; i++) {
|
||||||
|
|
@ -593,7 +642,7 @@ panvk_per_arch(add_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
|
||||||
barrier->dstQueueFamilyIndex,
|
barrier->dstQueueFamilyIndex,
|
||||||
barrier_stage);
|
barrier_stage);
|
||||||
|
|
||||||
collect_cs_deps(cmdbuf, src, dst, out);
|
collect_cs_deps(cmdbuf, in, src, dst, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < in->imageMemoryBarrierCount; i++) {
|
for (uint32_t i = 0; i < in->imageMemoryBarrierCount; i++) {
|
||||||
|
|
@ -608,7 +657,7 @@ panvk_per_arch(add_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
|
||||||
barrier->dstQueueFamilyIndex,
|
barrier->dstQueueFamilyIndex,
|
||||||
barrier_stage);
|
barrier_stage);
|
||||||
|
|
||||||
collect_cs_deps(cmdbuf, src, dst, out);
|
collect_cs_deps(cmdbuf, in, src, dst, out);
|
||||||
|
|
||||||
if (barrier_stage == PANVK_BARRIER_STAGE_FIRST && transition.stages)
|
if (barrier_stage == PANVK_BARRIER_STAGE_FIRST && transition.stages)
|
||||||
out->needs_layout_transitions = true;
|
out->needs_layout_transitions = true;
|
||||||
|
|
@ -736,13 +785,6 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
||||||
const VkDependencyInfo *pDependencyInfo)
|
const VkDependencyInfo *pDependencyInfo)
|
||||||
{
|
{
|
||||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||||
|
|
||||||
/* Intra render pass barriers can be skipped iff we're inside a render
|
|
||||||
* pass. */
|
|
||||||
if ((cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) &&
|
|
||||||
(pDependencyInfo->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT))
|
|
||||||
return;
|
|
||||||
|
|
||||||
struct panvk_cs_deps deps = {0};
|
struct panvk_cs_deps deps = {0};
|
||||||
|
|
||||||
panvk_per_arch(add_cs_deps)(cmdbuf, PANVK_BARRIER_STAGE_FIRST, pDependencyInfo, &deps, false);
|
panvk_per_arch(add_cs_deps)(cmdbuf, PANVK_BARRIER_STAGE_FIRST, pDependencyInfo, &deps, false);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue