radv: pass the image subresource range to radv_{src,dst}_access_flush()

This will allow us to optimize the pipe misaligned special case for
GFX11 because only the first mip in the mip-tail needs the L2 cache
invalidation.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31921>
This commit is contained in:
Samuel Pitoiset 2024-10-31 09:41:15 +01:00 committed by Marge Bot
parent f7a39fac10
commit 7a3a65c0c4
14 changed files with 120 additions and 94 deletions

View file

@ -286,9 +286,9 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
} else if (use_compute) {
fill_buffer_shader(cmd_buffer, va, size, value);
flush_bits =
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image, NULL);
} else if (size)
radv_cp_dma_clear_buffer(cmd_buffer, va, size, value);

View file

@ -755,7 +755,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
radv_meta_restore(&saved_state, cmd_buffer);
return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image);
VK_ACCESS_2_SHADER_WRITE_BIT, image, NULL);
}
static uint32_t
@ -899,16 +899,6 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
clear_word = radv_get_htile_fast_clear_value(device, iview->image, clear_value);
if (pre_flush) {
enum radv_cmd_flush_bits bits =
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT,
iview->image);
cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
*pre_flush |= cmd_buffer->state.flush_bits;
}
VkImageSubresourceRange range = {
.aspectMask = aspects,
.baseMipLevel = iview->vk.base_mip_level,
@ -917,6 +907,16 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
.layerCount = iview->vk.layer_count,
};
if (pre_flush) {
enum radv_cmd_flush_bits bits =
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image, &range) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT,
iview->image, &range);
cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
*pre_flush |= cmd_buffer->state.flush_bits;
}
flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
if (iview->image->planes[0].surface.has_stencil &&
@ -1337,7 +1337,7 @@ radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_im
radv_meta_restore(&saved_state, cmd_buffer);
return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image);
VK_ACCESS_2_SHADER_WRITE_BIT, image, NULL);
}
uint32_t
@ -1709,7 +1709,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
if (pre_flush) {
enum radv_cmd_flush_bits bits = radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image);
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image, NULL);
cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
*pre_flush |= cmd_buffer->state.flush_bits;
}

View file

@ -234,9 +234,9 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
VK_ACCESS_TRANSFER_WRITE_BIT, dst_image, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_READ_BIT, dst_image);
VK_ACCESS_TRANSFER_READ_BIT, dst_image, NULL);
const enum util_format_layout format_layout = vk_format_description(dst_image->vk.format)->layout;
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
@ -614,9 +614,9 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
VK_ACCESS_TRANSFER_WRITE_BIT, dst_image, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_READ_BIT, dst_image);
VK_ACCESS_TRANSFER_READ_BIT, dst_image, NULL);
const enum util_format_layout format_layout = vk_format_description(dst_image->vk.format)->layout;
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {

View file

@ -187,8 +187,8 @@ radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
@ -237,7 +237,7 @@ radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL);
}

View file

@ -171,7 +171,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
}
state->flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, image);
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, image, NULL);
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
@ -246,6 +246,6 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
radv_meta_restore(&saved_state, cmd_buffer);
state->flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image, NULL);
}

View file

@ -521,9 +521,9 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image, subresourceRange);
/* Initialize the HTILE metadata as "fully expanded". */
uint32_t htile_value = radv_get_htile_initial_value(device, image);

View file

@ -453,13 +453,13 @@ radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_i
if (flush_cb)
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, image);
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, image, range);
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
if (flush_cb)
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image, range);
radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
@ -649,8 +649,8 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
return;
}
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, image);
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, image, subresourceRange);
radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
@ -735,9 +735,9 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image, subresourceRange);
/* Initialize the DCC metadata as "fully expanded". */
cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);

View file

@ -134,9 +134,6 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, image);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@ -154,6 +151,11 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_
},
NULL);
const VkImageSubresourceRange range = vk_image_view_subresource_range(&iview.vk);
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_SHADER_READ_BIT, image, &range);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.fmask_expand.p_layout,
0, 2,
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
@ -185,8 +187,8 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, image, &range);
/* Re-initialize FMASK in fully expanded mode. */
cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange);

View file

@ -217,15 +217,15 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_im
}
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, src_image, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image);
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image, NULL);
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image, NULL);
}
enum radv_resolve_method {

View file

@ -640,9 +640,9 @@ radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct
radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
dst_iview->vk.format, dst_layout, region);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL);
}
void
@ -663,8 +663,8 @@ radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkIm
*/
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
struct radv_image_view *src_iview = render->ds_att.iview;
VkImageLayout src_layout =
@ -727,9 +727,9 @@ radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkIm
&(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
aspects, resolve_mode);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL);
uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);

View file

@ -635,10 +635,14 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivi
}},
});
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_SHADER_READ_BIT, src_iview->image) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, dst_iview->image);
const VkImageSubresourceRange src_range = vk_image_view_subresource_range(&src_iview->vk);
const VkImageSubresourceRange dst_range = vk_image_view_subresource_range(&dst_iview->vk);
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT,
src_iview->image, &src_range) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, dst_iview->image, &dst_range);
unsigned push_constants[2] = {
src_offset->x - dst_offset->x,
@ -650,8 +654,9 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivi
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_iview->image);
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_iview->image, &dst_range);
}
static void

View file

@ -1352,10 +1352,11 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
return;
}
enum radv_cmd_flush_bits flush_bits =
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
@ -1513,7 +1514,7 @@ radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopy
sizeof(consts), &consts);
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL, NULL);
radv_indirect_dispatch(
cmd_buffer, src_buffer->bo,
@ -1619,7 +1620,7 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
sizeof(consts), &consts);
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL, NULL);
radv_indirect_dispatch(
cmd_buffer, src_buffer->bo,

View file

@ -6570,7 +6570,7 @@ can_skip_buffer_l2_flushes(struct radv_device *device)
enum radv_cmd_flush_bits
radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages, VkAccessFlags2 src_flags,
const struct radv_image *image)
const struct radv_image *image, const VkImageSubresourceRange *range)
{
src_flags = vk_expand_src_access_flags2(src_stages, src_flags);
@ -6638,7 +6638,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
enum radv_cmd_flush_bits
radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 dst_stages, VkAccessFlags2 dst_flags,
const struct radv_image *image)
const struct radv_image *image, const VkImageSubresourceRange *range)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -6747,12 +6747,18 @@ radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_
if (!iview)
continue;
const VkImageSubresourceRange range = vk_image_view_subresource_range(&iview->vk);
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, barrier->src_stage_mask, barrier->src_access_mask, iview->image);
radv_src_access_flush(cmd_buffer, barrier->src_stage_mask, barrier->src_access_mask, iview->image, &range);
}
if (render->ds_att.iview) {
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_stage_mask,
barrier->src_access_mask, render->ds_att.iview->image);
struct radv_image_view *iview = render->ds_att.iview;
const VkImageSubresourceRange range = vk_image_view_subresource_range(&iview->vk);
cmd_buffer->state.flush_bits |= radv_src_access_flush(
cmd_buffer, barrier->src_stage_mask, barrier->src_access_mask, render->ds_att.iview->image, &range);
}
radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
@ -6762,12 +6768,18 @@ radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_
if (!iview)
continue;
const VkImageSubresourceRange range = vk_image_view_subresource_range(&iview->vk);
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, barrier->dst_stage_mask, barrier->dst_access_mask, iview->image);
radv_dst_access_flush(cmd_buffer, barrier->dst_stage_mask, barrier->dst_access_mask, iview->image, &range);
}
if (render->ds_att.iview) {
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_stage_mask,
barrier->dst_access_mask, render->ds_att.iview->image);
struct radv_image_view *iview = render->ds_att.iview;
const VkImageSubresourceRange range = vk_image_view_subresource_range(&iview->vk);
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, barrier->dst_stage_mask, barrier->dst_access_mask, iview->image, &range);
}
radv_gang_barrier(cmd_buffer, barrier->src_stage_mask, barrier->dst_stage_mask);
@ -7352,7 +7364,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
*/
if (cmd_buffer->state.rb_noncoherent_dirty && !can_skip_buffer_l2_flushes(device))
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_TRANSFER_WRITE_BIT, NULL);
VK_ACCESS_2_TRANSFER_WRITE_BIT, NULL, NULL);
/* Since NGG streamout uses GDS, we need to make GDS idle when
* we leave the IB, otherwise another process might overwrite
@ -8838,8 +8850,9 @@ radv_handle_color_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, uint32_t in
const VkImageSubresourceRange range = vk_image_view_subresource_range(&att->iview->vk);
/* Consider previous rendering work for WAW hazards. */
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, att->iview->image);
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
att->iview->image, &range);
/* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress DCC. */
radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout,
@ -8850,7 +8863,7 @@ radv_handle_color_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, uint32_t in
cmd_buffer->state.flush_bits |= radv_dst_access_flush(
cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, att->iview->image);
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, att->iview->image, &range);
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
@ -8883,7 +8896,7 @@ radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer)
/* Consider previous rendering work for WAW hazards. */
cmd_buffer->state.flush_bits |=
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, att->iview->image);
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, att->iview->image, &range);
/* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress HTILE. */
radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout,
@ -8895,7 +8908,7 @@ radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->state.flush_bits |= radv_dst_access_flush(
cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, att->iview->image);
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, att->iview->image, &range);
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
@ -12205,8 +12218,9 @@ radv_trace_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndir
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL,
NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
radv_update_buffer_cp(cmd_buffer,
device->rra_trace.ray_history_addr + offsetof(struct radv_ray_history_header, dispatch_index),
@ -12427,15 +12441,15 @@ radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *ima
/* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent
* in considering previous rendering work for WAW hazards. */
state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image, range);
if (image->planes[0].surface.has_stencil &&
!(range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
/* Flush caches before performing a separate aspect initialization because it's a
* read-modify-write operation.
*/
state->flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, image);
state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_SHADER_READ_BIT, image, range);
}
state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
@ -12558,7 +12572,7 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i
* consistent in considering previous rendering work for WAW hazards.
*/
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image, range);
if (radv_image_has_cmask(image)) {
static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff};
@ -12811,17 +12825,17 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t dep_count, const VkDep
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
const VkMemoryBarrier2 *barrier = &dep_info->pMemoryBarriers[i];
src_stage_mask |= barrier->srcStageMask;
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, NULL);
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, NULL, NULL);
dst_stage_mask |= barrier->dstStageMask;
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, NULL);
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, NULL, NULL);
}
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
const VkBufferMemoryBarrier2 *barrier = &dep_info->pBufferMemoryBarriers[i];
src_stage_mask |= barrier->srcStageMask;
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, NULL);
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, NULL, NULL);
dst_stage_mask |= barrier->dstStageMask;
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, NULL);
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, NULL, NULL);
}
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
@ -12829,9 +12843,11 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t dep_count, const VkDep
VK_FROM_HANDLE(radv_image, image, barrier->image);
src_stage_mask |= barrier->srcStageMask;
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, image);
src_flush_bits |= radv_src_access_flush(cmd_buffer, barrier->srcStageMask, barrier->srcAccessMask, image,
&barrier->subresourceRange);
dst_stage_mask |= barrier->dstStageMask;
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, image);
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dstStageMask, barrier->dstAccessMask, image,
&barrier->subresourceRange);
}
}

View file

@ -737,10 +737,12 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
VkAccessFlags2 src_flags, const struct radv_image *image);
VkAccessFlags2 src_flags, const struct radv_image *image,
const VkImageSubresourceRange *range);
enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 dst_stages,
VkAccessFlags2 dst_flags, const struct radv_image *image);
VkAccessFlags2 dst_flags, const struct radv_image *image,
const VkImageSubresourceRange *range);
struct radv_resolve_barrier {
VkPipelineStageFlags2 src_stage_mask;