nvk: Implement vkCmdPipelineBarrier2 for real

We also need to plumb all the same logic into event handling.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26408>
This commit is contained in:
Faith Ekstrand 2023-11-29 15:09:57 -06:00 committed by Marge Bot
parent e962430e6f
commit e1c1cdbd5f
3 changed files with 203 additions and 8 deletions

View file

@ -16,6 +16,7 @@
#include "nvk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"
#include "nouveau_context.h"
@ -23,6 +24,7 @@
#include "nvk_cl906f.h"
#include "nvk_cl90b5.h"
#include "nvk_cla097.h"
#include "nvk_cla0c0.h"
#include "nvk_clc597.h"
@ -330,7 +332,193 @@ nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
}
}
#include "nvk_cl9097.h"
/* Hardware-level barrier actions derived from Vulkan stage/access masks.
 *
 * Flush/wait bits are produced from source (write-side) masks by
 * nvk_barrier_flushes_waits(); invalidate bits are produced from
 * destination (read-side) masks by nvk_barrier_invalidates().
 */
enum nvk_barrier {
/* Wait-for-idle on the 3D (render) engine */
NVK_BARRIER_RENDER_WFI = 1 << 0,
/* Wait-for-idle on the compute engine */
NVK_BARRIER_COMPUTE_WFI = 1 << 1,
/* Flush shader data caches so writes become visible */
NVK_BARRIER_FLUSH_SHADER_DATA = 1 << 2,
/* Invalidate shader (global) data caches before reads */
NVK_BARRIER_INVALIDATE_SHADER_DATA = 1 << 3,
/* Invalidate the texture data cache before sampled/input-attachment reads */
NVK_BARRIER_INVALIDATE_TEX_DATA = 1 << 4,
/* Invalidate the constant (uniform) cache before reads */
NVK_BARRIER_INVALIDATE_CONSTANT = 1 << 5,
/* Re-synchronize MME shadow state read from system memory */
NVK_BARRIER_INVALIDATE_MME_DATA = 1 << 6,
};
/* Compute the flush/WFI actions required by the source (write) side of a
 * barrier, given its srcStageMask/srcAccessMask pair.
 *
 * The stage mask is expanded and used to filter the access mask down to
 * accesses that can actually occur in those stages before any bits are
 * inspected.
 */
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier bar = 0;

   /* Storage writes require a shader data flush, plus a WFI on whichever
    * engine(s) could have produced them.
    */
   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      bar |= NVK_BARRIER_FLUSH_SHADER_DATA;
      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         bar |= NVK_BARRIER_RENDER_WFI;
      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         bar |= NVK_BARRIER_COMPUTE_WFI;
   }

   /* Fixed-function render-pipeline writes only need a 3D engine WFI */
   const VkAccessFlags2 render_writes =
      VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
      VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
      VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
   if (access & render_writes)
      bar |= NVK_BARRIER_RENDER_WFI;

   /* Transfer writes performed on the 3D engine (resolve/blit/clear) also
    * need a 3D engine WFI.
    */
   const VkPipelineStageFlags2 render_xfer_stages =
      VK_PIPELINE_STAGE_2_RESOLVE_BIT |
      VK_PIPELINE_STAGE_2_BLIT_BIT |
      VK_PIPELINE_STAGE_2_CLEAR_BIT;
   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & render_xfer_stages))
      bar |= NVK_BARRIER_RENDER_WFI;

   return bar;
}
/* Compute the cache invalidates required by the destination (read) side of
 * a barrier, given its dstStageMask/dstAccessMask pair.
 *
 * As with the source side, the stage mask is expanded first and used to
 * filter the access mask before any bits are tested.
 */
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier bar = 0;

   /* Reads consumed through MME-driven paths (indirect draws/dispatches,
    * xfb counters, conditional rendering) need the MME shadow refreshed.
    */
   const VkAccessFlags2 mme_reads =
      VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
      VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
      VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT;
   if (access & mme_reads)
      bar |= NVK_BARRIER_INVALIDATE_MME_DATA;

   /* Uniform/descriptor reads go through both the shader data cache and
    * the constant cache.
    */
   const VkAccessFlags2 const_reads =
      VK_ACCESS_2_UNIFORM_READ_BIT |
      VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT;
   if (access & const_reads)
      bar |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
             NVK_BARRIER_INVALIDATE_CONSTANT;

   /* Sampled-image and input-attachment reads use the texture cache */
   const VkAccessFlags2 tex_reads =
      VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
      VK_ACCESS_2_SHADER_SAMPLED_READ_BIT;
   if (access & tex_reads)
      bar |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      bar |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   /* Transfer reads done on the 3D engine (resolve/blit sources) go
    * through the texture cache as well.
    */
   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      bar |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return bar;
}
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
const VkDependencyInfo *dep,
bool wait)
{
enum nvk_barrier barriers = 0;
for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
bar->srcAccessMask);
}
for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
bar->srcAccessMask);
}
for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
bar->srcAccessMask);
}
if (!barriers)
return;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
if (barriers & NVK_BARRIER_RENDER_WFI) {
P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
.data = DATA_TRUE,
.flush_data = FLUSH_DATA_TRUE,
});
}
if (barriers & NVK_BARRIER_COMPUTE_WFI) {
P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
.data = DATA_TRUE,
.flush_data = FLUSH_DATA_TRUE,
});
}
} else if (barriers & NVK_BARRIER_RENDER_WFI) {
/* If this comes from a vkCmdSetEvent, we don't need to wait */
if (wait)
P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
} else {
/* Compute WFI only happens when shader data is flushed */
assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
}
}
/* Emit the cache invalidates required by the destination side of the given
 * dependencies.
 *
 * Folds the dstStageMask/dstAccessMask of every barrier in every
 * VkDependencyInfo into one nvk_barrier mask and emits the matching no-WFI
 * invalidates.  Takes an array so vkCmdWaitEvents2 can pass all of its
 * dependency infos at once; vkCmdPipelineBarrier2 passes dep_count == 1.
 */
void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   /* Fix: this mask must OR the two flags together.  The original used
    * '&', and ANDing two distinct single-bit flags yields 0, so the
    * branch was unreachable and shader-data/constant caches were never
    * invalidated even when a barrier required it.
    */
   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
      /* SET_REFERENCE on the 3D subchannel acts as a host sync point
       * before the MME re-reads its shadow data; on Turing+ an explicit
       * MME DMA sysmembar is also required.
       * NOTE(review): exact MME flushing semantics come from HW docs —
       * confirm against the class headers.
       */
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }
}
VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
@ -338,13 +526,8 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
/* TODO: We don't need to WFI all the time, do we? */
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
P_IMMD(p, NV9097, WAIT_FOR_IDLE, 0);
P_IMMD(p, NV9097, INVALIDATE_TEXTURE_DATA_CACHE, {
.lines = LINES_ALL,
});
nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}
VKAPI_ATTR void VKAPI_CALL

View file

@ -234,6 +234,14 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
uint64_t *addr);
/* Emit flush/WFI work for the source side of @dep; when @wait is false
 * (vkCmdSetEvent2 path) the standalone render WFI is skipped.
 */
void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
const VkDependencyInfo *dep,
bool wait);

/* Emit cache invalidates for the destination side of @dep_count
 * dependency infos (array form so vkCmdWaitEvents2 can pass all at once).
 */
void nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
uint32_t dep_count,
const VkDependencyInfo *deps);
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
struct nvk_descriptor_state *desc);

View file

@ -167,6 +167,8 @@ nvk_CmdSetEvent2(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(nvk_event, event, _event);
nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, false);
VkPipelineStageFlags2 stages = 0;
for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
@ -232,4 +234,6 @@ nvk_CmdWaitEvents2(VkCommandBuffer commandBuffer,
.release_size = RELEASE_SIZE_4BYTE,
});
}
nvk_cmd_invalidate_deps(cmd, eventCount, pDependencyInfos);
}