nvk: Enable zcull for VK_ATTACHMENT_LOAD_OP_LOAD

Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33861>
This commit is contained in:
Mel Henning 2025-03-04 20:00:20 -05:00 committed by Marge Bot
parent 5e04c965de
commit c24963d8da
3 changed files with 133 additions and 22 deletions

View file

@ -14,6 +14,7 @@
#include "nvk_shader.h"
#include "util/bitpack_helpers.h"
#include "util/compiler.h"
#include "vk_format.h"
#include "vk_render_pass.h"
#include "vk_standard_sample_locations.h"
@ -1019,6 +1020,16 @@ get_depth_stencil_plane_params(struct nvk_image_view *iview,
*image_out = nil_image;
}
/* Look up the zcull plane that belongs to the current depth attachment.
 *
 * Returns a pointer to the zcull plane of the image behind
 * render->depth_att.iview, or NULL when there is no depth attachment view
 * or when that image has no zcull storage (zcull.nil.size_B == 0).
 */
static struct nvk_zcull_plane*
nvk_get_zcull_plane(struct nvk_rendering_state *render) {
   /* No depth attachment bound: nothing to look up. */
   if (!render->depth_att.iview)
      return NULL;

   struct nvk_image *depth_image =
      (struct nvk_image*) render->depth_att.iview->vk.image;

   /* A zero-sized zcull layout means the image has no zcull plane. */
   return depth_image->zcull.nil.size_B > 0 ? &depth_image->zcull : NULL;
}
static uint32_t
nvk_vk_format_to_zcull_format(VkFormat format) {
@ -1355,20 +1366,34 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
}
/* TODO: zcull for depth-stencil */
struct nvk_zcull_plane *zcull_plane = nvk_get_zcull_plane(render);
bool use_zcull = pdev->info.has_zcull_info &&
pRenderingInfo->pDepthAttachment != NULL &&
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR;
pRenderingInfo->pDepthAttachment->loadOp != VK_ATTACHMENT_LOAD_OP_NONE &&
(zcull_plane ||
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR);
if (use_zcull) {
uint32_t start_count = nv_push_dw_count(p);
struct nil_zcull zcull_info = nil_zcull_new(
&pdev->info.zcull_info,
render->area.offset.x,
render->area.offset.y,
render->area.extent.width,
render->area.extent.height
);
struct nil_zcull zcull_info;
uint64_t addr_begin, addr_end;
if (zcull_plane) {
zcull_info = zcull_plane->nil;
addr_begin = zcull_plane->addr;
addr_end = zcull_plane->addr + zcull_plane->nil.size_B;
} else {
zcull_info = nil_zcull_new(
&pdev->info.zcull_info,
render->area.offset.x,
render->area.offset.y,
render->area.extent.width,
render->area.extent.height
);
addr_begin = 0;
addr_end = 0;
}
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0);
@ -1380,10 +1405,10 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
P_NV9097_SET_ZCULL_REGION_ALIQUOTS(p, zcull_info.aliquot_count);
P_MTHD(p, NV9097, SET_ZCULL_STORAGE_A);
P_NV9097_SET_ZCULL_STORAGE_A(p, 0);
P_NV9097_SET_ZCULL_STORAGE_B(p, 0);
P_NV9097_SET_ZCULL_STORAGE_C(p, 0);
P_NV9097_SET_ZCULL_STORAGE_D(p, 0);
P_NV9097_SET_ZCULL_STORAGE_A(p, addr_begin >> 32);
P_NV9097_SET_ZCULL_STORAGE_B(p, addr_begin & UINT32_MAX);
P_NV9097_SET_ZCULL_STORAGE_C(p, addr_end >> 32);
P_NV9097_SET_ZCULL_STORAGE_D(p, addr_end & UINT32_MAX);
P_IMMD(p, NV9097, SET_ZCULL_REGION_FORMAT, TYPE_Z_4X4);
@ -1433,18 +1458,35 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
.type = TYPE_DEPTH_TEST,
});
P_IMMD(p, NV9097, SET_Z_CLEAR_VALUE,
fui(pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth));
float depth = 0.0f;
switch (pRenderingInfo->pDepthAttachment->loadOp) {
case VK_ATTACHMENT_LOAD_OP_CLEAR:
depth =
pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
FALLTHROUGH;
case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
P_IMMD(p, NV9097, SET_Z_CLEAR_VALUE, fui(depth));
P_IMMD(p, NV9097, CLEAR_ZCULL_REGION, {
.z_enable = true,
.stencil_enable = false,
.use_clear_rect = false,
.use_rt_array_index = false,
.make_conservative = true,
});
P_IMMD(p, NV9097, CLEAR_ZCULL_REGION, {
.z_enable = true,
.stencil_enable = false,
.use_clear_rect = false,
.use_rt_array_index = false,
.make_conservative = true,
});
break;
case VK_ATTACHMENT_LOAD_OP_LOAD:
assert(zcull_plane);
P_IMMD(p, NV9097, LOAD_ZCULL, 0);
break;
default:
assert(!"Unhandled loadOp");
break;
}
uint32_t end_count = nv_push_dw_count(p);
assert(end_count - start_count == zcull_count);
assert(end_count - start_count <= zcull_count);
} else {
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0x3f);
}
@ -1617,6 +1659,13 @@ nvk_CmdEndRendering2KHR(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
struct nvk_rendering_state *render = &cmd->state.gfx.render;
struct nvk_zcull_plane* zcull_plane = nvk_get_zcull_plane(render);
if (zcull_plane &&
render->depth_att.store_op == VK_ATTACHMENT_STORE_OP_STORE) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
P_IMMD(p, NV9097, STORE_ZCULL, 0);
}
if (!(render->flags & VK_RENDERING_SUSPENDING_BIT)) {
for (uint32_t i = 0; i < render->color_att_count; i++) {
struct nvk_image_view *iview = render->color_att[i].iview;

View file

@ -1026,6 +1026,15 @@ nvk_image_init(struct nvk_device *dev,
}
}
if ((image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
image->vk.image_type != VK_IMAGE_TYPE_3D &&
image->vk.tiling == VK_IMAGE_TILING_OPTIMAL &&
pdev->info.has_zcull_info) {
image->zcull.nil = nil_zcull_new(&pdev->info.zcull_info, 0, 0,
image->vk.extent.width,
image->vk.extent.height);
}
const enum pipe_format plane0_format = image->planes[0].nil.format.p_format;
if (plane0_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
struct nil_image_init_info stencil_nil_info = {
@ -1243,6 +1252,17 @@ nvk_image_plane_add_req(struct nvk_device *dev,
*size_B += plane_size_B;
}
/* Append the zcull plane's storage to a running memory requirement.
 *
 * On entry *size_B / *align_B describe the requirement accumulated so far.
 * On return *size_B has been rounded up to the zcull alignment and grown by
 * the zcull size, and *align_B is at least the zcull alignment.
 *
 * dev is not used by this function.
 */
static void
nvk_image_zcull_add_req(struct nvk_device *dev,
                        const struct nvk_zcull_plane *zcull,
                        uint64_t *size_B, uint32_t *align_B)
{
   assert(util_is_power_of_two_or_zero64(*align_B));

   /* Place the zcull data at the next suitably aligned offset and account
    * for its size in one step.
    */
   *size_B = align64(*size_B, zcull->nil.align_B) + zcull->nil.size_B;

   /* The overall alignment is the strictest of all parts seen so far. */
   if (*align_B < zcull->nil.align_B)
      *align_B = zcull->nil.align_B;
}
static void
nvk_get_image_memory_requirements(struct nvk_device *dev,
struct nvk_image *image,
@ -1277,6 +1297,10 @@ nvk_get_image_memory_requirements(struct nvk_device *dev,
}
}
if (image->zcull.nil.size_B > 0) {
nvk_image_zcull_add_req(dev, &image->zcull, &size_B, &align_B);
}
if (image->stencil_copy_temp.nil.size_B > 0) {
nvk_image_plane_add_req(dev, image, &image->stencil_copy_temp,
&size_B, &align_B);
@ -1576,6 +1600,17 @@ nvk_image_plane_bind(struct nvk_device *dev,
return VK_SUCCESS;
}
/* Assign an address to the zcull plane inside a bound memory object.
 *
 * *offset_B is the running offset into mem. It is rounded up to the zcull
 * alignment, zcull->addr is set to the memory's VA (mem->mem->va->addr) plus
 * that aligned offset, and *offset_B is advanced past the zcull storage.
 *
 * Always returns VK_SUCCESS.
 */
static VkResult
nvk_image_zcull_bind(struct nvk_zcull_plane *zcull,
                     struct nvk_device_memory *mem,
                     uint64_t *offset_B)
{
   const uint64_t zcull_offset_B = align64(*offset_B, zcull->nil.align_B);

   zcull->addr = mem->mem->va->addr + zcull_offset_B;
   *offset_B = zcull_offset_B + zcull->nil.size_B;

   return VK_SUCCESS;
}
static VkResult
nvk_bind_image_memory(struct nvk_device *dev,
const VkBindImageMemoryInfo *info)
@ -1626,6 +1661,26 @@ nvk_bind_image_memory(struct nvk_device *dev,
}
}
if (image->zcull.nil.size_B > 0) {
result = nvk_image_zcull_bind(&image->zcull, mem, &offset_B);
if (result != VK_SUCCESS)
return result;
/*
* zcull hardware kills the context if we try to LOAD_ZCULL on garbage
* data. Work around this by always initializing the zcull data to zero.
*/
result = nvk_upload_queue_fill(dev, &dev->upload,
image->zcull.addr,
0, image->zcull.nil.size_B);
if (result != VK_SUCCESS)
return result;
result = nvk_upload_queue_sync(dev, &dev->upload);
if (result != VK_SUCCESS)
return result;
}
if (image->stencil_copy_temp.nil.size_B > 0) {
result = nvk_image_plane_bind(dev, image, &image->stencil_copy_temp,
mem, &offset_B);

View file

@ -70,6 +70,11 @@ struct nvk_image_plane {
uint64_t host_offset;
};
/* Per-image zcull storage: a layout description plus the address the
 * storage was bound at. Code in this driver treats nil.size_B == 0 as
 * "this image has no zcull plane".
 */
struct nvk_zcull_plane {
/* Zcull layout as produced by nil_zcull_new(); provides size_B and align_B
 * used for memory requirements and binding.
 */
struct nil_zcull nil;
/* Address of the zcull storage, assigned when image memory is bound
 * (memory VA plus the aligned offset of the zcull data).
 */
uint64_t addr;
};
struct nvk_image {
struct vk_image vk;
@ -88,6 +93,8 @@ struct nvk_image {
uint8_t plane_count;
struct nvk_image_plane planes[NVK_MAX_IMAGE_PLANES];
struct nvk_zcull_plane zcull;
/* In order to support D32_SFLOAT_S8_UINT, a temp area is
* needed. The stencil plane can't be copied using the DMA
* engine in a single pass since it would need 8 components support.