mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
nvk: Enable zcull for VK_ATTACHMENT_LOAD_OP_LOAD
Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33861>
This commit is contained in:
parent
5e04c965de
commit
c24963d8da
3 changed files with 133 additions and 22 deletions
|
|
@ -14,6 +14,7 @@
|
||||||
#include "nvk_shader.h"
|
#include "nvk_shader.h"
|
||||||
|
|
||||||
#include "util/bitpack_helpers.h"
|
#include "util/bitpack_helpers.h"
|
||||||
|
#include "util/compiler.h"
|
||||||
#include "vk_format.h"
|
#include "vk_format.h"
|
||||||
#include "vk_render_pass.h"
|
#include "vk_render_pass.h"
|
||||||
#include "vk_standard_sample_locations.h"
|
#include "vk_standard_sample_locations.h"
|
||||||
|
|
@ -1019,6 +1020,16 @@ get_depth_stencil_plane_params(struct nvk_image_view *iview,
|
||||||
*image_out = nil_image;
|
*image_out = nil_image;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct nvk_zcull_plane*
|
||||||
|
nvk_get_zcull_plane(struct nvk_rendering_state *render) {
|
||||||
|
if (render->depth_att.iview) {
|
||||||
|
struct nvk_image *img = (struct nvk_image*) render->depth_att.iview->vk.image;
|
||||||
|
if (img->zcull.nil.size_B > 0) {
|
||||||
|
return &img->zcull;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
nvk_vk_format_to_zcull_format(VkFormat format) {
|
nvk_vk_format_to_zcull_format(VkFormat format) {
|
||||||
|
|
@ -1355,20 +1366,34 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: zcull for depth-stencil */
|
/* TODO: zcull for depth-stencil */
|
||||||
|
struct nvk_zcull_plane *zcull_plane = nvk_get_zcull_plane(render);
|
||||||
bool use_zcull = pdev->info.has_zcull_info &&
|
bool use_zcull = pdev->info.has_zcull_info &&
|
||||||
pRenderingInfo->pDepthAttachment != NULL &&
|
pRenderingInfo->pDepthAttachment != NULL &&
|
||||||
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
|
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
|
||||||
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR;
|
pRenderingInfo->pDepthAttachment->loadOp != VK_ATTACHMENT_LOAD_OP_NONE &&
|
||||||
|
(zcull_plane ||
|
||||||
|
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR);
|
||||||
|
|
||||||
if (use_zcull) {
|
if (use_zcull) {
|
||||||
uint32_t start_count = nv_push_dw_count(p);
|
uint32_t start_count = nv_push_dw_count(p);
|
||||||
struct nil_zcull zcull_info = nil_zcull_new(
|
struct nil_zcull zcull_info;
|
||||||
&pdev->info.zcull_info,
|
uint64_t addr_begin, addr_end;
|
||||||
render->area.offset.x,
|
|
||||||
render->area.offset.y,
|
if (zcull_plane) {
|
||||||
render->area.extent.width,
|
zcull_info = zcull_plane->nil;
|
||||||
render->area.extent.height
|
addr_begin = zcull_plane->addr;
|
||||||
);
|
addr_end = zcull_plane->addr + zcull_plane->nil.size_B;
|
||||||
|
} else {
|
||||||
|
zcull_info = nil_zcull_new(
|
||||||
|
&pdev->info.zcull_info,
|
||||||
|
render->area.offset.x,
|
||||||
|
render->area.offset.y,
|
||||||
|
render->area.extent.width,
|
||||||
|
render->area.extent.height
|
||||||
|
);
|
||||||
|
addr_begin = 0;
|
||||||
|
addr_end = 0;
|
||||||
|
}
|
||||||
|
|
||||||
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0);
|
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0);
|
||||||
|
|
||||||
|
|
@ -1380,10 +1405,10 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||||||
P_NV9097_SET_ZCULL_REGION_ALIQUOTS(p, zcull_info.aliquot_count);
|
P_NV9097_SET_ZCULL_REGION_ALIQUOTS(p, zcull_info.aliquot_count);
|
||||||
|
|
||||||
P_MTHD(p, NV9097, SET_ZCULL_STORAGE_A);
|
P_MTHD(p, NV9097, SET_ZCULL_STORAGE_A);
|
||||||
P_NV9097_SET_ZCULL_STORAGE_A(p, 0);
|
P_NV9097_SET_ZCULL_STORAGE_A(p, addr_begin >> 32);
|
||||||
P_NV9097_SET_ZCULL_STORAGE_B(p, 0);
|
P_NV9097_SET_ZCULL_STORAGE_B(p, addr_begin & UINT32_MAX);
|
||||||
P_NV9097_SET_ZCULL_STORAGE_C(p, 0);
|
P_NV9097_SET_ZCULL_STORAGE_C(p, addr_end >> 32);
|
||||||
P_NV9097_SET_ZCULL_STORAGE_D(p, 0);
|
P_NV9097_SET_ZCULL_STORAGE_D(p, addr_end & UINT32_MAX);
|
||||||
|
|
||||||
P_IMMD(p, NV9097, SET_ZCULL_REGION_FORMAT, TYPE_Z_4X4);
|
P_IMMD(p, NV9097, SET_ZCULL_REGION_FORMAT, TYPE_Z_4X4);
|
||||||
|
|
||||||
|
|
@ -1433,18 +1458,35 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||||||
.type = TYPE_DEPTH_TEST,
|
.type = TYPE_DEPTH_TEST,
|
||||||
});
|
});
|
||||||
|
|
||||||
P_IMMD(p, NV9097, SET_Z_CLEAR_VALUE,
|
float depth = 0.0f;
|
||||||
fui(pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth));
|
switch (pRenderingInfo->pDepthAttachment->loadOp) {
|
||||||
|
case VK_ATTACHMENT_LOAD_OP_CLEAR:
|
||||||
|
depth =
|
||||||
|
pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
|
||||||
|
FALLTHROUGH;
|
||||||
|
case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
|
||||||
|
P_IMMD(p, NV9097, SET_Z_CLEAR_VALUE, fui(depth));
|
||||||
|
|
||||||
P_IMMD(p, NV9097, CLEAR_ZCULL_REGION, {
|
P_IMMD(p, NV9097, CLEAR_ZCULL_REGION, {
|
||||||
.z_enable = true,
|
.z_enable = true,
|
||||||
.stencil_enable = false,
|
.stencil_enable = false,
|
||||||
.use_clear_rect = false,
|
.use_clear_rect = false,
|
||||||
.use_rt_array_index = false,
|
.use_rt_array_index = false,
|
||||||
.make_conservative = true,
|
.make_conservative = true,
|
||||||
});
|
});
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_ATTACHMENT_LOAD_OP_LOAD:
|
||||||
|
assert(zcull_plane);
|
||||||
|
P_IMMD(p, NV9097, LOAD_ZCULL, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(!"Unhandled loadOp");
|
||||||
|
break;
|
||||||
|
}
|
||||||
uint32_t end_count = nv_push_dw_count(p);
|
uint32_t end_count = nv_push_dw_count(p);
|
||||||
assert(end_count - start_count == zcull_count);
|
assert(end_count - start_count <= zcull_count);
|
||||||
} else {
|
} else {
|
||||||
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0x3f);
|
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0x3f);
|
||||||
}
|
}
|
||||||
|
|
@ -1617,6 +1659,13 @@ nvk_CmdEndRendering2KHR(VkCommandBuffer commandBuffer,
|
||||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||||
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||||||
|
|
||||||
|
struct nvk_zcull_plane* zcull_plane = nvk_get_zcull_plane(render);
|
||||||
|
if (zcull_plane &&
|
||||||
|
render->depth_att.store_op == VK_ATTACHMENT_STORE_OP_STORE) {
|
||||||
|
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||||||
|
P_IMMD(p, NV9097, STORE_ZCULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (!(render->flags & VK_RENDERING_SUSPENDING_BIT)) {
|
if (!(render->flags & VK_RENDERING_SUSPENDING_BIT)) {
|
||||||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||||||
struct nvk_image_view *iview = render->color_att[i].iview;
|
struct nvk_image_view *iview = render->color_att[i].iview;
|
||||||
|
|
|
||||||
|
|
@ -1026,6 +1026,15 @@ nvk_image_init(struct nvk_device *dev,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
|
||||||
|
image->vk.image_type != VK_IMAGE_TYPE_3D &&
|
||||||
|
image->vk.tiling == VK_IMAGE_TILING_OPTIMAL &&
|
||||||
|
pdev->info.has_zcull_info) {
|
||||||
|
image->zcull.nil = nil_zcull_new(&pdev->info.zcull_info, 0, 0,
|
||||||
|
image->vk.extent.width,
|
||||||
|
image->vk.extent.height);
|
||||||
|
}
|
||||||
|
|
||||||
const enum pipe_format plane0_format = image->planes[0].nil.format.p_format;
|
const enum pipe_format plane0_format = image->planes[0].nil.format.p_format;
|
||||||
if (plane0_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
|
if (plane0_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
|
||||||
struct nil_image_init_info stencil_nil_info = {
|
struct nil_image_init_info stencil_nil_info = {
|
||||||
|
|
@ -1243,6 +1252,17 @@ nvk_image_plane_add_req(struct nvk_device *dev,
|
||||||
*size_B += plane_size_B;
|
*size_B += plane_size_B;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
nvk_image_zcull_add_req(struct nvk_device *dev,
|
||||||
|
const struct nvk_zcull_plane *zcull,
|
||||||
|
uint64_t *size_B, uint32_t *align_B)
|
||||||
|
{
|
||||||
|
assert(util_is_power_of_two_or_zero64(*align_B));
|
||||||
|
*align_B = MAX2(*align_B, zcull->nil.align_B);
|
||||||
|
*size_B = align64(*size_B, zcull->nil.align_B);
|
||||||
|
*size_B += zcull->nil.size_B;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
nvk_get_image_memory_requirements(struct nvk_device *dev,
|
nvk_get_image_memory_requirements(struct nvk_device *dev,
|
||||||
struct nvk_image *image,
|
struct nvk_image *image,
|
||||||
|
|
@ -1277,6 +1297,10 @@ nvk_get_image_memory_requirements(struct nvk_device *dev,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (image->zcull.nil.size_B > 0) {
|
||||||
|
nvk_image_zcull_add_req(dev, &image->zcull, &size_B, &align_B);
|
||||||
|
}
|
||||||
|
|
||||||
if (image->stencil_copy_temp.nil.size_B > 0) {
|
if (image->stencil_copy_temp.nil.size_B > 0) {
|
||||||
nvk_image_plane_add_req(dev, image, &image->stencil_copy_temp,
|
nvk_image_plane_add_req(dev, image, &image->stencil_copy_temp,
|
||||||
&size_B, &align_B);
|
&size_B, &align_B);
|
||||||
|
|
@ -1576,6 +1600,17 @@ nvk_image_plane_bind(struct nvk_device *dev,
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
nvk_image_zcull_bind(struct nvk_zcull_plane *zcull,
|
||||||
|
struct nvk_device_memory *mem,
|
||||||
|
uint64_t *offset_B)
|
||||||
|
{
|
||||||
|
*offset_B = align64(*offset_B, zcull->nil.align_B);
|
||||||
|
zcull->addr = mem->mem->va->addr + *offset_B;
|
||||||
|
*offset_B += zcull->nil.size_B;
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
nvk_bind_image_memory(struct nvk_device *dev,
|
nvk_bind_image_memory(struct nvk_device *dev,
|
||||||
const VkBindImageMemoryInfo *info)
|
const VkBindImageMemoryInfo *info)
|
||||||
|
|
@ -1626,6 +1661,26 @@ nvk_bind_image_memory(struct nvk_device *dev,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (image->zcull.nil.size_B > 0) {
|
||||||
|
result = nvk_image_zcull_bind(&image->zcull, mem, &offset_B);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* zcull hardware kills the context if we try to LOAD_ZCULL on garbage
|
||||||
|
* data. Work around this by always initializing the zcull data to zero.
|
||||||
|
*/
|
||||||
|
result = nvk_upload_queue_fill(dev, &dev->upload,
|
||||||
|
image->zcull.addr,
|
||||||
|
0, image->zcull.nil.size_B);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = nvk_upload_queue_sync(dev, &dev->upload);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
if (image->stencil_copy_temp.nil.size_B > 0) {
|
if (image->stencil_copy_temp.nil.size_B > 0) {
|
||||||
result = nvk_image_plane_bind(dev, image, &image->stencil_copy_temp,
|
result = nvk_image_plane_bind(dev, image, &image->stencil_copy_temp,
|
||||||
mem, &offset_B);
|
mem, &offset_B);
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,11 @@ struct nvk_image_plane {
|
||||||
uint64_t host_offset;
|
uint64_t host_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct nvk_zcull_plane {
|
||||||
|
struct nil_zcull nil;
|
||||||
|
uint64_t addr;
|
||||||
|
};
|
||||||
|
|
||||||
struct nvk_image {
|
struct nvk_image {
|
||||||
struct vk_image vk;
|
struct vk_image vk;
|
||||||
|
|
||||||
|
|
@ -88,6 +93,8 @@ struct nvk_image {
|
||||||
uint8_t plane_count;
|
uint8_t plane_count;
|
||||||
struct nvk_image_plane planes[NVK_MAX_IMAGE_PLANES];
|
struct nvk_image_plane planes[NVK_MAX_IMAGE_PLANES];
|
||||||
|
|
||||||
|
struct nvk_zcull_plane zcull;
|
||||||
|
|
||||||
/* In order to support D32_SFLOAT_S8_UINT, a temp area is
|
/* In order to support D32_SFLOAT_S8_UINT, a temp area is
|
||||||
* needed. The stencil plane can't be copied using the DMA
|
* needed. The stencil plane can't be copied using the DMA
|
||||||
* engine in a single pass since it would need 8 components support.
|
* engine in a single pass since it would need 8 components support.
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue