freedreno,tu: Unify LRZ layout calculations

Now with nolrzfc the memory for fast-clear will be allocated but
fast-clear itself will not be used.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32868>
This commit is contained in:
Danylo Piliaiev 2025-01-03 16:27:48 +01:00 committed by Marge Bot
parent 5a4b28e77c
commit 33083d580a
15 changed files with 203 additions and 176 deletions

View file

@ -0,0 +1,35 @@
/*
* Copyright © 2025 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "freedreno_lrz_layout.h"
#include "util/compiler.h"
/*
 * Fill in the LRZ layout for A5XX.
 *
 * The pitch is width / 8 (rounded up) aligned to 64, the height is
 * height / 8 (rounded up).  The total size is two bytes per element plus
 * 0x1000 bytes reserved for GRAS_LRZ_FAST_CLEAR_BUFFER.  The offset, layer
 * size and fast-clear fields are all stored as zero.
 */
void
fdl5_lrz_layout_init(struct fdl_lrz_layout *lrz_layout, uint32_t width,
                     uint32_t height, uint32_t nr_samples)
{
   uint32_t pitch = align(DIV_ROUND_UP(width, 8), 64);
   uint32_t rows = DIV_ROUND_UP(height, 8);

   /* LRZ buffer is super-sampled: 4x doubles both dimensions, 2x doubles
    * only the height.  Any other sample count leaves them untouched.
    */
   if (nr_samples == 4) {
      pitch *= 2;
   }
   if (nr_samples == 4 || nr_samples == 2) {
      rows *= 2;
   }

   /* 0x1000 extra bytes for GRAS_LRZ_FAST_CLEAR_BUFFER */
   const uint32_t total_size = pitch * rows * 2 + 0x1000;

   lrz_layout->lrz_total_size = total_size;
   lrz_layout->lrz_pitch = pitch;
   lrz_layout->lrz_height = rows;
   lrz_layout->lrz_offset = 0;
   lrz_layout->lrz_layer_size = 0;
   lrz_layout->lrz_fc_offset = 0;
   lrz_layout->lrz_fc_size = 0;
}

View file

@ -0,0 +1,99 @@
/*
* Copyright © 2025 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#ifndef FREEDRENO_LRZ_LAYOUT_H_
#define FREEDRENO_LRZ_LAYOUT_H_
#include <stdint.h>
#include "freedreno_layout.h"
BEGINC;
/* Describes where the LRZ data and its fast-clear / direction-tracking
 * companion live, and how much memory they need in total.  Filled in by
 * fdl5_lrz_layout_init() or fdl6_lrz_layout_init().
 */
struct fdl_lrz_layout {
/* Offset of the LRZ data as supplied by the caller of
 * fdl6_lrz_layout_init(); fdl5_lrz_layout_init() stores 0.
 */
uint32_t lrz_offset;
/* LRZ pitch in elements; users multiply by 2 to get a byte pitch. */
uint32_t lrz_pitch;
/* LRZ height in rows.  fdl6_lrz_layout_init() resets it to 0 when it
 * decides LRZ cannot be used.
 */
uint32_t lrz_height;
/* Size in bytes of one array layer (pitch * height * 2 on A6XX+);
 * fdl5_lrz_layout_init() stores 0.
 */
uint32_t lrz_layer_size;
/* Offset of the fd_lrzfc_layout block, placed after all LRZ layers on
 * A6XX+.  Only assigned there when fast-clear or direction tracking is
 * available; fdl5_lrz_layout_init() stores 0.
 */
uint32_t lrz_fc_offset;
/* Size in bytes of the fast-clear data (1 bit per block); 0 when
 * fast-clear is not available for this layout.
 */
uint32_t lrz_fc_size;
/* Total number of bytes to allocate for LRZ; 0 means LRZ is unusable. */
uint32_t lrz_total_size;
};
/* Compute the A5XX LRZ layout for a surface of the given size and sample
 * count.  Writes every field of *lrz_layout.
 */
void
fdl5_lrz_layout_init(struct fdl_lrz_layout *lrz_layout, uint32_t width,
uint32_t height, uint32_t nr_samples);
ENDC;
#ifdef __cplusplus
#include "common/freedreno_lrz.h"
/**
 * Compute the LRZ layout for A6XX and later.
 *
 * \param lrz_layout   Output; every field is written.
 * \param layout       Depth layout; width0, height0 and nr_samples are read.
 * \param dev_info     Device info; a6xx.enable_lrz_fast_clear and
 *                     a6xx.has_lrz_dir_tracking are read.
 * \param lrz_offset   Offset at which the LRZ data starts; stored verbatim
 *                     and used as the base for lrz_fc_offset.
 * \param array_layers Number of array layers covered by the LRZ buffer.
 *
 * On return lrz_total_size is the number of bytes the caller must reserve.
 * It is 0, together with lrz_height, when the combined height of all
 * layers is too large to be cleared in one go.
 */
template <chip CHIP>
static void
fdl6_lrz_layout_init(struct fdl_lrz_layout *lrz_layout,
                     struct fdl_layout *layout,
                     const struct fd_dev_info *dev_info, uint32_t lrz_offset,
                     uint32_t array_layers)
{
   unsigned width = layout->width0;
   unsigned height = layout->height0;

   /* LRZ buffer is super-sampled */
   switch (layout->nr_samples) {
   case 8:
      height *= 2;
      FALLTHROUGH;
   case 4:
      width *= 2;
      FALLTHROUGH;
   case 2:
      height *= 2;
      break;
   default:
      break;
   }

   unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
   unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);

   lrz_layout->lrz_offset = lrz_offset;
   lrz_layout->lrz_height = lrz_height;
   lrz_layout->lrz_pitch = lrz_pitch;
   lrz_layout->lrz_layer_size = lrz_pitch * lrz_height * sizeof(uint16_t);

   unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
   unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);

   /* Fast-clear buffer is 1bit/block */
   lrz_layout->lrz_fc_size =
      DIV_ROUND_UP(nblocksx * nblocksy, 8) * array_layers;

   /* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
    * on A7XX (HW limitation) */
   if (!dev_info->a6xx.enable_lrz_fast_clear ||
       lrz_layout->lrz_fc_size > fd_lrzfc_layout<CHIP>::FC_SIZE) {
      lrz_layout->lrz_fc_size = 0;
   }

   uint32_t lrz_size = lrz_layout->lrz_layer_size * array_layers;

   /* Users test lrz_fc_offset against zero to find out whether a
    * fast-clear / direction-tracking block exists, so give it a defined
    * value even when neither feature is available instead of relying on
    * the caller having zeroed the structure.
    */
   lrz_layout->lrz_fc_offset = 0;

   /* The fd_lrzfc_layout block is reserved whenever the hardware has
    * fast-clear enabled or uses it for direction tracking, even if
    * lrz_fc_size ended up as 0 above.
    */
   if (dev_info->a6xx.enable_lrz_fast_clear ||
       dev_info->a6xx.has_lrz_dir_tracking) {
      lrz_layout->lrz_fc_offset = lrz_layout->lrz_offset + lrz_size;
      lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
   }

   lrz_layout->lrz_total_size = lrz_size;

   uint32_t lrz_clear_height = lrz_layout->lrz_height * array_layers;
   if (((lrz_clear_height - 1) >> 14) > 0) {
      /* For simplicity bail out if LRZ cannot be cleared in one go. */
      lrz_layout->lrz_height = 0;
      lrz_layout->lrz_total_size = 0;
   }
}
#endif
#endif

View file

@ -10,6 +10,7 @@ libfreedreno_layout = static_library(
'fd6_tiled_memcpy.cc',
'fd6_view.c',
'freedreno_layout.c',
'freedreno_lrz_layout.c',
freedreno_xml_header_files,
],
include_directories : [inc_freedreno, inc_include, inc_src],

View file

@ -2094,11 +2094,11 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
VK_SAMPLE_COUNT_1_BIT);
ops->clear_value(cmd, cs, PIPE_FORMAT_Z16_UNORM, value);
ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM,
image->iova + image->lrz_offset,
image->lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM);
uint32_t lrz_height = image->lrz_height * image->vk.array_layers;
image->iova + image->lrz_layout.lrz_offset,
image->lrz_layout.lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM);
uint32_t lrz_height = image->lrz_layout.lrz_height * image->vk.array_layers;
ops->coords(cmd, cs, (VkOffset2D) {}, blt_no_coord,
(VkExtent2D) { image->lrz_pitch, lrz_height });
(VkExtent2D) { image->lrz_layout.lrz_pitch, lrz_height });
ops->run(cmd, cs);
ops->teardown(cmd, cs);
@ -2122,7 +2122,7 @@ tu6_dirty_lrz_fc(struct tu_cmd_buffer *cmd,
clear.color.uint32[0] = 0xffffffff;
using LRZFC = fd_lrzfc_layout<CHIP>;
uint64_t lrz_fc_iova = image->iova + image->lrz_fc_offset;
uint64_t lrz_fc_iova = image->iova + image->lrz_layout.lrz_fc_offset;
ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT,
VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
VK_SAMPLE_COUNT_1_BIT);
@ -2361,7 +2361,7 @@ tu_CmdBlitImage2(VkCommandBuffer commandBuffer,
pBlitImageInfo->filter);
}
if (dst_image->lrz_height) {
if (dst_image->lrz_layout.lrz_total_size) {
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
@ -2495,7 +2495,7 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
tu_copy_buffer_to_image<CHIP>(cmd, src_buffer, dst_image,
pCopyBufferToImageInfo->pRegions + i);
if (dst_image->lrz_height) {
if (dst_image->lrz_layout.lrz_total_size) {
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
@ -2584,7 +2584,7 @@ tu_CopyMemoryToImageEXT(VkDevice _device,
info->flags & VK_HOST_IMAGE_COPY_MEMCPY_EXT);
}
if (dst_image->lrz_height) {
if (dst_image->lrz_layout.lrz_total_size) {
TU_CALLX(device, tu_disable_lrz_cpu)(device, dst_image);
}
@ -3028,7 +3028,7 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer,
pCopyImageInfo->pRegions + i);
}
if (dst_image->lrz_height) {
if (dst_image->lrz_layout.lrz_total_size) {
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
@ -3207,7 +3207,7 @@ tu_CopyImageToImageEXT(VkDevice _device,
copy_memcpy);
}
if (dst_image->lrz_height) {
if (dst_image->lrz_layout.lrz_total_size) {
TU_CALLX(device, tu_disable_lrz_cpu)(device, dst_image);
}

View file

@ -2054,7 +2054,8 @@ tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
if (cmd->state.lrz.image_view) {
struct tu_image *image = cmd->state.lrz.image_view->image;
addr.bo = image->bo;
addr.offset = (image->iova - image->bo->iova) + image->lrz_fc_offset +
addr.offset = (image->iova - image->bo->iova) +
image->lrz_layout.lrz_fc_offset +
offsetof(fd_lrzfc_layout<CHIP>, dir_track);
}

View file

@ -573,63 +573,14 @@ tu_image_update_layout(struct tu_device *device, struct tu_image *image,
const struct util_format_description *desc = util_format_description(image->layout[0].format);
if (util_format_has_depth(desc) && device->use_lrz) {
/* Depth plane is the first one */
struct fdl_layout *layout = &image->layout[0];
unsigned width = layout->width0;
unsigned height = layout->height0;
fdl6_lrz_layout_init<CHIP>(&image->lrz_layout, &image->layout[0],
device->physical_device->info,
image->total_size, image->vk.array_layers);
/* LRZ buffer is super-sampled */
switch (layout->nr_samples) {
case 8:
height *= 2;
FALLTHROUGH;
case 4:
width *= 2;
FALLTHROUGH;
case 2:
height *= 2;
break;
default:
break;
}
unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
image->lrz_height = lrz_height;
image->lrz_pitch = lrz_pitch;
image->lrz_offset = image->total_size;
image->lrz_layer_size = lrz_pitch * lrz_height * sizeof(uint16_t);
uint32_t lrz_size = image->lrz_layer_size * image->vk.array_layers;
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
/* Fast-clear buffer is 1bit/block */
unsigned lrz_fc_size =
DIV_ROUND_UP(nblocksx * nblocksy, 8) * image->vk.array_layers;
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes on A7XX (HW limitation) */
image->has_lrz_fc =
device->physical_device->info->a6xx.enable_lrz_fast_clear &&
lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE &&
!TU_DEBUG(NOLRZFC);
if (image->has_lrz_fc || device->physical_device->info->a6xx.has_lrz_dir_tracking) {
image->lrz_fc_offset = image->total_size + lrz_size;
lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
}
uint32_t lrz_clear_height = lrz_height * image->vk.array_layers;
if (((lrz_clear_height - 1) >> 14) > 0) {
/* For simplicity bail out if LRZ cannot be cleared in one go. */
image->lrz_height = 0;
lrz_size = 0;
}
image->total_size += lrz_size;
image->total_size += image->lrz_layout.lrz_total_size;
} else {
image->lrz_height = 0;
image->lrz_layout.lrz_height = 0;
image->lrz_layout.lrz_total_size = 0;
}
return VK_SUCCESS;

View file

@ -11,6 +11,7 @@
#define TU_IMAGE_H
#include "tu_common.h"
#include "fdl/freedreno_lrz_layout.h"
#define TU_MAX_PLANE_COUNT 3
@ -41,12 +42,7 @@ struct tu_image
/* For fragment density map */
void *map;
uint32_t lrz_height;
uint32_t lrz_pitch;
uint32_t lrz_offset;
uint32_t lrz_layer_size;
uint32_t lrz_fc_offset;
bool has_lrz_fc;
struct fdl_lrz_layout lrz_layout;
bool ubwc_enabled;
bool force_linear_tile;

View file

@ -74,15 +74,17 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
return;
}
uint64_t lrz_iova = depth_image->iova + depth_image->lrz_offset;
uint64_t lrz_fc_iova = depth_image->iova + depth_image->lrz_fc_offset;
if (!depth_image->lrz_fc_offset)
uint64_t lrz_iova = depth_image->iova + depth_image->lrz_layout.lrz_offset;
uint64_t lrz_fc_iova =
depth_image->iova + depth_image->lrz_layout.lrz_fc_offset;
if (!depth_image->lrz_layout.lrz_fc_offset)
lrz_fc_iova = 0;
tu_cs_emit_regs(
cs, A6XX_GRAS_LRZ_BUFFER_BASE(.qword = lrz_iova),
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_pitch,
.array_pitch = depth_image->lrz_layer_size),
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_layout.lrz_pitch,
.array_pitch =
depth_image->lrz_layout.lrz_layer_size),
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(.qword = lrz_fc_iova));
if (CHIP >= A7XX) {
@ -153,7 +155,7 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
const struct tu_render_pass_attachment *att,
const struct tu_image_view *view)
{
if (!view->image->lrz_height) {
if (!view->image->lrz_layout.lrz_total_size) {
assert(!cmd->device->use_lrz || !vk_format_has_depth(att->format));
return;
}
@ -182,7 +184,8 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
/* Be optimistic and unconditionally enable fast-clear in
* secondary cmdbufs and when reusing previous LRZ state.
*/
cmd->state.lrz.fast_clear = view->image->has_lrz_fc;
cmd->state.lrz.fast_clear =
view->image->lrz_layout.lrz_fc_size > 0 && !TU_DEBUG(NOLRZFC);
cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking;
cmd->state.lrz.reuse_previous_state = !clears_depth;
@ -246,7 +249,7 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd)
uint32_t a;
for (a = 0; a < cmd->state.pass->attachment_count; a++) {
if (cmd->state.attachments[a]->image->lrz_height)
if (cmd->state.attachments[a]->image->lrz_layout.lrz_total_size)
break;
}
@ -274,7 +277,7 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
int lrz_img_count = 0;
for (unsigned i = 0; i < pass->attachment_count; i++) {
if (cmd->state.attachments[i]->image->lrz_height)
if (cmd->state.attachments[i]->image->lrz_layout.lrz_total_size)
lrz_img_count++;
}
@ -387,7 +390,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* TODO: we could avoid this if we don't store depth and don't
* expect secondary cmdbufs.
*/
if (lrz->image_view->image->has_lrz_fc) {
if (lrz->image_view->image->lrz_layout.lrz_fc_size > 0) {
tu6_dirty_lrz_fc<CHIP>(cmd, cs, lrz->image_view->image);
}
}
@ -524,7 +527,7 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
if (!cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking)
return;
if (!image->lrz_height)
if (!image->lrz_layout.lrz_total_size)
return;
tu6_emit_lrz_buffer<CHIP>(cs, image);
@ -540,19 +543,20 @@ tu_disable_lrz_cpu(struct tu_device *device, struct tu_image *image)
if (!device->physical_device->info->a6xx.has_lrz_dir_tracking)
return;
if (!image->lrz_height)
if (!image->lrz_layout.lrz_total_size)
return;
const unsigned lrz_dir_offset = offsetof(fd_lrzfc_layout<CHIP>, dir_track);
uint8_t *lrz_dir_tracking =
(uint8_t *)image->map + image->lrz_fc_offset + lrz_dir_offset;
(uint8_t *)image->map + image->lrz_layout.lrz_fc_offset + lrz_dir_offset;
*lrz_dir_tracking = FD_LRZ_GPU_DIR_DISABLED;
if (image->bo->cached_non_coherent) {
tu_bo_sync_cache(device, image->bo,
image->bo_offset + image->lrz_offset + lrz_dir_offset,
1, TU_MEM_SYNC_CACHE_TO_GPU);
tu_bo_sync_cache(
device, image->bo,
image->bo_offset + image->lrz_layout.lrz_offset + lrz_dir_offset, 1,
TU_MEM_SYNC_CACHE_TO_GPU);
}
}
TU_GENX(tu_disable_lrz_cpu);
@ -566,7 +570,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
if (!rangeCount || !image->lrz_height ||
if (!rangeCount || !image->lrz_layout.lrz_total_size ||
!cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking)
return;
@ -585,8 +589,9 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
if (!range)
return;
bool fast_clear = image->has_lrz_fc &&
tu_lrzfc_depth_supported<CHIP>(pDepthStencil->depth);
bool fast_clear = image->lrz_layout.lrz_fc_size &&
tu_lrzfc_depth_supported<CHIP>(pDepthStencil->depth) &&
!TU_DEBUG(NOLRZFC);
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, image);

View file

@ -181,7 +181,7 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_layout.lrz_pitch * 2));
OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
@ -201,8 +201,8 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_layout.lrz_pitch) |
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_layout.lrz_height));
OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
@ -210,8 +210,8 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_layout.lrz_pitch - 1) |
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_layout.lrz_height - 1));
fd5_emit_blit(batch, ring);
}

View file

@ -155,7 +155,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
if (rsc->lrz) {
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_layout.lrz_pitch));
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
OUT_RELOC(ring, rsc->lrz, 0, 0, 0);

View file

@ -12,26 +12,10 @@ static void
setup_lrz(struct fd_resource *rsc)
{
struct fd_screen *screen = fd_screen(rsc->b.b.screen);
unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
/* LRZ buffer is super-sampled: */
switch (rsc->b.b.nr_samples) {
case 4:
lrz_pitch *= 2;
FALLTHROUGH;
case 2:
lrz_height *= 2;
}
unsigned size = lrz_pitch * lrz_height * 2;
size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
rsc->lrz_height = lrz_height;
rsc->lrz_width = lrz_pitch;
rsc->lrz_pitch = lrz_pitch;
rsc->lrz = fd_bo_new(screen->dev, size, FD_BO_NOMAP, "lrz");
fdl5_lrz_layout_init(&rsc->lrz_layout, rsc->b.b.width0, rsc->b.b.height0,
rsc->b.b.nr_samples);
rsc->lrz = fd_bo_new(screen->dev, rsc->lrz_layout.lrz_total_size,
FD_BO_NOMAP, "lrz");
}
uint32_t

View file

@ -807,8 +807,8 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_layout.lrz_pitch - 1) |
A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_layout.lrz_height - 1));
union pipe_color_union clear_color = { .f = {depth} };
@ -824,7 +824,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
A6XX_RB_2D_DST(
.bo = lrz,
),
A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
A6XX_RB_2D_DST_PITCH(zsbuf->lrz_layout.lrz_pitch * 2),
);
/*

View file

@ -272,10 +272,10 @@ emit_lrz(struct fd_batch *batch, struct fd_batch_subpass *subpass)
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
OUT_REG(ring,
A6XX_GRAS_LRZ_BUFFER_BASE(.bo = subpass->lrz),
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_pitch),
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_layout.lrz_pitch),
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(
.bo = zsbuf->lrz_fc_offset ? subpass->lrz : NULL,
.bo_offset = zsbuf->lrz_fc_offset
.bo = zsbuf->lrz_layout.lrz_fc_size ? subpass->lrz : NULL,
.bo_offset = zsbuf->lrz_layout.lrz_fc_offset
),
);
fd_ringbuffer_attach_bo(ring, subpass->lrz);

View file

@ -236,50 +236,12 @@ static void
setup_lrz(struct fd_resource *rsc)
{
struct fd_screen *screen = fd_screen(rsc->b.b.screen);
struct fdl_layout *layout = &rsc->layout;
unsigned width = layout->width0;
unsigned height = layout->height0;
uint32_t nr_layers = 1;
fdl6_lrz_layout_init<CHIP>(&rsc->lrz_layout, &rsc->layout, screen->info, 0,
nr_layers);
/* LRZ buffer is super-sampled: */
switch (layout->nr_samples) {
case 4:
width *= 2;
FALLTHROUGH;
case 2:
height *= 2;
}
unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
rsc->lrz_height = lrz_height;
rsc->lrz_width = lrz_pitch;
rsc->lrz_pitch = lrz_pitch;
unsigned lrz_size = lrz_pitch * lrz_height * sizeof(uint16_t);
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
/* Fast-clear buffer is 1bit/block */
unsigned lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
* on A7XX (HW limitation)
*/
bool has_lrz_fc = screen->info->a6xx.enable_lrz_fast_clear &&
lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE &&
!FD_DBG(NOLRZFC);
/* Allocate a LRZ fast-clear buffer even if we aren't using FC, if the
* hw is re-using this buffer for direction tracking
*/
if (has_lrz_fc || screen->info->a6xx.has_lrz_dir_tracking) {
rsc->lrz_fc_offset = lrz_size;
lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
}
rsc->lrz = fd_bo_new(screen->dev, lrz_size, FD_BO_NOMAP, "lrz");
rsc->lrz = fd_bo_new(screen->dev, rsc->lrz_layout.lrz_total_size,
FD_BO_NOMAP, "lrz");
}
template <chip CHIP>

View file

@ -16,6 +16,7 @@
#include "util/u_transfer_helper.h"
#include "freedreno/fdl/freedreno_layout.h"
#include "freedreno/fdl/freedreno_lrz_layout.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"
@ -139,18 +140,10 @@ struct fd_resource {
*/
bool needs_ubwc_clear : 1;
/*
* LRZ
*
* TODO lrz width/height/pitch should probably also move to
* fdl_layout
*/
/* LRZ */
struct fdl_lrz_layout lrz_layout;
bool lrz_valid : 1;
enum fd_lrz_direction lrz_direction : 2;
uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
uint16_t lrz_height;
uint16_t lrz_pitch;
uint32_t lrz_fc_offset;
struct fd_bo *lrz;
};