mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 01:30:08 +01:00
freedreno,tu: Unify LRZ layout calculations
Now with nolrzfc the memory for fast-clear will be allocated but fast-clear itself will not be used. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32868>
This commit is contained in:
parent
5a4b28e77c
commit
33083d580a
15 changed files with 203 additions and 176 deletions
35
src/freedreno/fdl/freedreno_lrz_layout.c
Normal file
35
src/freedreno/fdl/freedreno_lrz_layout.c
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright © 2025 Igalia S.L.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "freedreno_lrz_layout.h"
|
||||
#include "util/compiler.h"
|
||||
|
||||
void
|
||||
fdl5_lrz_layout_init(struct fdl_lrz_layout *lrz_layout, uint32_t width,
|
||||
uint32_t height, uint32_t nr_samples)
|
||||
{
|
||||
uint32_t lrz_pitch = align(DIV_ROUND_UP(width, 8), 64);
|
||||
uint32_t lrz_height = DIV_ROUND_UP(height, 8);
|
||||
|
||||
/* LRZ buffer is super-sampled: */
|
||||
switch (nr_samples) {
|
||||
case 4:
|
||||
lrz_pitch *= 2;
|
||||
FALLTHROUGH;
|
||||
case 2:
|
||||
lrz_height *= 2;
|
||||
}
|
||||
|
||||
uint32_t lrz_size = lrz_pitch * lrz_height * 2;
|
||||
lrz_size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
|
||||
|
||||
lrz_layout->lrz_offset = 0;
|
||||
lrz_layout->lrz_pitch = lrz_pitch;
|
||||
lrz_layout->lrz_height = lrz_height;
|
||||
lrz_layout->lrz_layer_size = 0;
|
||||
lrz_layout->lrz_fc_offset = 0;
|
||||
lrz_layout->lrz_fc_size = 0;
|
||||
lrz_layout->lrz_total_size = lrz_size;
|
||||
}
|
||||
99
src/freedreno/fdl/freedreno_lrz_layout.h
Normal file
99
src/freedreno/fdl/freedreno_lrz_layout.h
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright © 2025 Igalia S.L.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef FREEDRENO_LRZ_LAYOUT_H_
|
||||
#define FREEDRENO_LRZ_LAYOUT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "freedreno_layout.h"
|
||||
|
||||
BEGINC;
|
||||
|
||||
/* Placement and dimensions of an LRZ buffer and its fast-clear region,
 * as computed by fdl5_lrz_layout_init() / fdl6_lrz_layout_init().
 */
struct fdl_lrz_layout {
|
||||
/* Offset of the LRZ data; the a6xx init stores the caller-supplied value,
 * the a5xx init stores 0.
 */
uint32_t lrz_offset;
|
||||
/* LRZ row pitch; the clear paths program (lrz_pitch * 2) as the byte pitch
 * of a 16-bit destination.
 */
uint32_t lrz_pitch;
|
||||
/* Number of LRZ rows; the a6xx init zeroes it when LRZ cannot be cleared
 * in one go.
 */
uint32_t lrz_height;
|
||||
/* Size of one array layer: pitch * height * sizeof(uint16_t) on a6xx,
 * 0 on a5xx.
 */
uint32_t lrz_layer_size;
|
||||
/* Offset of the fd_lrzfc_layout region (a6xx+); 0 on a5xx. */
uint32_t lrz_fc_offset;
|
||||
/* Size in bytes of the fast-clear bits; 0 when fast-clear is disabled or
 * the bits would not fit.
 */
uint32_t lrz_fc_size;
|
||||
/* Total number of bytes to allocate; callers treat 0 as "no LRZ". */
uint32_t lrz_total_size;
|
||||
};
|
||||
|
||||
/* Fill in *lrz_layout for a5xx from the surface width, height and sample
 * count.  Every field of the layout is written.
 */
void
|
||||
fdl5_lrz_layout_init(struct fdl_lrz_layout *lrz_layout, uint32_t width,
|
||||
uint32_t height, uint32_t nr_samples);
|
||||
ENDC;
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "common/freedreno_lrz.h"
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
fdl6_lrz_layout_init(struct fdl_lrz_layout *lrz_layout,
|
||||
struct fdl_layout *layout,
|
||||
const struct fd_dev_info *dev_info, uint32_t lrz_offset,
|
||||
uint32_t array_layers)
|
||||
{
|
||||
unsigned width = layout->width0;
|
||||
unsigned height = layout->height0;
|
||||
|
||||
/* LRZ buffer is super-sampled */
|
||||
switch (layout->nr_samples) {
|
||||
case 8:
|
||||
height *= 2;
|
||||
FALLTHROUGH;
|
||||
case 4:
|
||||
width *= 2;
|
||||
FALLTHROUGH;
|
||||
case 2:
|
||||
height *= 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
|
||||
unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
|
||||
|
||||
lrz_layout->lrz_offset = lrz_offset;
|
||||
lrz_layout->lrz_height = lrz_height;
|
||||
lrz_layout->lrz_pitch = lrz_pitch;
|
||||
lrz_layout->lrz_layer_size = lrz_pitch * lrz_height * sizeof(uint16_t);
|
||||
|
||||
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
|
||||
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
|
||||
|
||||
/* Fast-clear buffer is 1bit/block */
|
||||
lrz_layout->lrz_fc_size =
|
||||
DIV_ROUND_UP(nblocksx * nblocksy, 8) * array_layers;
|
||||
|
||||
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
|
||||
* on A7XX (HW limitation) */
|
||||
if (!dev_info->a6xx.enable_lrz_fast_clear ||
|
||||
lrz_layout->lrz_fc_size > fd_lrzfc_layout<CHIP>::FC_SIZE) {
|
||||
lrz_layout->lrz_fc_size = 0;
|
||||
}
|
||||
|
||||
uint32_t lrz_size = lrz_layout->lrz_layer_size * array_layers;
|
||||
if (dev_info->a6xx.enable_lrz_fast_clear ||
|
||||
dev_info->a6xx.has_lrz_dir_tracking) {
|
||||
lrz_layout->lrz_fc_offset =
|
||||
lrz_layout->lrz_offset + lrz_size;
|
||||
lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
|
||||
}
|
||||
|
||||
lrz_layout->lrz_total_size = lrz_size;
|
||||
|
||||
uint32_t lrz_clear_height = lrz_layout->lrz_height * array_layers;
|
||||
if (((lrz_clear_height - 1) >> 14) > 0) {
|
||||
/* For simplicity bail out if LRZ cannot be cleared in one go. */
|
||||
lrz_layout->lrz_height = 0;
|
||||
lrz_layout->lrz_total_size = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -10,6 +10,7 @@ libfreedreno_layout = static_library(
|
|||
'fd6_tiled_memcpy.cc',
|
||||
'fd6_view.c',
|
||||
'freedreno_layout.c',
|
||||
'freedreno_lrz_layout.c',
|
||||
freedreno_xml_header_files,
|
||||
],
|
||||
include_directories : [inc_freedreno, inc_include, inc_src],
|
||||
|
|
|
|||
|
|
@ -2094,11 +2094,11 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
|
|||
VK_SAMPLE_COUNT_1_BIT);
|
||||
ops->clear_value(cmd, cs, PIPE_FORMAT_Z16_UNORM, value);
|
||||
ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM,
|
||||
image->iova + image->lrz_offset,
|
||||
image->lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM);
|
||||
uint32_t lrz_height = image->lrz_height * image->vk.array_layers;
|
||||
image->iova + image->lrz_layout.lrz_offset,
|
||||
image->lrz_layout.lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM);
|
||||
uint32_t lrz_height = image->lrz_layout.lrz_height * image->vk.array_layers;
|
||||
ops->coords(cmd, cs, (VkOffset2D) {}, blt_no_coord,
|
||||
(VkExtent2D) { image->lrz_pitch, lrz_height });
|
||||
(VkExtent2D) { image->lrz_layout.lrz_pitch, lrz_height });
|
||||
ops->run(cmd, cs);
|
||||
ops->teardown(cmd, cs);
|
||||
|
||||
|
|
@ -2122,7 +2122,7 @@ tu6_dirty_lrz_fc(struct tu_cmd_buffer *cmd,
|
|||
clear.color.uint32[0] = 0xffffffff;
|
||||
|
||||
using LRZFC = fd_lrzfc_layout<CHIP>;
|
||||
uint64_t lrz_fc_iova = image->iova + image->lrz_fc_offset;
|
||||
uint64_t lrz_fc_iova = image->iova + image->lrz_layout.lrz_fc_offset;
|
||||
ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
|
||||
VK_SAMPLE_COUNT_1_BIT);
|
||||
|
|
@ -2361,7 +2361,7 @@ tu_CmdBlitImage2(VkCommandBuffer commandBuffer,
|
|||
pBlitImageInfo->filter);
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
if (dst_image->lrz_layout.lrz_total_size) {
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
|
|
@ -2495,7 +2495,7 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
|
|||
tu_copy_buffer_to_image<CHIP>(cmd, src_buffer, dst_image,
|
||||
pCopyBufferToImageInfo->pRegions + i);
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
if (dst_image->lrz_layout.lrz_total_size) {
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
|
|
@ -2584,7 +2584,7 @@ tu_CopyMemoryToImageEXT(VkDevice _device,
|
|||
info->flags & VK_HOST_IMAGE_COPY_MEMCPY_EXT);
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
if (dst_image->lrz_layout.lrz_total_size) {
|
||||
TU_CALLX(device, tu_disable_lrz_cpu)(device, dst_image);
|
||||
}
|
||||
|
||||
|
|
@ -3028,7 +3028,7 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer,
|
|||
pCopyImageInfo->pRegions + i);
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
if (dst_image->lrz_layout.lrz_total_size) {
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
|
|
@ -3207,7 +3207,7 @@ tu_CopyImageToImageEXT(VkDevice _device,
|
|||
copy_memcpy);
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
if (dst_image->lrz_layout.lrz_total_size) {
|
||||
TU_CALLX(device, tu_disable_lrz_cpu)(device, dst_image);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2054,7 +2054,8 @@ tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
|
|||
if (cmd->state.lrz.image_view) {
|
||||
struct tu_image *image = cmd->state.lrz.image_view->image;
|
||||
addr.bo = image->bo;
|
||||
addr.offset = (image->iova - image->bo->iova) + image->lrz_fc_offset +
|
||||
addr.offset = (image->iova - image->bo->iova) +
|
||||
image->lrz_layout.lrz_fc_offset +
|
||||
offsetof(fd_lrzfc_layout<CHIP>, dir_track);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -573,63 +573,14 @@ tu_image_update_layout(struct tu_device *device, struct tu_image *image,
|
|||
|
||||
const struct util_format_description *desc = util_format_description(image->layout[0].format);
|
||||
if (util_format_has_depth(desc) && device->use_lrz) {
|
||||
/* Depth plane is the first one */
|
||||
struct fdl_layout *layout = &image->layout[0];
|
||||
unsigned width = layout->width0;
|
||||
unsigned height = layout->height0;
|
||||
fdl6_lrz_layout_init<CHIP>(&image->lrz_layout, &image->layout[0],
|
||||
device->physical_device->info,
|
||||
image->total_size, image->vk.array_layers);
|
||||
|
||||
/* LRZ buffer is super-sampled */
|
||||
switch (layout->nr_samples) {
|
||||
case 8:
|
||||
height *= 2;
|
||||
FALLTHROUGH;
|
||||
case 4:
|
||||
width *= 2;
|
||||
FALLTHROUGH;
|
||||
case 2:
|
||||
height *= 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
|
||||
unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
|
||||
|
||||
image->lrz_height = lrz_height;
|
||||
image->lrz_pitch = lrz_pitch;
|
||||
image->lrz_offset = image->total_size;
|
||||
image->lrz_layer_size = lrz_pitch * lrz_height * sizeof(uint16_t);
|
||||
uint32_t lrz_size = image->lrz_layer_size * image->vk.array_layers;
|
||||
|
||||
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
|
||||
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
|
||||
|
||||
/* Fast-clear buffer is 1bit/block */
|
||||
unsigned lrz_fc_size =
|
||||
DIV_ROUND_UP(nblocksx * nblocksy, 8) * image->vk.array_layers;
|
||||
|
||||
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes on A7XX (HW limitation) */
|
||||
image->has_lrz_fc =
|
||||
device->physical_device->info->a6xx.enable_lrz_fast_clear &&
|
||||
lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE &&
|
||||
!TU_DEBUG(NOLRZFC);
|
||||
|
||||
if (image->has_lrz_fc || device->physical_device->info->a6xx.has_lrz_dir_tracking) {
|
||||
image->lrz_fc_offset = image->total_size + lrz_size;
|
||||
lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
|
||||
}
|
||||
|
||||
uint32_t lrz_clear_height = lrz_height * image->vk.array_layers;
|
||||
if (((lrz_clear_height - 1) >> 14) > 0) {
|
||||
/* For simplicity bail out if LRZ cannot be cleared in one go. */
|
||||
image->lrz_height = 0;
|
||||
lrz_size = 0;
|
||||
}
|
||||
|
||||
image->total_size += lrz_size;
|
||||
image->total_size += image->lrz_layout.lrz_total_size;
|
||||
} else {
|
||||
image->lrz_height = 0;
|
||||
image->lrz_layout.lrz_height = 0;
|
||||
image->lrz_layout.lrz_total_size = 0;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#define TU_IMAGE_H
|
||||
|
||||
#include "tu_common.h"
|
||||
#include "fdl/freedreno_lrz_layout.h"
|
||||
|
||||
#define TU_MAX_PLANE_COUNT 3
|
||||
|
||||
|
|
@ -41,12 +42,7 @@ struct tu_image
|
|||
/* For fragment density map */
|
||||
void *map;
|
||||
|
||||
uint32_t lrz_height;
|
||||
uint32_t lrz_pitch;
|
||||
uint32_t lrz_offset;
|
||||
uint32_t lrz_layer_size;
|
||||
uint32_t lrz_fc_offset;
|
||||
bool has_lrz_fc;
|
||||
struct fdl_lrz_layout lrz_layout;
|
||||
|
||||
bool ubwc_enabled;
|
||||
bool force_linear_tile;
|
||||
|
|
|
|||
|
|
@ -74,15 +74,17 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
|
|||
return;
|
||||
}
|
||||
|
||||
uint64_t lrz_iova = depth_image->iova + depth_image->lrz_offset;
|
||||
uint64_t lrz_fc_iova = depth_image->iova + depth_image->lrz_fc_offset;
|
||||
if (!depth_image->lrz_fc_offset)
|
||||
uint64_t lrz_iova = depth_image->iova + depth_image->lrz_layout.lrz_offset;
|
||||
uint64_t lrz_fc_iova =
|
||||
depth_image->iova + depth_image->lrz_layout.lrz_fc_offset;
|
||||
if (!depth_image->lrz_layout.lrz_fc_offset)
|
||||
lrz_fc_iova = 0;
|
||||
|
||||
tu_cs_emit_regs(
|
||||
cs, A6XX_GRAS_LRZ_BUFFER_BASE(.qword = lrz_iova),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_pitch,
|
||||
.array_pitch = depth_image->lrz_layer_size),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_layout.lrz_pitch,
|
||||
.array_pitch =
|
||||
depth_image->lrz_layout.lrz_layer_size),
|
||||
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(.qword = lrz_fc_iova));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
|
|
@ -153,7 +155,7 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
|
|||
const struct tu_render_pass_attachment *att,
|
||||
const struct tu_image_view *view)
|
||||
{
|
||||
if (!view->image->lrz_height) {
|
||||
if (!view->image->lrz_layout.lrz_total_size) {
|
||||
assert(!cmd->device->use_lrz || !vk_format_has_depth(att->format));
|
||||
return;
|
||||
}
|
||||
|
|
@ -182,7 +184,8 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
|
|||
/* Be optimistic and unconditionally enable fast-clear in
|
||||
* secondary cmdbufs and when reusing previous LRZ state.
|
||||
*/
|
||||
cmd->state.lrz.fast_clear = view->image->has_lrz_fc;
|
||||
cmd->state.lrz.fast_clear =
|
||||
view->image->lrz_layout.lrz_fc_size > 0 && !TU_DEBUG(NOLRZFC);
|
||||
|
||||
cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking;
|
||||
cmd->state.lrz.reuse_previous_state = !clears_depth;
|
||||
|
|
@ -246,7 +249,7 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd)
|
|||
|
||||
uint32_t a;
|
||||
for (a = 0; a < cmd->state.pass->attachment_count; a++) {
|
||||
if (cmd->state.attachments[a]->image->lrz_height)
|
||||
if (cmd->state.attachments[a]->image->lrz_layout.lrz_total_size)
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -274,7 +277,7 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
|
|||
|
||||
int lrz_img_count = 0;
|
||||
for (unsigned i = 0; i < pass->attachment_count; i++) {
|
||||
if (cmd->state.attachments[i]->image->lrz_height)
|
||||
if (cmd->state.attachments[i]->image->lrz_layout.lrz_total_size)
|
||||
lrz_img_count++;
|
||||
}
|
||||
|
||||
|
|
@ -387,7 +390,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
* TODO: we could avoid this if we don't store depth and don't
|
||||
* expect secondary cmdbufs.
|
||||
*/
|
||||
if (lrz->image_view->image->has_lrz_fc) {
|
||||
if (lrz->image_view->image->lrz_layout.lrz_fc_size > 0) {
|
||||
tu6_dirty_lrz_fc<CHIP>(cmd, cs, lrz->image_view->image);
|
||||
}
|
||||
}
|
||||
|
|
@ -524,7 +527,7 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
if (!cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking)
|
||||
return;
|
||||
|
||||
if (!image->lrz_height)
|
||||
if (!image->lrz_layout.lrz_total_size)
|
||||
return;
|
||||
|
||||
tu6_emit_lrz_buffer<CHIP>(cs, image);
|
||||
|
|
@ -540,19 +543,20 @@ tu_disable_lrz_cpu(struct tu_device *device, struct tu_image *image)
|
|||
if (!device->physical_device->info->a6xx.has_lrz_dir_tracking)
|
||||
return;
|
||||
|
||||
if (!image->lrz_height)
|
||||
if (!image->lrz_layout.lrz_total_size)
|
||||
return;
|
||||
|
||||
const unsigned lrz_dir_offset = offsetof(fd_lrzfc_layout<CHIP>, dir_track);
|
||||
uint8_t *lrz_dir_tracking =
|
||||
(uint8_t *)image->map + image->lrz_fc_offset + lrz_dir_offset;
|
||||
(uint8_t *)image->map + image->lrz_layout.lrz_fc_offset + lrz_dir_offset;
|
||||
|
||||
*lrz_dir_tracking = FD_LRZ_GPU_DIR_DISABLED;
|
||||
|
||||
if (image->bo->cached_non_coherent) {
|
||||
tu_bo_sync_cache(device, image->bo,
|
||||
image->bo_offset + image->lrz_offset + lrz_dir_offset,
|
||||
1, TU_MEM_SYNC_CACHE_TO_GPU);
|
||||
tu_bo_sync_cache(
|
||||
device, image->bo,
|
||||
image->bo_offset + image->lrz_layout.lrz_offset + lrz_dir_offset, 1,
|
||||
TU_MEM_SYNC_CACHE_TO_GPU);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_disable_lrz_cpu);
|
||||
|
|
@ -566,7 +570,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
|||
uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges)
|
||||
{
|
||||
if (!rangeCount || !image->lrz_height ||
|
||||
if (!rangeCount || !image->lrz_layout.lrz_total_size ||
|
||||
!cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking)
|
||||
return;
|
||||
|
||||
|
|
@ -585,8 +589,9 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
|||
if (!range)
|
||||
return;
|
||||
|
||||
bool fast_clear = image->has_lrz_fc &&
|
||||
tu_lrzfc_depth_supported<CHIP>(pDepthStencil->depth);
|
||||
bool fast_clear = image->lrz_layout.lrz_fc_size &&
|
||||
tu_lrzfc_depth_supported<CHIP>(pDepthStencil->depth) &&
|
||||
!TU_DEBUG(NOLRZFC);
|
||||
|
||||
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, image);
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
|
|||
OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
|
||||
A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
|
||||
A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
|
||||
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
|
||||
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_layout.lrz_pitch * 2));
|
||||
OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
|
||||
OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
|
||||
|
||||
|
|
@ -201,8 +201,8 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
|
|||
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
|
||||
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
|
||||
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
|
||||
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_layout.lrz_pitch) |
|
||||
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_layout.lrz_height));
|
||||
OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
|
||||
|
|
@ -210,8 +210,8 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
|
|||
|
||||
OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
|
||||
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
|
||||
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
|
||||
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
|
||||
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_layout.lrz_pitch - 1) |
|
||||
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_layout.lrz_height - 1));
|
||||
|
||||
fd5_emit_blit(batch, ring);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
|
|||
if (rsc->lrz) {
|
||||
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
|
||||
OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
|
||||
OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
|
||||
OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_layout.lrz_pitch));
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
|
||||
OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
|
||||
|
|
|
|||
|
|
@ -12,26 +12,10 @@ static void
|
|||
setup_lrz(struct fd_resource *rsc)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(rsc->b.b.screen);
|
||||
unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
|
||||
unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
|
||||
|
||||
/* LRZ buffer is super-sampled: */
|
||||
switch (rsc->b.b.nr_samples) {
|
||||
case 4:
|
||||
lrz_pitch *= 2;
|
||||
FALLTHROUGH;
|
||||
case 2:
|
||||
lrz_height *= 2;
|
||||
}
|
||||
|
||||
unsigned size = lrz_pitch * lrz_height * 2;
|
||||
|
||||
size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
|
||||
|
||||
rsc->lrz_height = lrz_height;
|
||||
rsc->lrz_width = lrz_pitch;
|
||||
rsc->lrz_pitch = lrz_pitch;
|
||||
rsc->lrz = fd_bo_new(screen->dev, size, FD_BO_NOMAP, "lrz");
|
||||
fdl5_lrz_layout_init(&rsc->lrz_layout, rsc->b.b.width0, rsc->b.b.height0,
|
||||
rsc->b.b.nr_samples);
|
||||
rsc->lrz = fd_bo_new(screen->dev, rsc->lrz_layout.lrz_total_size,
|
||||
FD_BO_NOMAP, "lrz");
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
|
|||
|
|
@ -807,8 +807,8 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
|
|||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
|
||||
OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
|
||||
A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
|
||||
OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_layout.lrz_pitch - 1) |
|
||||
A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_layout.lrz_height - 1));
|
||||
|
||||
union pipe_color_union clear_color = { .f = {depth} };
|
||||
|
||||
|
|
@ -824,7 +824,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
|
|||
A6XX_RB_2D_DST(
|
||||
.bo = lrz,
|
||||
),
|
||||
A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
|
||||
A6XX_RB_2D_DST_PITCH(zsbuf->lrz_layout.lrz_pitch * 2),
|
||||
);
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -272,10 +272,10 @@ emit_lrz(struct fd_batch *batch, struct fd_batch_subpass *subpass)
|
|||
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_LRZ_BUFFER_BASE(.bo = subpass->lrz),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_pitch),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_layout.lrz_pitch),
|
||||
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(
|
||||
.bo = zsbuf->lrz_fc_offset ? subpass->lrz : NULL,
|
||||
.bo_offset = zsbuf->lrz_fc_offset
|
||||
.bo = zsbuf->lrz_layout.lrz_fc_size ? subpass->lrz : NULL,
|
||||
.bo_offset = zsbuf->lrz_layout.lrz_fc_offset
|
||||
),
|
||||
);
|
||||
fd_ringbuffer_attach_bo(ring, subpass->lrz);
|
||||
|
|
|
|||
|
|
@ -236,50 +236,12 @@ static void
|
|||
setup_lrz(struct fd_resource *rsc)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(rsc->b.b.screen);
|
||||
struct fdl_layout *layout = &rsc->layout;
|
||||
unsigned width = layout->width0;
|
||||
unsigned height = layout->height0;
|
||||
uint32_t nr_layers = 1;
|
||||
fdl6_lrz_layout_init<CHIP>(&rsc->lrz_layout, &rsc->layout, screen->info, 0,
|
||||
nr_layers);
|
||||
|
||||
/* LRZ buffer is super-sampled: */
|
||||
switch (layout->nr_samples) {
|
||||
case 4:
|
||||
width *= 2;
|
||||
FALLTHROUGH;
|
||||
case 2:
|
||||
height *= 2;
|
||||
}
|
||||
|
||||
unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
|
||||
unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
|
||||
|
||||
rsc->lrz_height = lrz_height;
|
||||
rsc->lrz_width = lrz_pitch;
|
||||
rsc->lrz_pitch = lrz_pitch;
|
||||
|
||||
unsigned lrz_size = lrz_pitch * lrz_height * sizeof(uint16_t);
|
||||
|
||||
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
|
||||
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
|
||||
|
||||
/* Fast-clear buffer is 1bit/block */
|
||||
unsigned lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
|
||||
|
||||
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
|
||||
* on A7XX (HW limitation)
|
||||
*/
|
||||
bool has_lrz_fc = screen->info->a6xx.enable_lrz_fast_clear &&
|
||||
lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE &&
|
||||
!FD_DBG(NOLRZFC);
|
||||
|
||||
/* Allocate a LRZ fast-clear buffer even if we aren't using FC, if the
|
||||
* hw is re-using this buffer for direction tracking
|
||||
*/
|
||||
if (has_lrz_fc || screen->info->a6xx.has_lrz_dir_tracking) {
|
||||
rsc->lrz_fc_offset = lrz_size;
|
||||
lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
|
||||
}
|
||||
|
||||
rsc->lrz = fd_bo_new(screen->dev, lrz_size, FD_BO_NOMAP, "lrz");
|
||||
rsc->lrz = fd_bo_new(screen->dev, rsc->lrz_layout.lrz_total_size,
|
||||
FD_BO_NOMAP, "lrz");
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include "util/u_transfer_helper.h"
|
||||
|
||||
#include "freedreno/fdl/freedreno_layout.h"
|
||||
#include "freedreno/fdl/freedreno_lrz_layout.h"
|
||||
#include "freedreno_batch.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
|
|
@ -139,18 +140,10 @@ struct fd_resource {
|
|||
*/
|
||||
bool needs_ubwc_clear : 1;
|
||||
|
||||
/*
|
||||
* LRZ
|
||||
*
|
||||
* TODO lrz width/height/pitch should probably also move to
|
||||
* fdl_layout
|
||||
*/
|
||||
/* LRZ */
|
||||
struct fdl_lrz_layout lrz_layout;
|
||||
bool lrz_valid : 1;
|
||||
enum fd_lrz_direction lrz_direction : 2;
|
||||
uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
|
||||
uint16_t lrz_height;
|
||||
uint16_t lrz_pitch;
|
||||
uint32_t lrz_fc_offset;
|
||||
struct fd_bo *lrz;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue