pvr: Implement ZLS subtile alignment

This is a workaround for the edge case where a depth buffer is smaller
than a single tile.

Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23313>
This commit is contained in:
Matt Coster 2023-02-27 12:33:31 +00:00 committed by Marge Bot
parent 620e7d4cf2
commit d1b17a5edc
3 changed files with 213 additions and 41 deletions

View file

@ -1087,33 +1087,180 @@ pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
* to the nearest power-of-two (which will be tile-aligned). If the attachment
* is not twiddled, we don't need to worry about tile-alignment at all.
*/
/* NOTE(review): the diff markers are not visible in this view. The two
 * signatures below look like the replaced helper (returning void) followed by
 * its replacement (returning bool), and the body mixes statements from both
 * versions - confirm against the real patch before touching any code here.
 *
 * As far as the bool-returning version can be read from here, it returns true
 * when the depth/stencil attachment of the job needs the subtile alignment
 * workaround and false otherwise.
 */
static void
pvr_sub_cmd_gfx_align_zls_subtiles(const struct pvr_device_info *dev_info,
const struct pvr_render_job *job,
const struct pvr_image *image)
static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment(
const struct pvr_device_info *dev_info,
const struct pvr_render_job *job)
{
const struct pvr_image *const ds_image =
pvr_image_view_get_image(job->ds.iview);
uint32_t zls_tile_size_x;
uint32_t zls_tile_size_y;
rogue_get_zls_tile_size_xy(dev_info, &zls_tile_size_x, &zls_tile_size_y);
/* An image that is at least one ZLS tile wide and one ZLS tile high needs no
 * workaround.
 */
if (image->physical_extent.width >= zls_tile_size_x &&
image->physical_extent.height >= zls_tile_size_y) {
return;
if (ds_image->physical_extent.width >= zls_tile_size_x &&
ds_image->physical_extent.height >= zls_tile_size_y) {
return false;
}
/* If we have the zls_subtile feature, we can skip the alignment iff:
* - The attachment is not multisampled, and
* - The depth and stencil attachments are the same.
*/
if (PVR_HAS_FEATURE(dev_info, zls_subtile) &&
image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
(job->has_stencil_attachment || !job->has_depth_attachment)) {
return;
ds_image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
job->has_stencil_attachment == job->has_depth_attachment) {
return false;
}
pvr_finishme("Unaligned ZLS subtile");
mesa_logd("Image: %ux%u ZLS tile: %ux%u\n",
image->physical_extent.width,
image->physical_extent.height,
zls_tile_size_x,
zls_tile_size_y);
/* No ZLS functions enabled; nothing to do. */
if ((!job->has_depth_attachment && !job->has_stencil_attachment) ||
(!job->ds.load && !job->ds.store)) {
return false;
}
/* None of the early-outs above applied, so the attachment is smaller than a
 * ZLS tile in at least one dimension and has a load or store to perform.
 */
return true;
}
/* Redirect the depth/stencil accesses of a graphics sub-command to a temporary
 * buffer whose extent is rounded up to the ZLS tile size.
 *
 * The buffer is allocated from the general heap through the command buffer.
 * When the job loads depth/stencil, a transfer sub-command copying the image
 * into the buffer is recorded and moved in front of the graphics sub-command.
 * When the job stores depth/stencil, a transfer sub-command copying the buffer
 * back into the image is recorded after it. Finally the job's depth/stencil
 * address and physical extent are patched to describe the buffer.
 *
 * The graphics sub-command must be the last entry of cmd_buffer->sub_cmds on
 * entry (asserted below).
 *
 * Returns VK_SUCCESS, or the first error reported by the allocation, the
 * sub-command start/end helpers or the copy helpers.
 */
static VkResult
pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
                                  struct pvr_sub_cmd_gfx *const gfx_sub_cmd)
{
   struct pvr_sub_cmd *const prev_sub_cmd =
      container_of(gfx_sub_cmd, struct pvr_sub_cmd, gfx);
   struct pvr_ds_attachment *const ds = &gfx_sub_cmd->job.ds;
   const struct pvr_image *const ds_image = pvr_image_view_get_image(ds->iview);
   const VkFormat copy_format = pvr_get_raw_copy_format(ds_image->vk.format);

   struct pvr_suballoc_bo *buffer;
   uint32_t buffer_layer_size;
   VkBufferImageCopy2 region;
   VkExtent2D zls_tile_size;
   VkExtent2D rounded_size;
   uint32_t buffer_size;
   VkExtent2D scale;
   VkResult result;

   /* The operations below assume the last command in the buffer was the target
    * gfx subcommand. Assert that this is the case.
    */
   assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) ==
          prev_sub_cmd);

   /* Neither a load nor a store is requested, so no copy is needed. */
   if (!ds->load && !ds->store)
      return VK_SUCCESS;

   rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info,
                              &zls_tile_size.width,
                              &zls_tile_size.height);
   rogue_get_isp_scale_xy_from_samples(ds_image->vk.samples,
                                       &scale.width,
                                       &scale.height);

   rounded_size = (VkExtent2D){
      .width = ALIGN_POT(ds_image->physical_extent.width, zls_tile_size.width),
      .height =
         ALIGN_POT(ds_image->physical_extent.height, zls_tile_size.height),
   };

   buffer_layer_size = vk_format_get_blocksize(ds_image->vk.format) *
                       rounded_size.width * rounded_size.height * scale.width *
                       scale.height;

   /* Only multi-layer views pad each layer to the image alignment. */
   if (ds->iview->vk.layer_count > 1)
      buffer_layer_size = ALIGN_POT(buffer_layer_size, ds_image->alignment);

   buffer_size = buffer_layer_size * ds->iview->vk.layer_count;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.general_heap,
                                     buffer_size,
                                     0,
                                     &buffer);
   if (result != VK_SUCCESS)
      return result;

   /* The same region describes both directions of the copy; the buffer rows
    * use the rounded-up width as their length.
    */
   region = (VkBufferImageCopy2){
      .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
      .pNext = NULL,
      .bufferOffset = 0,
      .bufferRowLength = rounded_size.width,
      .bufferImageHeight = 0,
      .imageSubresource = {
         .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
         .mipLevel = ds->iview->vk.base_mip_level,
         .baseArrayLayer = ds->iview->vk.base_array_layer,
         .layerCount = ds->iview->vk.layer_count,
      },
      .imageOffset = { 0 },
      .imageExtent = {
         .width = ds->iview->vk.extent.width,
         .height = ds->iview->vk.extent.height,
         .depth = 1,
      },
   };

   if (ds->load) {
      struct pvr_sub_cmd *load_sub_cmd;

      result =
         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
      if (result != VK_SUCCESS)
         return result;

      /* Keep our own pointer to the new transfer sub-command: it is still
       * needed after pvr_cmd_buffer_end_sub_cmd(), at which point
       * state.current_sub_cmd can no longer be relied upon to refer to it.
       */
      load_sub_cmd = cmd_buffer->state.current_sub_cmd;

      result = pvr_copy_image_to_buffer_region_format(cmd_buffer,
                                                      ds_image,
                                                      buffer->dev_addr,
                                                      &region,
                                                      copy_format,
                                                      copy_format);
      if (result != VK_SUCCESS)
         return result;

      load_sub_cmd->transfer.serialize_with_frag = true;

      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Now we have to fiddle with cmd_buffer to place this transfer command
       * *before* the target gfx subcommand.
       *
       * list_move_to() inserts the item directly *after* the given location,
       * so the location has to be the entry preceding the gfx subcommand.
       */
      list_move_to(&load_sub_cmd->link, prev_sub_cmd->link.prev);
   }

   if (ds->store) {
      result =
         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_copy_buffer_to_image_region_format(cmd_buffer,
                                                      buffer->dev_addr,
                                                      ds_image,
                                                      &region,
                                                      copy_format,
                                                      copy_format,
                                                      0);
      if (result != VK_SUCCESS)
         return result;

      cmd_buffer->state.current_sub_cmd->transfer.serialize_with_frag = true;

      /* The store transfer stays at the tail of the list, i.e. after the gfx
       * subcommand, so no reordering is needed here.
       */
      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Finally, patch up the target graphics sub_cmd to use the correctly-strided
    * buffer.
    */
   ds->has_alignment_transfers = true;
   ds->addr = buffer->dev_addr;
   ds->physical_extent = rounded_size;

   gfx_sub_cmd->wait_on_previous_transfer = true;

   return VK_SUCCESS;
}
struct pvr_emit_state {
@ -1320,15 +1467,18 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
bool d_store = false, s_store = false;
bool d_load = false, s_load = false;
job->ds.iview = ds_iview;
job->ds.addr = ds_image->dev_addr;
job->ds.stride =
pvr_stride_from_pitch(level_pitch, ds_iview->vk.format);
job->ds.height = ds_iview->vk.extent.height;
job->ds.physical_width = u_minify(ds_image->physical_extent.width,
ds_iview->vk.base_mip_level);
job->ds.physical_height = u_minify(ds_image->physical_extent.height,
ds_iview->vk.base_mip_level);
job->ds.physical_extent = (VkExtent2D){
.width = u_minify(ds_image->physical_extent.width,
ds_iview->vk.base_mip_level),
.height = u_minify(ds_image->physical_extent.height,
ds_iview->vk.base_mip_level),
};
job->ds.layer_size = ds_image->layer_size;
job->ds_clear_value = default_ds_clear_value;
@ -1409,9 +1559,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
if (job->ds.load || job->ds.store || store_was_optimised_out)
job->process_empty_tiles = true;
}
pvr_sub_cmd_gfx_align_zls_subtiles(dev_info, job, image);
if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) {
result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd);
if (result != VK_SUCCESS)
return result;
}
}
} else {
job->has_depth_attachment = false;
job->has_stencil_attachment = false;

View file

@ -1340,21 +1340,29 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
if (job->has_depth_attachment) {
uint32_t aligned_width =
ALIGN_POT(job->ds.physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
uint32_t aligned_height =
ALIGN_POT(job->ds.physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
uint32_t alignment_x;
uint32_t alignment_y;
if (job->ds.has_alignment_transfers) {
rogue_get_zls_tile_size_xy(dev_info, &alignment_x, &alignment_y);
} else {
alignment_x = ROGUE_IPF_TILE_SIZE_PIXELS;
alignment_y = ROGUE_IPF_TILE_SIZE_PIXELS;
}
rogue_get_isp_num_tiles_xy(
dev_info,
job->samples,
ALIGN_POT(job->ds.physical_extent.width, alignment_x),
ALIGN_POT(job->ds.physical_extent.height, alignment_y),
&value.zlsextent_x_z,
&value.zlsextent_y_z);
rogue_get_isp_num_tiles_xy(dev_info,
job->samples,
aligned_width,
aligned_height,
&value.zlsextent_x_z,
&value.zlsextent_y_z);
value.zlsextent_x_z -= 1;
value.zlsextent_y_z -= 1;
if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED) {
if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED &&
!job->ds.has_alignment_transfers) {
value.loadtwiddled = true;
value.storetwiddled = true;
}
@ -1380,10 +1388,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
}
value.zloaden = job->ds.load;
value.forcezload = job->ds.load;
value.forcezload = value.zloaden;
value.zstoreen = job->ds.store;
value.forcezstore = job->ds.store;
value.forcezstore = value.zstoreen;
zload_format = value.zloadformat;
} else {
@ -1392,10 +1400,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
if (job->has_stencil_attachment) {
value.sstoreen = job->ds.store;
value.forcezstore = job->ds.store;
value.forcezstore = value.sstoreen;
value.sloaden = job->ds.load;
value.forcezload = job->ds.load;
value.forcezload = value.sloaden;
}
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
@ -1570,8 +1578,13 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
if (job->has_depth_attachment) {
value.x = job->ds.stride - 1;
value.y = job->ds.height - 1;
if (job->ds.has_alignment_transfers) {
value.x = job->ds.physical_extent.width - 1;
value.y = job->ds.physical_extent.height - 1;
} else {
value.x = job->ds.stride - 1;
value.y = job->ds.height - 1;
}
}
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);

View file

@ -78,8 +78,7 @@ struct pvr_render_job {
pvr_dev_addr_t addr;
uint32_t stride;
uint32_t height;
uint32_t physical_width;
uint32_t physical_height;
VkExtent2D physical_extent;
uint32_t layer_size;
VkFormat vk_format;
/* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
@ -91,6 +90,12 @@ struct pvr_render_job {
* included by both this header and pvr_private.h.
*/
uint32_t memlayout;
/* TODO: Is this really necessary? Maybe we can extract all useful
* information and drop this member. */
const struct pvr_image_view *iview;
bool has_alignment_transfers;
} ds;
VkClearDepthStencilValue ds_clear_value;