mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 11:00:11 +01:00
pvr: Implement ZLS subtile alignment
This is a workaround for the edge case where a depth buffer is smaller than a single tile size. Signed-off-by: Matt Coster <matt.coster@imgtec.com> Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23313>
This commit is contained in:
parent
620e7d4cf2
commit
d1b17a5edc
3 changed files with 213 additions and 41 deletions
|
|
@ -1087,33 +1087,180 @@ pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
|
|||
* to the nearest power-of-two (which will be tile-aligned). If the attachment
|
||||
* is not twiddled, we don't need to worry about tile-alignment at all.
|
||||
*/
|
||||
static void
|
||||
pvr_sub_cmd_gfx_align_zls_subtiles(const struct pvr_device_info *dev_info,
|
||||
const struct pvr_render_job *job,
|
||||
const struct pvr_image *image)
|
||||
static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment(
|
||||
const struct pvr_device_info *dev_info,
|
||||
const struct pvr_render_job *job)
|
||||
{
|
||||
const struct pvr_image *const ds_image =
|
||||
pvr_image_view_get_image(job->ds.iview);
|
||||
uint32_t zls_tile_size_x;
|
||||
uint32_t zls_tile_size_y;
|
||||
|
||||
rogue_get_zls_tile_size_xy(dev_info, &zls_tile_size_x, &zls_tile_size_y);
|
||||
|
||||
if (image->physical_extent.width >= zls_tile_size_x &&
|
||||
image->physical_extent.height >= zls_tile_size_y) {
|
||||
return;
|
||||
if (ds_image->physical_extent.width >= zls_tile_size_x &&
|
||||
ds_image->physical_extent.height >= zls_tile_size_y) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If we have the zls_subtile feature, we can skip the alignment iff:
|
||||
* - The attachment is not multisampled, and
|
||||
* - The depth and stencil attachments are the same.
|
||||
*/
|
||||
if (PVR_HAS_FEATURE(dev_info, zls_subtile) &&
|
||||
image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
|
||||
(job->has_stencil_attachment || !job->has_depth_attachment)) {
|
||||
return;
|
||||
ds_image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
|
||||
job->has_stencil_attachment == job->has_depth_attachment) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pvr_finishme("Unaligned ZLS subtile");
|
||||
mesa_logd("Image: %ux%u ZLS tile: %ux%u\n",
|
||||
image->physical_extent.width,
|
||||
image->physical_extent.height,
|
||||
zls_tile_size_x,
|
||||
zls_tile_size_y);
|
||||
/* No ZLS functions enabled; nothing to do. */
|
||||
if ((!job->has_depth_attachment && !job->has_stencil_attachment) ||
|
||||
(!job->ds.load && !job->ds.store)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
struct pvr_sub_cmd_gfx *const gfx_sub_cmd)
|
||||
{
|
||||
struct pvr_sub_cmd *const prev_sub_cmd =
|
||||
container_of(gfx_sub_cmd, struct pvr_sub_cmd, gfx);
|
||||
struct pvr_ds_attachment *const ds = &gfx_sub_cmd->job.ds;
|
||||
const struct pvr_image *const ds_image = pvr_image_view_get_image(ds->iview);
|
||||
const VkFormat copy_format = pvr_get_raw_copy_format(ds_image->vk.format);
|
||||
|
||||
struct pvr_suballoc_bo *buffer;
|
||||
uint32_t buffer_layer_size;
|
||||
VkBufferImageCopy2 region;
|
||||
VkExtent2D zls_tile_size;
|
||||
VkExtent2D rounded_size;
|
||||
uint32_t buffer_size;
|
||||
VkExtent2D scale;
|
||||
VkResult result;
|
||||
|
||||
/* The operations below assume the last command in the buffer was the target
|
||||
* gfx subcommand. Assert that this is the case.
|
||||
*/
|
||||
assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) ==
|
||||
prev_sub_cmd);
|
||||
|
||||
if (!ds->load && !ds->store)
|
||||
return VK_SUCCESS;
|
||||
|
||||
rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info,
|
||||
&zls_tile_size.width,
|
||||
&zls_tile_size.height);
|
||||
rogue_get_isp_scale_xy_from_samples(ds_image->vk.samples,
|
||||
&scale.width,
|
||||
&scale.height);
|
||||
|
||||
rounded_size = (VkExtent2D){
|
||||
.width = ALIGN_POT(ds_image->physical_extent.width, zls_tile_size.width),
|
||||
.height =
|
||||
ALIGN_POT(ds_image->physical_extent.height, zls_tile_size.height),
|
||||
};
|
||||
|
||||
buffer_layer_size = vk_format_get_blocksize(ds_image->vk.format) *
|
||||
rounded_size.width * rounded_size.height * scale.width *
|
||||
scale.height;
|
||||
|
||||
if (ds->iview->vk.layer_count > 1)
|
||||
buffer_layer_size = ALIGN_POT(buffer_layer_size, ds_image->alignment);
|
||||
|
||||
buffer_size = buffer_layer_size * ds->iview->vk.layer_count;
|
||||
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
cmd_buffer->device->heaps.general_heap,
|
||||
buffer_size,
|
||||
0,
|
||||
&buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
region = (VkBufferImageCopy2){
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
|
||||
.pNext = NULL,
|
||||
.bufferOffset = 0,
|
||||
.bufferRowLength = rounded_size.width,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
.mipLevel = ds->iview->vk.base_mip_level,
|
||||
.baseArrayLayer = ds->iview->vk.base_array_layer,
|
||||
.layerCount = ds->iview->vk.layer_count,
|
||||
},
|
||||
.imageOffset = { 0 },
|
||||
.imageExtent = {
|
||||
.width = ds->iview->vk.extent.width,
|
||||
.height = ds->iview->vk.extent.height,
|
||||
.depth = 1,
|
||||
},
|
||||
};
|
||||
|
||||
if (ds->load) {
|
||||
result =
|
||||
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = pvr_copy_image_to_buffer_region_format(cmd_buffer,
|
||||
ds_image,
|
||||
buffer->dev_addr,
|
||||
®ion,
|
||||
copy_format,
|
||||
copy_format);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
cmd_buffer->state.current_sub_cmd->transfer.serialize_with_frag = true;
|
||||
|
||||
result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* Now we have to fiddle with cmd_buffer to place this transfer command
|
||||
* *before* the target gfx subcommand.
|
||||
*/
|
||||
list_move_to(&cmd_buffer->state.current_sub_cmd->link,
|
||||
&prev_sub_cmd->link);
|
||||
}
|
||||
|
||||
if (ds->store) {
|
||||
result =
|
||||
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = pvr_copy_buffer_to_image_region_format(cmd_buffer,
|
||||
buffer->dev_addr,
|
||||
ds_image,
|
||||
®ion,
|
||||
copy_format,
|
||||
copy_format,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
cmd_buffer->state.current_sub_cmd->transfer.serialize_with_frag = true;
|
||||
|
||||
result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Finally, patch up the target graphics sub_cmd to use the correctly-strided
|
||||
* buffer.
|
||||
*/
|
||||
ds->has_alignment_transfers = true;
|
||||
ds->addr = buffer->dev_addr;
|
||||
ds->physical_extent = rounded_size;
|
||||
|
||||
gfx_sub_cmd->wait_on_previous_transfer = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct pvr_emit_state {
|
||||
|
|
@ -1320,15 +1467,18 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
bool d_store = false, s_store = false;
|
||||
bool d_load = false, s_load = false;
|
||||
|
||||
job->ds.iview = ds_iview;
|
||||
job->ds.addr = ds_image->dev_addr;
|
||||
|
||||
job->ds.stride =
|
||||
pvr_stride_from_pitch(level_pitch, ds_iview->vk.format);
|
||||
job->ds.height = ds_iview->vk.extent.height;
|
||||
job->ds.physical_width = u_minify(ds_image->physical_extent.width,
|
||||
ds_iview->vk.base_mip_level);
|
||||
job->ds.physical_height = u_minify(ds_image->physical_extent.height,
|
||||
ds_iview->vk.base_mip_level);
|
||||
job->ds.physical_extent = (VkExtent2D){
|
||||
.width = u_minify(ds_image->physical_extent.width,
|
||||
ds_iview->vk.base_mip_level),
|
||||
.height = u_minify(ds_image->physical_extent.height,
|
||||
ds_iview->vk.base_mip_level),
|
||||
};
|
||||
job->ds.layer_size = ds_image->layer_size;
|
||||
|
||||
job->ds_clear_value = default_ds_clear_value;
|
||||
|
|
@ -1409,9 +1559,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
|
||||
if (job->ds.load || job->ds.store || store_was_optimised_out)
|
||||
job->process_empty_tiles = true;
|
||||
}
|
||||
|
||||
pvr_sub_cmd_gfx_align_zls_subtiles(dev_info, job, image);
|
||||
if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) {
|
||||
result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
job->has_depth_attachment = false;
|
||||
job->has_stencil_attachment = false;
|
||||
|
|
|
|||
|
|
@ -1340,21 +1340,29 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|||
|
||||
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
|
||||
if (job->has_depth_attachment) {
|
||||
uint32_t aligned_width =
|
||||
ALIGN_POT(job->ds.physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
|
||||
uint32_t aligned_height =
|
||||
ALIGN_POT(job->ds.physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
|
||||
uint32_t alignment_x;
|
||||
uint32_t alignment_y;
|
||||
|
||||
if (job->ds.has_alignment_transfers) {
|
||||
rogue_get_zls_tile_size_xy(dev_info, &alignment_x, &alignment_y);
|
||||
} else {
|
||||
alignment_x = ROGUE_IPF_TILE_SIZE_PIXELS;
|
||||
alignment_y = ROGUE_IPF_TILE_SIZE_PIXELS;
|
||||
}
|
||||
|
||||
rogue_get_isp_num_tiles_xy(
|
||||
dev_info,
|
||||
job->samples,
|
||||
ALIGN_POT(job->ds.physical_extent.width, alignment_x),
|
||||
ALIGN_POT(job->ds.physical_extent.height, alignment_y),
|
||||
&value.zlsextent_x_z,
|
||||
&value.zlsextent_y_z);
|
||||
|
||||
rogue_get_isp_num_tiles_xy(dev_info,
|
||||
job->samples,
|
||||
aligned_width,
|
||||
aligned_height,
|
||||
&value.zlsextent_x_z,
|
||||
&value.zlsextent_y_z);
|
||||
value.zlsextent_x_z -= 1;
|
||||
value.zlsextent_y_z -= 1;
|
||||
|
||||
if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED) {
|
||||
if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED &&
|
||||
!job->ds.has_alignment_transfers) {
|
||||
value.loadtwiddled = true;
|
||||
value.storetwiddled = true;
|
||||
}
|
||||
|
|
@ -1380,10 +1388,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|||
}
|
||||
|
||||
value.zloaden = job->ds.load;
|
||||
value.forcezload = job->ds.load;
|
||||
value.forcezload = value.zloaden;
|
||||
|
||||
value.zstoreen = job->ds.store;
|
||||
value.forcezstore = job->ds.store;
|
||||
value.forcezstore = value.zstoreen;
|
||||
|
||||
zload_format = value.zloadformat;
|
||||
} else {
|
||||
|
|
@ -1392,10 +1400,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|||
|
||||
if (job->has_stencil_attachment) {
|
||||
value.sstoreen = job->ds.store;
|
||||
value.forcezstore = job->ds.store;
|
||||
value.forcezstore = value.sstoreen;
|
||||
|
||||
value.sloaden = job->ds.load;
|
||||
value.forcezload = job->ds.load;
|
||||
value.forcezload = value.sloaden;
|
||||
}
|
||||
}
|
||||
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
|
||||
|
|
@ -1570,8 +1578,13 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|||
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
|
||||
pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
|
||||
if (job->has_depth_attachment) {
|
||||
value.x = job->ds.stride - 1;
|
||||
value.y = job->ds.height - 1;
|
||||
if (job->ds.has_alignment_transfers) {
|
||||
value.x = job->ds.physical_extent.width - 1;
|
||||
value.y = job->ds.physical_extent.height - 1;
|
||||
} else {
|
||||
value.x = job->ds.stride - 1;
|
||||
value.y = job->ds.height - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ struct pvr_render_job {
|
|||
pvr_dev_addr_t addr;
|
||||
uint32_t stride;
|
||||
uint32_t height;
|
||||
uint32_t physical_width;
|
||||
uint32_t physical_height;
|
||||
VkExtent2D physical_extent;
|
||||
uint32_t layer_size;
|
||||
VkFormat vk_format;
|
||||
/* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
|
||||
|
|
@ -91,6 +90,12 @@ struct pvr_render_job {
|
|||
* included by both this header and pvr_private.h.
|
||||
*/
|
||||
uint32_t memlayout;
|
||||
|
||||
/* TODO: Is this really necessary? Maybe we can extract all useful
|
||||
* information and drop this member. */
|
||||
const struct pvr_image_view *iview;
|
||||
|
||||
bool has_alignment_transfers;
|
||||
} ds;
|
||||
|
||||
VkClearDepthStencilValue ds_clear_value;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue