mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 14:40:10 +01:00
pvr: Implement depth/stencil/depth+stencil attachment clear.
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20055>
This commit is contained in:
parent
5611dedd78
commit
821c6b9342
5 changed files with 421 additions and 9 deletions
|
|
@ -25,6 +25,7 @@
|
|||
#include <stdint.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include "pvr_clear.h"
|
||||
#include "pvr_csb.h"
|
||||
#include "pvr_private.h"
|
||||
#include "util/list.h"
|
||||
|
|
@ -141,13 +142,414 @@ void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
|
|||
pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns the maximum number of layers to clear starting from base_layer
|
||||
* that contain or match the target rectangle.
|
||||
*
|
||||
* \param[in] target_rect The region which the clear should contain or
|
||||
* match.
|
||||
* \param[in] base_layer The layer index to start at.
|
||||
* \param[in] clear_rect_count Amount of clear_rects
|
||||
* \param[in] clear_rects Array of clear rects.
|
||||
*
|
||||
* \return Max number of layers that cover or match the target region.
|
||||
*/
|
||||
static uint32_t
|
||||
pvr_get_max_layers_covering_target(VkRect2D target_rect,
|
||||
uint32_t base_layer,
|
||||
uint32_t clear_rect_count,
|
||||
const VkClearRect *clear_rects)
|
||||
{
|
||||
const int32_t target_x0 = target_rect.offset.x;
|
||||
const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
|
||||
const int32_t target_y0 = target_rect.offset.y;
|
||||
const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
|
||||
|
||||
uint32_t layer_count = 0;
|
||||
|
||||
assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
|
||||
assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
|
||||
|
||||
for (uint32_t i = 0; i < clear_rect_count; i++) {
|
||||
const VkClearRect *clear_rect = &clear_rects[i];
|
||||
const uint32_t max_layer =
|
||||
clear_rect->baseArrayLayer + clear_rect->layerCount;
|
||||
bool target_is_covered;
|
||||
int32_t x0, x1;
|
||||
int32_t y0, y1;
|
||||
|
||||
if (clear_rect->baseArrayLayer == 0)
|
||||
continue;
|
||||
|
||||
assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
|
||||
UINT32_MAX);
|
||||
|
||||
/* Check for layer intersection. */
|
||||
if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
|
||||
continue;
|
||||
|
||||
x0 = clear_rect->rect.offset.x;
|
||||
x1 = x0 + (int32_t)clear_rect->rect.extent.width;
|
||||
y0 = clear_rect->rect.offset.y;
|
||||
y1 = y0 + (int32_t)clear_rect->rect.extent.height;
|
||||
|
||||
assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
|
||||
assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
|
||||
INT32_MAX);
|
||||
|
||||
target_is_covered = x0 <= target_x0 && x1 >= target_x1;
|
||||
target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
|
||||
|
||||
if (target_is_covered)
|
||||
layer_count = MAX2(layer_count, max_layer - base_layer);
|
||||
}
|
||||
|
||||
return layer_count;
|
||||
}
|
||||
|
||||
/* Return true if vertex shader is required to output render target id to pick
|
||||
* the texture array layer.
|
||||
*/
|
||||
static inline bool
|
||||
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
|
||||
uint32_t rect_count,
|
||||
const VkClearRect *rects)
|
||||
{
|
||||
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
|
||||
return false;
|
||||
|
||||
for (uint32_t i = 0; i < rect_count; i++) {
|
||||
if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
pvr_clear_template_idx_from_aspect(VkImageAspectFlags aspect)
|
||||
{
|
||||
switch (aspect) {
|
||||
case VK_IMAGE_ASPECT_COLOR_BIT:
|
||||
/* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
|
||||
*
|
||||
* "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
|
||||
* include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
|
||||
*
|
||||
*/
|
||||
return PVR_STATIC_CLEAR_COLOR_BIT;
|
||||
|
||||
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
||||
return PVR_STATIC_CLEAR_DEPTH_BIT;
|
||||
|
||||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||||
return PVR_STATIC_CLEAR_STENCIL_BIT;
|
||||
|
||||
case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
|
||||
return PVR_STATIC_CLEAR_DEPTH_BIT | PVR_STATIC_CLEAR_STENCIL_BIT;
|
||||
|
||||
default:
|
||||
unreachable("Invalid aspect mask for clear.");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
|
||||
uint32_t attachment_count,
|
||||
const VkClearAttachment *attachments,
|
||||
uint32_t rect_count,
|
||||
const VkClearRect *rects)
|
||||
{
|
||||
const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
|
||||
struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
|
||||
const struct pvr_renderpass_hwsetup_subpass *hw_pass =
|
||||
pvr_get_hw_subpass(pass, pass_info->subpass_idx);
|
||||
struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
|
||||
struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
|
||||
bool z_replicate = hw_pass->z_replicate != -1;
|
||||
uint32_t vs_output_size_in_bytes;
|
||||
bool vs_has_rt_id_output;
|
||||
|
||||
/* TODO: This function can be optimized so that most of the device memory
|
||||
* gets allocated together in one go and then filled as needed. There might
|
||||
* also be opportunities to reuse pds code and data segments.
|
||||
*/
|
||||
|
||||
assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
|
||||
|
||||
pvr_reset_graphics_dirty_state(cmd_buffer, false);
|
||||
|
||||
/* We'll be emitting to the control stream. */
|
||||
sub_cmd->empty_cmd = false;
|
||||
|
||||
vs_has_rt_id_output =
|
||||
pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
|
||||
|
||||
/* 4 because we're expecting the USC to output X, Y, Z, and W. */
|
||||
vs_output_size_in_bytes = 4 * sizeof(uint32_t);
|
||||
if (vs_has_rt_id_output)
|
||||
vs_output_size_in_bytes += sizeof(uint32_t);
|
||||
|
||||
for (uint32_t i = 0; i < attachment_count; i++) {
|
||||
const VkClearAttachment *attachment = &attachments[i];
|
||||
struct pvr_pds_vertex_shader_program pds_program;
|
||||
struct pvr_pds_upload pds_program_upload = { 0 };
|
||||
uint64_t current_base_array_layer = ~0;
|
||||
VkResult result;
|
||||
float depth;
|
||||
|
||||
if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
pvr_finishme("Implement clear for color attachment.");
|
||||
} else if (z_replicate &&
|
||||
attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
pvr_finishme("Implement clear for depth/depth+stencil attachment on "
|
||||
"z_replicate.");
|
||||
} else {
|
||||
struct pvr_static_clear_ppp_template template;
|
||||
uint32_t template_idx;
|
||||
struct pvr_bo *pvr_bo;
|
||||
|
||||
template_idx =
|
||||
pvr_clear_template_idx_from_aspect(attachment->aspectMask);
|
||||
template =
|
||||
cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
|
||||
|
||||
if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
template.config.ispa.sref =
|
||||
attachment->clearValue.depthStencil.stencil;
|
||||
}
|
||||
|
||||
if (vs_has_rt_id_output) {
|
||||
template.config.output_sel.rhw_pres = true;
|
||||
template.config.output_sel.render_tgt_pres = true;
|
||||
template.config.output_sel.vtxsize = 4 + 1;
|
||||
}
|
||||
|
||||
result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
|
||||
&template,
|
||||
&pvr_bo);
|
||||
if (result != VK_SUCCESS) {
|
||||
cmd_buffer->state.status = result;
|
||||
return;
|
||||
}
|
||||
|
||||
list_add(&pvr_bo->link, &cmd_buffer->bo_list);
|
||||
}
|
||||
|
||||
if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
depth = attachment->clearValue.depthStencil.depth;
|
||||
else
|
||||
depth = 1.0f;
|
||||
|
||||
if (vs_has_rt_id_output) {
|
||||
const struct pvr_device_static_clear_state *dev_clear_state =
|
||||
&cmd_buffer->device->static_clear_state;
|
||||
const struct pvr_bo *multi_layer_vert_bo =
|
||||
dev_clear_state->usc_multi_layer_vertex_shader_bo;
|
||||
|
||||
/* We can't use the device's passthrough pds program since it doesn't
|
||||
* have iterate_instance_id enabled. We'll be uploading code sections
|
||||
* per each clear rect.
|
||||
*/
|
||||
|
||||
/* TODO: See if we can allocate all the code section memory in one go.
|
||||
* We'd need to make sure that changing instance_id_modifier doesn't
|
||||
* change the code section size.
|
||||
* Also check if we can reuse the same code segment for each rect.
|
||||
* Seems like the instance_id_modifier is written into the data section
|
||||
* and used by the pds ADD instruction that way instead of it being
|
||||
* embedded into the code section.
|
||||
*/
|
||||
|
||||
pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
|
||||
multi_layer_vert_bo);
|
||||
} else {
|
||||
/* We can reuse the device's code section but we'll need to upload data
|
||||
* sections so initialize the program.
|
||||
*/
|
||||
pvr_pds_clear_vertex_shader_program_init_base(
|
||||
&pds_program,
|
||||
cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
|
||||
|
||||
pds_program_upload.code_offset =
|
||||
cmd_buffer->device->static_clear_state.pds.code_offset;
|
||||
/* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
|
||||
* let's change its interface to make that clear and not set this?
|
||||
*/
|
||||
pds_program_upload.code_size =
|
||||
cmd_buffer->device->static_clear_state.pds.code_size;
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < rect_count; j++) {
|
||||
struct pvr_pds_upload pds_program_data_upload;
|
||||
const VkClearRect *clear_rect = &rects[j];
|
||||
struct pvr_bo *vertices_bo;
|
||||
uint32_t *vdm_cs_buffer;
|
||||
VkResult result;
|
||||
|
||||
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
|
||||
(clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
|
||||
pvr_finishme("Add deferred RTA clear.");
|
||||
|
||||
if (clear_rect->baseArrayLayer != 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* TODO: Allocate all the buffers in one go before the loop, and add
|
||||
* support to multi-alloc bo.
|
||||
*/
|
||||
result = pvr_clear_vertices_upload(cmd_buffer->device,
|
||||
&clear_rect->rect,
|
||||
depth,
|
||||
&vertices_bo);
|
||||
if (result != VK_SUCCESS) {
|
||||
cmd_buffer->state.status = result;
|
||||
return;
|
||||
}
|
||||
|
||||
list_add(&vertices_bo->link, &cmd_buffer->bo_list);
|
||||
|
||||
if (vs_has_rt_id_output) {
|
||||
if (current_base_array_layer != clear_rect->baseArrayLayer) {
|
||||
const uint32_t base_array_layer = clear_rect->baseArrayLayer;
|
||||
struct pvr_pds_upload pds_program_code_upload;
|
||||
|
||||
result =
|
||||
pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
|
||||
&pds_program,
|
||||
cmd_buffer,
|
||||
base_array_layer,
|
||||
&pds_program_code_upload);
|
||||
if (result != VK_SUCCESS) {
|
||||
cmd_buffer->state.status = result;
|
||||
return;
|
||||
}
|
||||
|
||||
pds_program_upload.code_offset =
|
||||
pds_program_code_upload.code_offset;
|
||||
/* TODO: The code size doesn't get used by pvr_clear_vdm_state()
|
||||
* maybe let's change its interface to make that clear and not
|
||||
* set this?
|
||||
*/
|
||||
pds_program_upload.code_size = pds_program_code_upload.code_size;
|
||||
|
||||
current_base_array_layer = base_array_layer;
|
||||
}
|
||||
|
||||
result =
|
||||
pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
|
||||
&pds_program,
|
||||
cmd_buffer,
|
||||
vertices_bo,
|
||||
&pds_program_data_upload);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
} else {
|
||||
result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
|
||||
&pds_program,
|
||||
cmd_buffer,
|
||||
vertices_bo,
|
||||
&pds_program_data_upload);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
}
|
||||
|
||||
pds_program_upload.data_offset = pds_program_data_upload.data_offset;
|
||||
pds_program_upload.data_size = pds_program_data_upload.data_size;
|
||||
|
||||
vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream,
|
||||
PVR_CLEAR_VDM_STATE_DWORD_COUNT);
|
||||
if (!vdm_cs_buffer) {
|
||||
result = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
cmd_buffer->state.status = result;
|
||||
return;
|
||||
}
|
||||
|
||||
pvr_pack_clear_vdm_state(dev_info,
|
||||
&pds_program_upload,
|
||||
pds_program.temps_used,
|
||||
4,
|
||||
vs_output_size_in_bytes,
|
||||
clear_rect->layerCount,
|
||||
vdm_cs_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
|
||||
uint32_t attachmentCount,
|
||||
const VkClearAttachment *pAttachments,
|
||||
uint32_t rectCount,
|
||||
const VkClearRect *pRects)
|
||||
{
|
||||
assert(!"Unimplemented");
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;
|
||||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
|
||||
|
||||
/* TODO: There are some optimizations that can be made here:
|
||||
* - For a full screen clear, update the clear values for the corresponding
|
||||
* attachment index.
|
||||
* - For a full screen color attachment clear, add its index to a load op
|
||||
* override to add it to the background shader. This will elide any load
|
||||
* op loads currently in the background shader as well as the usual
|
||||
* frag kick for geometry clear.
|
||||
*/
|
||||
|
||||
/* If we have any depth/stencil clears, update the sub command depth/stencil
|
||||
* modification and usage flags.
|
||||
*/
|
||||
if (state->depth_format != VK_FORMAT_UNDEFINED) {
|
||||
uint32_t full_screen_clear_count;
|
||||
bool has_stencil_clear = false;
|
||||
bool has_depth_clear = false;
|
||||
|
||||
for (uint32_t i = 0; i < attachmentCount; i++) {
|
||||
const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;
|
||||
|
||||
if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
has_stencil_clear = true;
|
||||
|
||||
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
has_depth_clear = true;
|
||||
|
||||
if (has_stencil_clear && has_depth_clear)
|
||||
break;
|
||||
}
|
||||
|
||||
sub_cmd->modifies_stencil |= has_stencil_clear;
|
||||
sub_cmd->modifies_depth |= has_depth_clear;
|
||||
|
||||
/* We only care about clears that have a baseArrayLayer of 0 as any
|
||||
* attachment clears we move to the background shader must apply to all of
|
||||
* the attachment's sub resources.
|
||||
*/
|
||||
full_screen_clear_count =
|
||||
pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
|
||||
0,
|
||||
rectCount,
|
||||
pRects);
|
||||
|
||||
if (full_screen_clear_count > 0) {
|
||||
if (has_stencil_clear &&
|
||||
sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
|
||||
sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
|
||||
}
|
||||
|
||||
if (has_depth_clear &&
|
||||
sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
|
||||
sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pvr_clear_attachments(cmd_buffer,
|
||||
attachmentCount,
|
||||
pAttachments,
|
||||
rectCount,
|
||||
pRects);
|
||||
}
|
||||
|
||||
void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
|
||||
|
|
|
|||
|
|
@ -237,10 +237,10 @@ VkResult pvr_emit_ppp_from_template(
|
|||
* \param[out] pvr_bo_out BO upload object.
|
||||
* \return VK_SUCCESS if the upload succeeded.
|
||||
*/
|
||||
static VkResult pvr_clear_vertices_upload(struct pvr_device *device,
|
||||
const VkRect2D *rect,
|
||||
float depth,
|
||||
struct pvr_bo **const pvr_bo_out)
|
||||
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
|
||||
const VkRect2D *rect,
|
||||
float depth,
|
||||
struct pvr_bo **const pvr_bo_out)
|
||||
{
|
||||
const float y1 = (float)(rect->offset.y + rect->extent.height);
|
||||
const float x1 = (float)(rect->offset.x + rect->extent.width);
|
||||
|
|
|
|||
|
|
@ -185,4 +185,9 @@ void pvr_pack_clear_vdm_state(
|
|||
uint32_t layer_count,
|
||||
uint32_t state_buffer[const static PVR_CLEAR_VDM_STATE_DWORD_COUNT]);
|
||||
|
||||
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
|
||||
const VkRect2D *rect,
|
||||
float depth,
|
||||
struct pvr_bo **const pvr_bo_out);
|
||||
|
||||
#endif /* PVR_CLEAR_H */
|
||||
|
|
|
|||
|
|
@ -1636,9 +1636,8 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
bool start_geom)
|
||||
void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
bool start_geom)
|
||||
{
|
||||
struct vk_dynamic_graphics_state *const dynamic_state =
|
||||
&cmd_buffer->vk.dynamic_graphics_state;
|
||||
|
|
@ -2165,7 +2164,7 @@ static VkResult pvr_init_render_targets(struct pvr_device *device,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static const struct pvr_renderpass_hwsetup_subpass *
|
||||
const struct pvr_renderpass_hwsetup_subpass *
|
||||
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
|
||||
{
|
||||
const struct pvr_renderpass_hw_map *map =
|
||||
|
|
|
|||
|
|
@ -1586,6 +1586,12 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
|
|||
VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_query_info *query_info);
|
||||
|
||||
void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
bool start_geom);
|
||||
|
||||
const struct pvr_renderpass_hwsetup_subpass *
|
||||
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
|
||||
|
||||
#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
|
||||
VK_FROM_HANDLE(__pvr_type, __name, __handle)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue