diff --git a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c index ff4c3be6895..c770addff19 100644 --- a/src/imagination/vulkan/pvr_blit.c +++ b/src/imagination/vulkan/pvr_blit.c @@ -1834,7 +1834,7 @@ static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer, struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info; struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx; const struct pvr_renderpass_hwsetup_render *hw_render = - &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx]; + pvr_pass_info_get_hw_render(pass_info, sub_cmd->hw_render_idx); const struct pvr_image_view *image_view; const struct pvr_image *image; uint32_t base_layer; @@ -1872,6 +1872,12 @@ static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer, assert(attachment->colorAttachment < hw_render->color_init_count); index = hw_render->color_init[attachment->colorAttachment].index; + image_view = pass_info->attachments[index]; + } else if (cmd_buffer->state.current_sub_cmd->is_dynamic_render) { + const struct pvr_dynamic_render_info *dr_info = pass_info->dr_info; + const uint32_t index = + dr_info->color_attachments[attachment->colorAttachment].index_color; + image_view = pass_info->attachments[index]; } else { const struct pvr_renderpass_hwsetup_subpass *hw_pass = @@ -1938,19 +1944,25 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, { const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass; struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info; - const struct pvr_renderpass_hwsetup_subpass *hw_pass = - pvr_get_hw_subpass(pass, pass_info->subpass_idx); struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx; struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info; - struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index]; + const struct pvr_renderpass_hwsetup_subpass *hw_pass; + bool vs_has_rt_id_output, multiview_enabled; uint32_t vs_output_size_in_bytes; - bool vs_has_rt_id_output; /* TODO: This function can be optimized so that most of the device memory * gets allocated together in one go and then filled as needed. There might * also be opportunities to reuse pds code and data segments. 
*/ + if (pass) { + hw_pass = pvr_get_hw_subpass(pass, pass_info->subpass_idx); + multiview_enabled = pass->multiview_enabled; + } else { + multiview_enabled = pass_info->dr_info->hw_render.multiview_enabled; + hw_pass = NULL; + } + assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS); pvr_reset_graphics_dirty_state(cmd_buffer, false); @@ -1959,7 +1971,7 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, sub_cmd->empty_cmd = false; vs_has_rt_id_output = pvr_clear_needs_rt_id_output(dev_info, - pass->multiview_enabled, + multiview_enabled, rect_count, rects); @@ -1978,6 +1990,7 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE]; + struct pvr_renderpass_hwsetup_render *hw_render; const struct usc_mrt_resource *mrt_resource; uint32_t global_attachment_idx; uint32_t local_attachment_idx; @@ -1985,19 +1998,31 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, local_attachment_idx = attachment->colorAttachment; + assert(cmd_buffer->state.current_sub_cmd->is_dynamic_render || + pass->hw_setup->render_count > 0); + hw_render = + pvr_pass_info_get_hw_render(&cmd_buffer->state.render_pass_info, 0); + + /* TODO: verify that the hw_render if is_render_init is true is exclusive to + * a non dynamic rendering path. + */ if (is_render_init) { - struct pvr_renderpass_hwsetup_render *hw_render; - - assert(pass->hw_setup->render_count > 0); - hw_render = &pass->hw_setup->renders[0]; - mrt_resource = &hw_render->init_setup.mrt_resources[local_attachment_idx]; assert(local_attachment_idx < hw_render->color_init_count); global_attachment_idx = hw_render->color_init[local_attachment_idx].index; + } else if (cmd_buffer->state.current_sub_cmd->is_dynamic_render) { + const struct pvr_dynamic_render_info *dr_info = pass_info->dr_info; + + mrt_resource = + &dr_info->mrt_setup->mrt_resources[local_attachment_idx]; + global_attachment_idx = + dr_info->color_attachments[local_attachment_idx].index_color; } else { + struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index]; + mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx]; assert(local_attachment_idx < sub_pass->color_count); @@ -2008,8 +2033,14 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, if (global_attachment_idx == VK_ATTACHMENT_UNUSED) continue; - assert(global_attachment_idx < pass->attachment_count); - format = pass->attachments[global_attachment_idx].vk_format; + if (cmd_buffer->state.current_sub_cmd->is_dynamic_render) { + const struct pvr_dynamic_render_info *dr_info = pass_info->dr_info; + + format = dr_info->attachments[global_attachment_idx].vk_format; + } else { + assert(global_attachment_idx < pass->attachment_count); + format = pass->attachments[global_attachment_idx].vk_format; + } assert(format != VK_FORMAT_UNDEFINED); @@ -2026,7 +2057,7 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, vs_has_rt_id_output); if (result != VK_SUCCESS) return; - } else if (hw_pass->z_replicate != -1 && + } else if (pass && hw_pass->z_replicate != -1 && attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { const VkClearColorValue clear_color = { .float32 = { [0] = attachment->clearValue.depthStencil.depth, }, @@ -2140,14 +2171,19 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) && (clear_rect->baseArrayLayer != 0 || 
clear_rect->layerCount > 1)) { - result = pvr_add_deferred_rta_clear(cmd_buffer, - attachment, - clear_rect, - is_render_init); - if (result != VK_SUCCESS) - return; - continue; + if (pass_info->attachments) { + result = pvr_add_deferred_rta_clear(cmd_buffer, + attachment, + clear_rect, + is_render_init); + if (result != VK_SUCCESS) + return; + + continue; + } else { + pvr_finishme("incomplete support for deferred (emulated) RTA clears"); + } } /* TODO: Allocate all the buffers in one go before the loop, and add diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 170aefff077..d7dbd497d53 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -101,18 +101,51 @@ struct pvr_compute_kernel_info { uint32_t max_instances; }; +static void +pvr_dynamic_render_info_destroy(const struct pvr_device *device, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_dynamic_render_info *dr_info); + +static void +pvr_cmd_buffer_clear_values_free(struct pvr_cmd_buffer *cmd_buffer); + +static void +pvr_cmd_buffer_attachments_free(struct pvr_cmd_buffer *cmd_buffer); + +struct pvr_renderpass_hwsetup_render * +pvr_pass_info_get_hw_render(const struct pvr_render_pass_info *render_pass_info, + uint32_t idx) +{ + if (render_pass_info->dr_info) + return &render_pass_info->dr_info->hw_render; + + return &render_pass_info->pass->hw_setup->renders[idx]; +} + static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd *sub_cmd) { + bool secondary_cont; + if (sub_cmd->owned) { switch (sub_cmd->type) { case PVR_SUB_CMD_TYPE_GRAPHICS: + secondary_cont = + cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && + (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT); + util_dynarray_fini(&sub_cmd->gfx.sec_query_indices); pvr_csb_finish(&sub_cmd->gfx.control_stream); pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.terminate_ctrl_stream); pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.multiview_ctrl_stream); pvr_bo_suballoc_free(sub_cmd->gfx.depth_bias_bo); pvr_bo_suballoc_free(sub_cmd->gfx.scissor_bo); + if (sub_cmd->is_dynamic_render && !secondary_cont) { + pvr_rstate_entry_remove(cmd_buffer->device, sub_cmd->gfx.rstate); + pvr_dynamic_render_info_destroy(cmd_buffer->device, cmd_buffer, + sub_cmd->gfx.dr_info); + } break; case PVR_SUB_CMD_TYPE_COMPUTE: @@ -156,10 +189,8 @@ static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer) static void pvr_cmd_buffer_free_resources(struct pvr_cmd_buffer *cmd_buffer) { - vk_free(&cmd_buffer->vk.pool->alloc, - cmd_buffer->state.render_pass_info.attachments); - vk_free(&cmd_buffer->vk.pool->alloc, - cmd_buffer->state.render_pass_info.clear_values); + pvr_cmd_buffer_attachments_free(cmd_buffer); + pvr_cmd_buffer_clear_values_free(cmd_buffer); util_dynarray_fini(&cmd_buffer->state.query_indices); @@ -1546,12 +1577,10 @@ pvr_is_render_area_tile_aligned(const struct pvr_cmd_buffer *cmd_buffer, render_area->extent.width == iview->vk.extent.width; } -static inline struct pvr_renderpass_hwsetup_render * -pvr_pass_info_get_hw_render(const struct pvr_render_pass_info *render_pass_info, - uint32_t idx) -{ - return &render_pass_info->pass->hw_setup->renders[idx]; -} +static VkResult +pvr_render_targets_init(struct pvr_device *device, + struct pvr_render_state *rstate, + const struct pvr_renderpass_hwsetup_render *hw_render); static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, struct pvr_cmd_buffer 
*cmd_buffer, @@ -1703,9 +1732,17 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, spm_eot_state->pixel_event_program_data_offset; } - render_target = pvr_get_render_target(hw_render, render_pass_info->rstate); + render_target = pvr_get_render_target(hw_render, rstate); job->view_state.rt_datasets = &render_target->rt_dataset[0]; + if (cmd_buffer->state.current_sub_cmd->is_dynamic_render) { + result = pvr_render_targets_init(cmd_buffer->device, rstate, hw_render); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + return result; + } + } + job->ctrl_stream_addr = pvr_csb_get_start_address(&sub_cmd->control_stream); if (sub_cmd->depth_bias_bo) @@ -1926,8 +1963,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, job->frag_uses_atomic_ops = sub_cmd->frag_uses_atomic_ops; job->disable_compute_overlap = false; job->max_shared_registers = cmd_buffer->state.max_shared_regs; - job->run_frag = true; - job->geometry_terminate = true; + + if (!cmd_buffer->state.current_sub_cmd->is_suspend) { + assert(cmd_buffer->state.current_sub_cmd->type == + PVR_SUB_CMD_TYPE_GRAPHICS); + job->geometry_terminate = true; + job->run_frag = true; + } /* TODO: Enable pixel merging when it's safe to do. */ job->disable_pixel_merging = true; @@ -2331,6 +2373,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) const struct pvr_query_pool *query_pool = NULL; struct pvr_suballoc_bo *query_bo = NULL; size_t query_indices_size = 0; + bool suspend, dynamic_render; VkResult result; /* FIXME: Is this NULL check required because this function is called from @@ -2375,6 +2418,8 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) query_pool = gfx_sub_cmd->query_pool; } + dynamic_render = sub_cmd->is_dynamic_render; + suspend = sub_cmd->is_suspend; gfx_sub_cmd->has_query = true; util_dynarray_clear(&state->query_indices); @@ -2389,7 +2434,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) } /* TODO: Check if the sub_cmd can be skipped based on - * sub_cmd->gfx.empty_cmd flag. + * sub_cmd->gfx.empty_cmd flag and if dynamic rendering isn't enabled. */ /* TODO: Set the state in the functions called with the command buffer @@ -2498,6 +2543,9 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) query_info.availability_write.availability_bo = query_pool->availability_buffer; + query_info.is_dynamic_render = dynamic_render; + query_info.is_suspend = suspend; + /* Insert a barrier after the graphics sub command and before the * query sub command so that the availability write program waits for the * fragment shader to complete. 
@@ -2655,13 +2703,26 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, isp_max_tiles_in_flight, 1); - sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass; sub_cmd->gfx.rstate = state->render_pass_info.rstate; sub_cmd->gfx.empty_cmd = true; sub_cmd->gfx.view_mask = pvr_render_pass_info_get_view_mask(&state->render_pass_info); - sub_cmd->gfx.multiview_enabled = - state->render_pass_info.pass->multiview_enabled; + if (state->render_pass_info.dynamic_render) { + sub_cmd->is_suspend = state->render_pass_info.suspend; + sub_cmd->is_resume = state->render_pass_info.resume; + sub_cmd->is_dynamic_render = true; + sub_cmd->gfx.hw_render = state->render_pass_info.hw_render; + sub_cmd->gfx.multiview_enabled = + state->render_pass_info.hw_render->multiview_enabled; + sub_cmd->gfx.hw_render_idx = 0; + sub_cmd->gfx.dr_info = state->render_pass_info.dr_info; + } else { + sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass; + sub_cmd->gfx.multiview_enabled = + state->render_pass_info.pass + ? state->render_pass_info.pass->multiview_enabled + : false; + } if (state->vis_test_enabled) sub_cmd->gfx.query_pool = state->query_pool; @@ -3040,11 +3101,16 @@ void pvr_CmdPushConstants2KHR(VkCommandBuffer commandBuffer, } } -static inline void +static void pvr_cmd_buffer_attachments_free(struct pvr_cmd_buffer *cmd_buffer) { struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + if (!state->render_pass_info.attachments) { + assert(!state->render_pass_info.attachment_count); + return; + } + vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments); state->render_pass_info.attachment_count = 0; state->render_pass_info.attachments = NULL; @@ -3058,7 +3124,7 @@ static VkResult pvr_cmd_buffer_attachments_alloc( struct pvr_render_pass_info *info = &state->render_pass_info; /* Free any previously allocated attachments. 
*/ - vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments); + pvr_cmd_buffer_attachments_free(cmd_buffer); state->render_pass_info.attachment_count = attachment_count; if (attachment_count == 0) { @@ -3202,7 +3268,8 @@ static void pvr_perform_start_of_render_attachment_clear( uint32_t layers, uint32_t index, bool is_depth_stencil, - uint32_t *index_list_clear_mask) + uint32_t *index_list_clear_mask, + bool resume_render) { ASSERTED static const VkImageAspectFlags dsc_aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT | @@ -3281,7 +3348,7 @@ static void pvr_perform_start_of_render_attachment_clear( image_aspect &= ~VK_IMAGE_ASPECT_STENCIL_BIT; } - if (image_aspect != VK_IMAGE_ASPECT_NONE) { + if (image_aspect != VK_IMAGE_ASPECT_NONE && !resume_render) { VkClearAttachment clear_attachment = { .aspectMask = image_aspect, .colorAttachment = index, @@ -3308,12 +3375,21 @@ static void pvr_perform_start_of_render_attachment_clear( } static void -pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer) +pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer, + bool resume_render) { struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info; - const struct pvr_renderpass_hwsetup *hw_setup = info->pass->hw_setup; - const struct pvr_renderpass_hwsetup_render *hw_render = - pvr_pass_info_get_hw_render(info, hw_setup->subpass_map[info->subpass_idx].render); + const struct pvr_renderpass_hwsetup_render *hw_render; + const struct pvr_render_pass *pass = info->pass; + + if (pass) { + const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup; + + hw_render = + &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render]; + } else { + hw_render = pvr_pass_info_get_hw_render(info, 0); + } /* Mask of attachment clears using index lists instead of background object * to clear. @@ -3325,7 +3401,8 @@ pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer) info->rstate->layers, i, false, - &index_list_clear_mask); + &index_list_clear_mask, + resume_render); } info->enable_bg_tag = !!hw_render->color_init_count; @@ -3347,7 +3424,8 @@ pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer) info->rstate->layers, 0, true, - &ds_index_list); + &ds_index_list, + resume_render); } } @@ -3390,11 +3468,16 @@ static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup) return false; } -static inline void +static void pvr_cmd_buffer_clear_values_free(struct pvr_cmd_buffer *cmd_buffer) { struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + if (!state->render_pass_info.clear_values) { + assert(!state->render_pass_info.clear_value_count); + return; + } + vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values); state->render_pass_info.clear_value_count = 0; state->render_pass_info.clear_values = NULL; @@ -3407,7 +3490,7 @@ pvr_cmd_buffer_clear_values_alloc(struct pvr_cmd_buffer *cmd_buffer, struct pvr_cmd_buffer_state *state = &cmd_buffer->state; /* Free any previously allocated clear values. 
*/ - vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values); + pvr_cmd_buffer_clear_values_free(cmd_buffer); state->render_pass_info.clear_value_count = clear_value_count; if (!clear_value_count) { @@ -3749,23 +3832,25 @@ pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer, pvr_resolve_image_copy_region_init(src_view, dst_view, info, ®ion); - resolve_attachment = - info->pass->attachments[hw_render->ds_attach_resolve_idx]; + if (!info->dr_info) { + resolve_attachment = + info->pass->attachments[hw_render->ds_attach_resolve_idx]; - if (both_depth && !hw_render->depth_resolve_mode && - resolve_attachment.store_op == VK_ATTACHMENT_STORE_OP_STORE && - resolve_attachment.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_values.depth = resolve_clear_values->depthStencil.depth; - clear_depth = true; - } + if (both_depth && !hw_render->depth_resolve_mode && + resolve_attachment.store_op == VK_ATTACHMENT_STORE_OP_STORE && + resolve_attachment.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_values.depth = resolve_clear_values->depthStencil.depth; + clear_depth = true; + } - if (both_stencil && !hw_render->stencil_resolve_mode && - resolve_attachment.stencil_store_op == - VK_ATTACHMENT_STORE_OP_STORE && - resolve_attachment.stencil_load_op == - VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_values.stencil = resolve_clear_values->depthStencil.stencil; - clear_stencil = true; + if (both_stencil && !hw_render->stencil_resolve_mode && + resolve_attachment.stencil_store_op == + VK_ATTACHMENT_STORE_OP_STORE && + resolve_attachment.stencil_load_op == + VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_values.stencil = resolve_clear_values->depthStencil.stencil; + clear_stencil = true; + } } /* Resolve depth and if it can clear stencil */ @@ -3898,11 +3983,1097 @@ void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, return; } - pvr_perform_start_of_render_clears(cmd_buffer); + pvr_perform_start_of_render_clears(cmd_buffer, false); pvr_stash_depth_format(&cmd_buffer->state, &cmd_buffer->state.current_sub_cmd->gfx); } +static inline uint32_t pvr_get_ds_component_bits(const VkFormat vk_format, + const VkImageAspectFlags ds_aspect) +{ + uint32_t component; + + if (ds_aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + assert(vk_format_has_stencil(vk_format)); + component = 0; + } else { + assert(ds_aspect == VK_IMAGE_ASPECT_DEPTH_BIT); + assert(vk_format_has_depth(vk_format)); + component = 1; + } + + return vk_format_get_component_bits(vk_format, + UTIL_FORMAT_COLORSPACE_ZS, + component); +} + +static inline bool pvr_can_pbe_resolve_ds_attachment( + const struct pvr_device_info *dev_info, + VkFormat vk_format, + uint32_t sample_count, + VkResolveModeFlagBits resolve_mode) +{ + if (!PVR_HAS_FEATURE(dev_info, gs_rta_support)) + return false; + + if (resolve_mode != VK_RESOLVE_MODE_AVERAGE_BIT) + return false; + + if (pvr_get_ds_component_bits(vk_format, VK_IMAGE_ASPECT_STENCIL_BIT)) + return false; + + return pvr_format_is_pbe_downscalable(dev_info, vk_format); +} + +static inline VkResult pvr_mrt_setup_partial_init( + struct pvr_device *const device, + struct usc_mrt_setup *mrt_setup, + uint32_t num_renger_targets, + uint32_t num_output_regs, + uint32_t num_tile_buffers) +{ + struct usc_mrt_resource *mrt_resources = NULL; + + if (num_renger_targets) { + const uint32_t size = + num_renger_targets * sizeof(*mrt_setup->mrt_resources); + + mrt_resources = vk_zalloc(&device->vk.alloc, + size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!mrt_resources) + return 
VK_ERROR_OUT_OF_HOST_MEMORY; + } + + *mrt_setup = (struct usc_mrt_setup){ + .num_render_targets = num_renger_targets, + .num_output_regs = num_output_regs, + .num_tile_buffers = num_tile_buffers, + .mrt_resources = mrt_resources, + }; + + return VK_SUCCESS; +} + +static void pvr_dynamic_rendering_output_attachments_cleanup( + const struct pvr_device *device, + const VkAllocationCallbacks *const allocator, + struct pvr_dynamic_render_info *dr_info) +{ + if (!dr_info) + return; + + pvr_mrt_load_op_state_cleanup(device, allocator, + dr_info->hw_render.load_op_state); + + pvr_destroy_mrt_setup(device, &dr_info->hw_render.eot_setup); + pvr_destroy_mrt_setup(device, &dr_info->hw_render.init_setup); + pvr_destroy_mrt_setup(device, dr_info->mrt_setup); + + vk_free2(&device->vk.alloc, allocator, dr_info->hw_render.eot_surfaces); + vk_free2(&device->vk.alloc, allocator, dr_info->hw_render.color_init); + vk_free2(&device->vk.alloc, allocator, dr_info->color_attachments); +} + +static VkResult pvr_dynamic_rendering_output_attachments_setup( + struct pvr_cmd_buffer *cmd_buffer, + const VkRenderingInfo *pRenderingInfo, + struct pvr_dynamic_render_info *dr_info, + uint32_t *attach_idx) +{ + VkFormat attachment_formats[pRenderingInfo->colorAttachmentCount]; + uint32_t mrt_attachment_map[pRenderingInfo->colorAttachmentCount]; + uint32_t mrt_count = 0, eot_mrt_count = 0, eot_mrt_idx = 0, + eot_surface_idx = 0, color_idx = 0, pbe_emits = 0, + init_setup_idx = 0; + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_device *device = cmd_buffer->device; + struct usc_mrt_setup mrt_setup; + VkResult result; + + dr_info->color_attachment_count = pRenderingInfo->colorAttachmentCount; + + if (dr_info->color_attachment_count) { + const uint32_t size = sizeof(*dr_info->color_attachments) * + dr_info->color_attachment_count; + + dr_info->color_attachments = vk_zalloc(&device->vk.alloc, + size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->color_attachments) { + return vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { + uint32_t cur_attach_idx = *attach_idx; + struct pvr_render_pass_attachment *attachment = + &dr_info->attachments[cur_attach_idx]; + const VkRenderingAttachmentInfo *attachment_info = + &pRenderingInfo->pColorAttachments[i]; + const struct pvr_image *image; + struct pvr_image_view *iview; + + /* Start off clean */ + dr_info->color_attachments[i].index_color = VK_ATTACHMENT_UNUSED; + dr_info->color_attachments[i].index_resolve = VK_ATTACHMENT_UNUSED; + attachment_formats[mrt_count] = VK_FORMAT_UNDEFINED; + mrt_attachment_map[i] = VK_ATTACHMENT_UNUSED; + + if (attachment_info->imageView) + iview = pvr_image_view_from_handle(attachment_info->imageView); + else if (attachment_info->resolveImageView) + iview = pvr_image_view_from_handle(attachment_info->resolveImageView); + else + continue; + + /* We have an output color attachment */ + image = pvr_image_view_get_image(iview); + + state->render_pass_info.attachments[cur_attach_idx] = iview; + state->render_pass_info.rstate->height = + MIN2(state->render_pass_info.rstate->height, image->vk.extent.height); + state->render_pass_info.rstate->width = + MIN2(state->render_pass_info.rstate->width, image->vk.extent.width); + + mrt_attachment_map[i] = mrt_count; + attachment_formats[mrt_count++] = iview->vk.view_format; + + attachment->index = cur_attach_idx; + attachment->vk_format = iview->vk.view_format; + 
attachment->sample_count = image->vk.samples; + attachment->resolve_target = VK_ATTACHMENT_UNUSED; + attachment->load_op = attachment_info->loadOp; + attachment->store_op = attachment_info->storeOp; + attachment->resolve_mode = attachment_info->resolveMode; + attachment->stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment->stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment->stencil_resolve_mode = VK_RESOLVE_MODE_NONE; + attachment->need_eot = true; + /* TODO: verify logic for is_pbe_downscalable */ + attachment->is_pbe_downscalable = pvr_can_pbe_resolve_ds_attachment( + &cmd_buffer->device->pdevice->dev_info, + iview->vk.view_format, + image->vk.samples, + attachment_info->resolveMode); + + dr_info->color_attachments[i].index_color = cur_attach_idx; + dr_info->hw_render.sample_count = + MAX2(dr_info->hw_render.sample_count, attachment->sample_count); + + *attach_idx += 1; + + if (attachment->resolve_mode && + cur_attach_idx < dr_info->attachment_count) { + const uint32_t resolve_attach_idx = *attach_idx; + struct pvr_render_pass_attachment *resolve_attach = + &dr_info->attachments[resolve_attach_idx]; + struct pvr_image_view *resolve_iview = + pvr_image_view_from_handle(attachment_info->resolveImageView); + + assert(resolve_iview && + resolve_attach_idx <= dr_info->attachment_count); + + state->render_pass_info.attachments[resolve_attach_idx] = + resolve_iview; + + attachment->resolve_target = resolve_attach_idx; + + dr_info->color_attachments[i].index_resolve = resolve_attach_idx; + dr_info->hw_render.has_side_effects = true; + dr_info->hw_render.requires_frag_pr = true; + dr_info->hw_render.eot_surface_count++; + + resolve_attach->sample_count = image->vk.samples; + resolve_attach->index = resolve_attach_idx; + resolve_attach->vk_format = resolve_iview->vk.view_format; + resolve_attach->load_op = + resolve_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + resolve_attach->store_op = + resolve_attach->stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + resolve_attach->resolve_mode = + resolve_attach->stencil_resolve_mode = VK_RESOLVE_MODE_NONE; + resolve_attach->resolve_target = VK_ATTACHMENT_UNUSED; + + /* If resolving with a transfer, then always store the source */ + if (!attachment->is_pbe_downscalable) + attachment->store_op = VK_ATTACHMENT_STORE_OP_STORE; + + *attach_idx += 1; + } + + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + VkClearColorValue *clear_value = + &state->render_pass_info.clear_values[cur_attach_idx].color; + + *clear_value = attachment_info->clearValue.color; + + if (attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) + dr_info->hw_render.has_side_effects = true; + } + + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR || + attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + dr_info->hw_render.color_init_count++; + } + + if (attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) + dr_info->hw_render.eot_surface_count++; + else + dr_info->hw_render.requires_frag_pr = true; + + if (attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE || + attachment->resolve_mode) { + eot_mrt_count++; + } + } + + if (dr_info->hw_render.color_init_count) { + const uint32_t size = dr_info->hw_render.color_init_count * + sizeof(*dr_info->hw_render.color_init); + + dr_info->hw_render.color_init = vk_zalloc(&device->vk.alloc, + size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->hw_render.color_init) { + result = vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto 
err_free_color_attachments; + } + } + + if (dr_info->hw_render.eot_surface_count) { + const uint32_t size = dr_info->hw_render.eot_surface_count * + sizeof(*dr_info->hw_render.eot_surfaces); + + dr_info->hw_render.eot_surfaces = vk_zalloc(&device->vk.alloc, + size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->hw_render.eot_surfaces) { + result = vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_hw_render_color_init; + } + } + + result = + pvr_init_usc_mrt_setup(device, mrt_count, attachment_formats, &mrt_setup); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_hw_render_eot_surface; + } + + dr_info->hw_render.tile_buffers_count = mrt_setup.num_tile_buffers; + dr_info->hw_render.output_regs_count = mrt_setup.num_output_regs; + + result = pvr_mrt_setup_partial_init(cmd_buffer->device, + dr_info->mrt_setup, + pRenderingInfo->colorAttachmentCount, + dr_info->hw_render.output_regs_count, + dr_info->hw_render.tile_buffers_count); + if (result != VK_SUCCESS) { + pvr_destroy_mrt_setup(device, &mrt_setup); + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_hw_render_eot_surface; + } + + /* We fully initialised only a mrt_count number of render targets, + * now scatter gather them over the whole number of attachments. + * This is necessary because render targets are attachment-indexed + * throughout the driver. + */ + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { + if (mrt_attachment_map[i] != VK_ATTACHMENT_UNUSED) { + memcpy(&dr_info->mrt_setup->mrt_resources[i], + &mrt_setup.mrt_resources[mrt_attachment_map[i]], + sizeof(mrt_setup.mrt_resources[0])); + } + } + + result = pvr_mrt_setup_partial_init(cmd_buffer->device, + &dr_info->hw_render.init_setup, + dr_info->hw_render.color_init_count, + dr_info->hw_render.output_regs_count, + dr_info->hw_render.tile_buffers_count); + if (result != VK_SUCCESS) { + pvr_destroy_mrt_setup(device, &mrt_setup); + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_finish_mrt_setup; + } + + result = pvr_mrt_setup_partial_init(cmd_buffer->device, + &dr_info->hw_render.eot_setup, + eot_mrt_count, + dr_info->hw_render.output_regs_count, + dr_info->hw_render.tile_buffers_count); + if (result != VK_SUCCESS) { + pvr_destroy_mrt_setup(device, &mrt_setup); + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_finish_mrt_init_setup; + } + + for (uint32_t i = 0; i < dr_info->attachment_count; i++) { + struct pvr_render_pass_attachment *attachment = &dr_info->attachments[i]; + bool need_eot_setup = false; + + if (!attachment->need_eot) + continue; + + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR || + attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + dr_info->hw_render.color_init[init_setup_idx].index = i; + dr_info->hw_render.color_init[init_setup_idx].op = attachment->load_op; + dr_info->hw_render.init_setup.mrt_resources[init_setup_idx] = + mrt_setup.mrt_resources[color_idx]; + init_setup_idx++; + } + + if (attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE || + (attachment->resolve_mode && !attachment->is_pbe_downscalable)) { + struct pvr_renderpass_hwsetup_eot_surface *surface = + &dr_info->hw_render.eot_surfaces[eot_surface_idx]; + + surface->src_attachment_idx = VK_ATTACHMENT_UNUSED; + surface->resolve_type = PVR_RESOLVE_TYPE_INVALID; + surface->mrt_idx = eot_mrt_idx; + surface->attachment_idx = i; + + need_eot_setup = true; + eot_surface_idx++; + pbe_emits++; + } + + if 
(attachment->resolve_mode) { + struct pvr_renderpass_hwsetup_eot_surface *surface = + &dr_info->hw_render.eot_surfaces[eot_surface_idx]; /* next surface */ + + surface->src_attachment_idx = i; + surface->mrt_idx = eot_mrt_idx; + surface->need_resolve = true; + surface->attachment_idx = attachment->resolve_target; + + if (attachment->store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE && + attachment->is_pbe_downscalable) { + surface->resolve_type = PVR_RESOLVE_TYPE_PBE; + pbe_emits++; + } else if (!attachment->is_pbe_downscalable) { + surface->resolve_type = PVR_RESOLVE_TYPE_TRANSFER; + } else { + surface->resolve_type = PVR_RESOLVE_TYPE_INVALID; + } + + need_eot_setup = true; + eot_surface_idx++; + } + + if (need_eot_setup) { + dr_info->hw_render.eot_setup.mrt_resources[eot_mrt_idx++] = + mrt_setup.mrt_resources[color_idx]; + } + + color_idx++; + } + + assert(dr_info->hw_render.eot_surface_count == eot_surface_idx); + assert(pbe_emits <= PVR_MAX_COLOR_ATTACHMENTS); + + for (uint32_t i = 0; i < eot_surface_idx; i++) { + struct pvr_renderpass_hwsetup_eot_surface *surface = + &dr_info->hw_render.eot_surfaces[i]; + struct pvr_render_pass_attachment *attachment; + + if (!surface->need_resolve || + surface->resolve_type != PVR_RESOLVE_TYPE_INVALID) { + continue; + } + + assert(surface->src_attachment_idx != VK_ATTACHMENT_UNUSED); + + attachment = &dr_info->attachments[surface->src_attachment_idx]; + if (attachment->resolve_mode) { + if (pbe_emits < PVR_MAX_COLOR_ATTACHMENTS) { + surface->resolve_type = PVR_RESOLVE_TYPE_PBE; + pbe_emits++; + } else { + surface->resolve_type = PVR_RESOLVE_TYPE_TRANSFER; + } + } + } + + dr_info->hw_render.pbe_emits = pbe_emits; + + pvr_destroy_mrt_setup(device, &mrt_setup); + + return VK_SUCCESS; + +err_finish_mrt_init_setup: + pvr_destroy_mrt_setup(device, &dr_info->hw_render.init_setup); + +err_finish_mrt_setup: + pvr_destroy_mrt_setup(device, dr_info->mrt_setup); + +err_free_hw_render_eot_surface: + vk_free(&device->vk.alloc, dr_info->hw_render.eot_surfaces); + +err_free_hw_render_color_init: + vk_free(&device->vk.alloc, dr_info->hw_render.color_init); + +err_free_color_attachments: + vk_free(&device->vk.alloc, dr_info->color_attachments); + + return result; +} + +static void +pvr_dynamic_render_info_destroy(const struct pvr_device *device, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_dynamic_render_info *dr_info) +{ + pvr_dynamic_rendering_output_attachments_cleanup( + device, + &device->vk.alloc, + dr_info); + + pvr_cmd_buffer_clear_values_free(cmd_buffer); + pvr_cmd_buffer_attachments_free(cmd_buffer); + + if (dr_info) + vk_free(&device->vk.alloc, dr_info->attachments); + + vk_free(&device->vk.alloc, dr_info); +} + +static VkResult +pvr_dynamic_render_info_create(struct pvr_cmd_buffer *cmd_buffer, + const VkRenderingInfo *pRenderingInfo, + struct pvr_dynamic_render_info **dr_info_out) +{ + const bool has_stencil_resolve_iview = + pRenderingInfo->pStencilAttachment && + pRenderingInfo->pStencilAttachment->resolveMode && + pRenderingInfo->pStencilAttachment->resolveImageView; + const bool has_depth_resolve_iview = + pRenderingInfo->pDepthAttachment && + pRenderingInfo->pDepthAttachment->resolveMode && + pRenderingInfo->pDepthAttachment->resolveImageView; + const bool has_stencil_iview = + pRenderingInfo->pStencilAttachment && + pRenderingInfo->pStencilAttachment->imageView; + const bool has_depth_iview = + pRenderingInfo->pDepthAttachment && + pRenderingInfo->pDepthAttachment->imageView; + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct 
pvr_device *device = cmd_buffer->device; + struct pvr_dynamic_render_info *dr_info; + struct pvr_render_state *rstate; + struct usc_mrt_setup *mrt_setup; + uint32_t attach_idx = 0; + VkResult result; + + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &dr_info, __typeof__(*dr_info), 1); + vk_multialloc_add(&ma, &mrt_setup, __typeof__(*mrt_setup), 1); + if (!vk_multialloc_zalloc(&ma, + &device->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) { + return vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { + const VkRenderingAttachmentInfo *attachment_info = + &pRenderingInfo->pColorAttachments[i]; + + if (attachment_info->imageView) + dr_info->attachment_count++; + + if (attachment_info->resolveMode && attachment_info->resolveImageView) + dr_info->attachment_count++; + } + + dr_info->attachment_count += has_depth_iview || has_stencil_iview; + dr_info->attachment_count += has_depth_resolve_iview || has_stencil_resolve_iview; + dr_info->mrt_setup = mrt_setup; + + rstate = vk_zalloc(&device->vk.alloc, sizeof(*rstate), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!rstate) { + result = vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_dr_info; + } + + *rstate = (struct pvr_render_state){ + .width = UINT32_MAX, + .height = UINT32_MAX, + .width_alignment = 1, + .height_alignment = 1, + }; + + state->render_pass_info.rstate = rstate; + + if (dr_info->attachment_count) { + dr_info->attachments = + vk_zalloc(&device->vk.alloc, + sizeof(*dr_info->attachments) * dr_info->attachment_count, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->attachments) { + result = vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_rstate; + } + + result = pvr_cmd_buffer_attachments_alloc(cmd_buffer, dr_info->attachment_count); + if (result != VK_SUCCESS) { + result = vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_dr_info_attachments; + } + + result = pvr_cmd_buffer_clear_values_alloc(cmd_buffer, dr_info->attachment_count); + if (result != VK_SUCCESS){ + result = vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_cmd_buffer_attachments; + } + } else { + pvr_cmd_buffer_clear_values_free(cmd_buffer); + pvr_cmd_buffer_attachments_free(cmd_buffer); + } + + dr_info->hw_render = (struct pvr_renderpass_hwsetup_render){ + .ds_attach_resolve_idx = VK_ATTACHMENT_UNUSED, + .ds_attach_idx = VK_ATTACHMENT_UNUSED, + .depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .multiview_enabled = !!pRenderingInfo->viewMask, + .view_mask = MAX2(1, pRenderingInfo->viewMask), + .sample_count = 1, + }; + + if (has_depth_iview || has_stencil_iview) { + struct pvr_render_pass_attachment *ds_attach = &dr_info->attachments[attach_idx]; + VkClearDepthStencilValue *clear_value; + const struct pvr_image *image; + struct pvr_image_view *iview; + + ds_attach->index = attach_idx; + + ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + ds_attach->store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + if (has_depth_iview) { + iview = pvr_image_view_from_handle( + pRenderingInfo->pDepthAttachment->imageView); + ds_attach->is_depth = true; + ds_attach->load_op = pRenderingInfo->pDepthAttachment->loadOp; + ds_attach->store_op = pRenderingInfo->pDepthAttachment->storeOp; + ds_attach->resolve_mode = pRenderingInfo->pDepthAttachment->resolveMode; + } + + ds_attach->stencil_load_op = 
VK_ATTACHMENT_LOAD_OP_DONT_CARE; + ds_attach->stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + if (has_stencil_iview) { + iview = pvr_image_view_from_handle( + pRenderingInfo->pStencilAttachment->imageView); + ds_attach->is_stencil = true; + ds_attach->stencil_load_op = pRenderingInfo->pStencilAttachment->loadOp; + ds_attach->stencil_store_op = pRenderingInfo->pStencilAttachment->storeOp; + ds_attach->stencil_resolve_mode = pRenderingInfo->pStencilAttachment->resolveMode; + } + + image = pvr_image_view_get_image(iview); + + ds_attach->vk_format = iview->vk.view_format; + ds_attach->sample_count = image->vk.samples; + ds_attach->resolve_target = VK_ATTACHMENT_UNUSED; + + state->render_pass_info.attachments[attach_idx] = iview; + state->render_pass_info.rstate->height = + MIN2(state->render_pass_info.rstate->height, image->vk.extent.height); + state->render_pass_info.rstate->width = + MIN2(state->render_pass_info.rstate->width, image->vk.extent.width); + + dr_info->hw_render.ds_attach_idx = attach_idx; + dr_info->hw_render.depth_init = ds_attach->load_op; + dr_info->hw_render.depth_store = + ds_attach->store_op == VK_ATTACHMENT_STORE_OP_STORE; + dr_info->hw_render.depth_resolve_mode = ds_attach->resolve_mode; + + dr_info->hw_render.stencil_init = ds_attach->stencil_load_op; + dr_info->hw_render.stencil_store = + ds_attach->stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE; + dr_info->hw_render.sample_count = + MAX2(dr_info->hw_render.sample_count, ds_attach->sample_count); + dr_info->hw_render.stencil_resolve_mode = ds_attach->stencil_resolve_mode; + + clear_value = &state->render_pass_info.clear_values[attach_idx].depthStencil; + + if (ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_value->depth = + pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; + if (ds_attach->store_op == VK_ATTACHMENT_STORE_OP_STORE) + dr_info->hw_render.has_side_effects = true; + } + + if (ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_value->stencil = + pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil; + if (ds_attach->stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE) + dr_info->hw_render.has_side_effects = true; + } + + attach_idx++; + + if (ds_attach->resolve_mode || ds_attach->stencil_resolve_mode) { + struct pvr_render_pass_attachment *resolve_ds_attach = + &dr_info->attachments[attach_idx]; + struct pvr_image_view *resolve_iview; + + ds_attach->resolve_target = attach_idx; + + if (has_depth_resolve_iview) { + resolve_iview = pvr_image_view_from_handle( + pRenderingInfo->pDepthAttachment->resolveImageView); + } else if (has_stencil_resolve_iview) { + resolve_iview = pvr_image_view_from_handle( + pRenderingInfo->pStencilAttachment->resolveImageView); + } else { + UNREACHABLE("invalid image view for DS resolve attachment"); + } + + state->render_pass_info.attachments[attach_idx] = resolve_iview; + + dr_info->hw_render.ds_attach_resolve_idx = attach_idx; + dr_info->hw_render.has_side_effects = true; + + assert(iview->vk.view_format == resolve_iview->vk.view_format); + + resolve_ds_attach->vk_format = ds_attach->vk_format; + resolve_ds_attach->sample_count = 1; + resolve_ds_attach->index = attach_idx; + resolve_ds_attach->resolve_target = VK_ATTACHMENT_UNUSED; + resolve_ds_attach->is_stencil = ds_attach->is_stencil; + resolve_ds_attach->store_op = ds_attach->store_op; + resolve_ds_attach->stencil_store_op = ds_attach->stencil_store_op; + resolve_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + resolve_ds_attach->stencil_load_op = 
VK_ATTACHMENT_LOAD_OP_DONT_CARE; + + attach_idx++; + } + } + + result = pvr_dynamic_rendering_output_attachments_setup(cmd_buffer, + pRenderingInfo, + dr_info, + &attach_idx); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_cmd_buffer_clear_values; + } + + if (state->render_pass_info.rstate->height == UINT32_MAX) { + state->render_pass_info.rstate->height = pRenderingInfo->renderArea.offset.y + + pRenderingInfo->renderArea.extent.height; + } + + if (state->render_pass_info.rstate->width == UINT32_MAX) { + state->render_pass_info.rstate->width = pRenderingInfo->renderArea.offset.x + + pRenderingInfo->renderArea.extent.width; + } + + *dr_info_out = dr_info; + + /* FIXME: When adding support for caching rt datasets and render states, + * remove this and handle it with a proper hash table. + */ + pvr_rstate_entry_add(device, rstate); + + return VK_SUCCESS; + +err_free_cmd_buffer_clear_values: + pvr_cmd_buffer_clear_values_free(cmd_buffer); + +err_free_cmd_buffer_attachments: + pvr_cmd_buffer_attachments_free(cmd_buffer); + +err_free_dr_info_attachments: + vk_free(&device->vk.alloc, dr_info->attachments); + +err_free_rstate: + vk_free(&device->vk.alloc, rstate); + +err_free_dr_info: + vk_free(&device->vk.alloc, dr_info); + + return result; +} + +static inline uint64_t +pvr_render_pass_info_get_scratch_buffer_size(struct pvr_device *device, + const struct pvr_render_pass_info *info) +{ + return pvr_spm_scratch_buffer_calc_required_size( + &info->dr_info->hw_render, + 1, + info->dr_info->hw_render.sample_count, + info->rstate->width, + info->rstate->height); +} + +void pvr_CmdBeginRendering(VkCommandBuffer commandBuffer, + const VkRenderingInfo *pRenderingInfo) +{ + VK_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_device *const device = cmd_buffer->device; + struct pvr_dynamic_render_info *dr_info = NULL; + struct pvr_sub_cmd_gfx *sub_cmd; + bool resume, suspend; + VkResult result; + + /* TODO: Check that we're not already inside a render pass? 
*/ + + suspend = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT_KHR; + resume = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT_KHR; + + if (state->current_sub_cmd && resume) { + assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS); + assert(state->current_sub_cmd->is_dynamic_render); + assert(state->current_sub_cmd->is_suspend); + + if (!suspend) + state->current_sub_cmd->is_suspend = false; + + return; + } + + result = pvr_dynamic_render_info_create(cmd_buffer, pRenderingInfo, &dr_info); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + return; + } + + state->render_pass_info.dr_info = dr_info; + state->render_pass_info.hw_render = &dr_info->hw_render; + state->render_pass_info.render_area = (VkRect2D){ + .extent = pRenderingInfo->renderArea.extent, + .offset = (VkOffset2D){ + .x = MAX2(0, pRenderingInfo->renderArea.offset.x), + .y = MAX2(0, pRenderingInfo->renderArea.offset.y), + }, + }; + state->render_pass_info.pipeline_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + state->render_pass_info.dynamic_render = true; + state->render_pass_info.suspend = suspend; + state->render_pass_info.resume = resume; + state->render_pass_info.current_hw_subpass = -1; + + if (dr_info->hw_render.tile_buffers_count) { + result = pvr_device_tile_buffer_ensure_cap( + device, + dr_info->hw_render.tile_buffers_count); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_destroy_dynamic_render_info; + } + } + + if (pRenderingInfo->pDepthAttachment && + pRenderingInfo->pDepthAttachment->imageView) { + struct pvr_image_view *iview = pvr_image_view_from_handle( + pRenderingInfo->pDepthAttachment->imageView); + state->depth_format = iview->vk.format; + } else if (pRenderingInfo->pStencilAttachment && + pRenderingInfo->pStencilAttachment->imageView) { + struct pvr_image_view *iview = pvr_image_view_from_handle( + pRenderingInfo->pStencilAttachment->imageView); + state->depth_format = iview->vk.format; + } else { + state->depth_format = VK_FORMAT_UNDEFINED; + } + + if (dr_info->color_attachment_count || + dr_info->hw_render.ds_attach_idx != VK_ATTACHMENT_UNUSED) { + state->render_pass_info.sample_count = dr_info->hw_render.sample_count; + } + + state->render_pass_info.rstate->layers = + !pRenderingInfo->viewMask ? 
pRenderingInfo->layerCount : 1; + state->render_pass_info.rstate->scratch_buffer_size = + pvr_render_pass_info_get_scratch_buffer_size(device, + &state->render_pass_info); + + result = pvr_render_state_setup(device, + NULL, + state->render_pass_info.rstate, + 1, + &dr_info->hw_render); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_cleanup_tile_buffers; + } + + result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_cleanup_render_state; + } + + sub_cmd = &state->current_sub_cmd->gfx; + sub_cmd->rstate = state->render_pass_info.rstate; + sub_cmd->dr_info = dr_info; + assert(sub_cmd->dr_info); + + result = pvr_mrt_load_ops_setup(cmd_buffer, + &cmd_buffer->vk.pool->alloc, + &dr_info->hw_render.load_op_state); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_cleanup_render_state; + } + + pvr_perform_start_of_render_clears(cmd_buffer, resume); + + return; + +err_cleanup_render_state: + pvr_render_state_cleanup(device, state->render_pass_info.rstate); + +err_cleanup_tile_buffers: + pvr_device_free_tile_buffer_state(device); + +err_destroy_dynamic_render_info: + pvr_dynamic_render_info_destroy(device, cmd_buffer, dr_info); +} + +static VkResult +pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_render_pass_info *info); + +void pvr_CmdEndRendering(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + VkResult result; + + if (state->current_sub_cmd && + state->current_sub_cmd->is_suspend) { + return; + } + + result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto exit_teardown_render; + } + + result = + pvr_resolve_unemitted_resolve_attachments(cmd_buffer, + &state->render_pass_info); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto exit_teardown_render; + } + +exit_teardown_render: + state->render_pass_info.dynamic_render = false; + pvr_cmd_buffer_clear_values_free(cmd_buffer); + pvr_cmd_buffer_attachments_free(cmd_buffer); +} + +static void pvr_cmd_buffer_state_from_dynamic_inheritance( + struct pvr_cmd_buffer *cmd_buffer, + const VkCommandBufferInheritanceRenderingInfo *inheritance_info) +{ + const bool has_stencil = + inheritance_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED; + const bool has_depth = + inheritance_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED; + VkFormat attachment_formats[inheritance_info->colorAttachmentCount]; + uint32_t mrt_attachment_map[inheritance_info->colorAttachmentCount]; + struct pvr_device *const device = cmd_buffer->device; + struct pvr_dynamic_render_info *dr_info; + uint32_t attach_idx = 0, mrt_count = 0; + struct usc_mrt_setup mrt_setup, *setup; + VkResult result; + + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &dr_info, __typeof__(*dr_info), 1); + vk_multialloc_add(&ma, &setup, __typeof__(*setup), 1); + if (!vk_multialloc_zalloc(&ma, + &device->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) { + vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + return; + } + + dr_info->hw_render.sample_count = inheritance_info->rasterizationSamples; + dr_info->hw_render.multiview_enabled = !!inheritance_info->viewMask; + dr_info->hw_render.view_mask = MAX2(1, 
inheritance_info->viewMask); + dr_info->attachment_count = has_depth || has_stencil; + dr_info->mrt_setup = setup; + + for (uint32_t i = 0; i < inheritance_info->colorAttachmentCount; i++) + if (inheritance_info->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED) + dr_info->attachment_count++; + + if (dr_info->attachment_count) { + dr_info->attachments = vk_zalloc( + &cmd_buffer->vk.pool->alloc, + sizeof(*dr_info->attachments) * dr_info->attachment_count, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->attachments) { + vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_dr_info; + } + } + + dr_info->color_attachment_count = inheritance_info->colorAttachmentCount; + if (dr_info->color_attachment_count) { + dr_info->color_attachments = vk_zalloc( + &device->vk.alloc, + sizeof(*dr_info->color_attachments) * dr_info->color_attachment_count, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!dr_info->color_attachments) { + vk_command_buffer_set_error(&cmd_buffer->vk, + VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_attachments; + } + } + + assert(inheritance_info->stencilAttachmentFormat == VK_FORMAT_UNDEFINED || + inheritance_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED || + inheritance_info->depthAttachmentFormat == + inheritance_info->stencilAttachmentFormat); + + dr_info->hw_render.ds_attach_idx = VK_ATTACHMENT_UNUSED; + + if (has_depth || has_stencil) { + struct pvr_render_pass_attachment *ds_attachment = + &dr_info->attachments[attach_idx]; + const VkFormat vk_format = has_depth + ? inheritance_info->depthAttachmentFormat + : inheritance_info->stencilAttachmentFormat; + + dr_info->hw_render.ds_attach_idx = attach_idx; + dr_info->hw_render.stencil_store = has_stencil; + dr_info->hw_render.depth_store = has_depth; + + ds_attachment->index = attach_idx; + ds_attachment->vk_format = vk_format; + ds_attachment->is_depth = dr_info->hw_render.depth_store; + ds_attachment->is_stencil = dr_info->hw_render.stencil_store; + ds_attachment->sample_count = inheritance_info->rasterizationSamples; + ds_attachment->resolve_target = VK_ATTACHMENT_UNUSED; + ds_attachment->load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + ds_attachment->store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + ds_attachment->stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + ds_attachment->stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + + attach_idx++; + } + + for (uint32_t i = 0; i < inheritance_info->colorAttachmentCount; i++) { + struct pvr_render_pass_attachment *attachment; + + dr_info->color_attachments[i].index_color = VK_ATTACHMENT_UNUSED; + dr_info->color_attachments[i].index_resolve = VK_ATTACHMENT_UNUSED; + attachment_formats[mrt_count] = VK_FORMAT_UNDEFINED; + mrt_attachment_map[i] = VK_ATTACHMENT_UNUSED; + + if (inheritance_info->pColorAttachmentFormats[i] == VK_FORMAT_UNDEFINED) + continue; + + dr_info->color_attachments[i].index_color = attach_idx; + mrt_attachment_map[i] = mrt_count; + attachment_formats[mrt_count++] = + inheritance_info->pColorAttachmentFormats[i]; + + attachment = &dr_info->attachments[attach_idx]; + attachment->index = attach_idx; + attachment->vk_format = inheritance_info->pColorAttachmentFormats[i]; + attachment->sample_count = inheritance_info->rasterizationSamples; + attachment->resolve_target = VK_ATTACHMENT_UNUSED; + attachment->load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment->store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment->stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment->stencil_store_op = 
VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment->stencil_resolve_mode = VK_RESOLVE_MODE_NONE; + attach_idx++; + } + + result = + pvr_init_usc_mrt_setup(device, mrt_count, attachment_formats, &mrt_setup); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_attachments; + } + + result = pvr_mrt_setup_partial_init(cmd_buffer->device, + dr_info->mrt_setup, + inheritance_info->colorAttachmentCount, + mrt_setup.num_output_regs, + mrt_setup.num_tile_buffers); + if (result != VK_SUCCESS) { + pvr_destroy_mrt_setup(device, &mrt_setup); + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_free_attachments; + } + + /* We fully initialised only a mrt_count number of render targets, + * now scatter gather them over the whole number of attachments. + * This is necessary because render targets are attachment-indexed + * throughout the driver. + */ + for (uint32_t i = 0; i < inheritance_info->colorAttachmentCount; i++) { + if (mrt_attachment_map[i] != VK_ATTACHMENT_UNUSED) { + memcpy(&dr_info->mrt_setup->mrt_resources[i], + &mrt_setup.mrt_resources[mrt_attachment_map[i]], + sizeof(mrt_setup.mrt_resources[0])); + } + } + + pvr_destroy_mrt_setup(device, &mrt_setup); + + if (dr_info->mrt_setup->num_tile_buffers) { + result = pvr_device_tile_buffer_ensure_cap( + device, + dr_info->mrt_setup->num_tile_buffers); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd_buffer->vk, result); + goto err_destroy_mrt_setup; + } + } + + cmd_buffer->state.render_pass_info.hw_render = &dr_info->hw_render; + cmd_buffer->state.render_pass_info.dr_info = dr_info; + + /* For setting the depth_format, first check stencil and then override */ + if (has_stencil) + cmd_buffer->state.depth_format = inheritance_info->stencilAttachmentFormat; + + if (has_depth) + cmd_buffer->state.depth_format = inheritance_info->depthAttachmentFormat; + + return; + +err_destroy_mrt_setup: + pvr_destroy_mrt_setup(device, dr_info->mrt_setup); + +err_free_attachments: + vk_free(&device->vk.alloc, dr_info->attachments); + +err_free_dr_info: + vk_free(&device->vk.alloc, dr_info); +} + static inline void pvr_cmd_buffer_state_from_render_pass_inheritance( struct pvr_cmd_buffer_state *state, const VkCommandBufferInheritanceInfo *inheritance_info) @@ -3947,9 +5118,20 @@ VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer, if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - pvr_cmd_buffer_state_from_render_pass_inheritance( - state, - pBeginInfo->pInheritanceInfo); + const VkCommandBufferInheritanceRenderingInfo *dyn_inheritance_info = + vk_find_struct_const( + pBeginInfo->pInheritanceInfo->pNext, + COMMAND_BUFFER_INHERITANCE_RENDERING_INFO); + if (!pBeginInfo->pInheritanceInfo->renderPass) { + assert(dyn_inheritance_info); + pvr_cmd_buffer_state_from_dynamic_inheritance( + cmd_buffer, + dyn_inheritance_info); + } else { + pvr_cmd_buffer_state_from_render_pass_inheritance( + state, + pBeginInfo->pInheritanceInfo); + } result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS); @@ -3958,6 +5140,9 @@ VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer, state->vis_test_enabled = pBeginInfo->pInheritanceInfo->occlusionQueryEnable; + + if (!pBeginInfo->pInheritanceInfo->renderPass) + state->current_sub_cmd->is_dynamic_render = true; } state->dirty.isp_userpass = true; @@ -5340,11 +6525,22 @@ pvr_setup_isp_faces_and_control(struct pvr_cmd_buffer *const 
    const bool rasterizer_discard = dynamic_state->rs.rasterizer_discard_enable;
    const uint32_t subpass_idx = pass_info->subpass_idx;
    const uint32_t depth_stencil_attachment_idx =
-      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
-   const struct pvr_render_pass_attachment *const attachment =
-      depth_stencil_attachment_idx != VK_ATTACHMENT_UNUSED
-         ? &pass_info->pass->attachments[depth_stencil_attachment_idx]
-         : NULL;
+      pass_info->pass
+         ? pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment
+         : pass_info->dr_info->hw_render.ds_attach_idx;
+   struct pvr_render_pass_attachment *attachment;
+
+   if (pass_info->pass) {
+      attachment =
+         depth_stencil_attachment_idx != VK_ATTACHMENT_UNUSED
+            ? &pass_info->pass->attachments[depth_stencil_attachment_idx]
+            : NULL;
+   } else {
+      attachment =
+         depth_stencil_attachment_idx != VK_ATTACHMENT_UNUSED
+            ? &pass_info->dr_info->attachments[depth_stencil_attachment_idx]
+            : NULL;
+   }
 
    const enum ROGUE_TA_OBJTYPE obj_type =
       pvr_ta_objtype(dynamic_state->ia.primitive_topology);
@@ -8195,6 +9391,7 @@ pvr_cmd_buffer_insert_mid_frag_barrier_event(struct pvr_cmd_buffer *cmd_buffer,
                                              uint32_t src_stage_mask,
                                              uint32_t dst_stage_mask)
 {
+   struct pvr_sub_cmd *prev_sub_cmd = cmd_buffer->state.current_sub_cmd;
    VkResult result;
 
    assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
@@ -8216,16 +9413,24 @@ pvr_cmd_buffer_insert_mid_frag_barrier_event(struct pvr_cmd_buffer *cmd_buffer,
         .wait_at_stage_mask = dst_stage_mask,
      },
   };
 
-   pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
 
+   cmd_buffer->state.current_sub_cmd->gfx.dr_info = prev_sub_cmd->gfx.dr_info;
+   prev_sub_cmd->gfx.dr_info = NULL;
+   assert(cmd_buffer->state.current_sub_cmd->gfx.dr_info ==
+          cmd_buffer->state.render_pass_info.dr_info);
+
   /* Use existing render setup, but load color attachments from HW BGOBJ */
   cmd_buffer->state.current_sub_cmd->gfx.barrier_load = true;
   cmd_buffer->state.current_sub_cmd->gfx.barrier_store = false;
-   cmd_buffer->state.current_sub_cmd->gfx.empty_cmd = false;
+   cmd_buffer->state.current_sub_cmd->is_dynamic_render =
+      prev_sub_cmd->is_dynamic_render;
+   cmd_buffer->state.current_sub_cmd->is_suspend =
+      prev_sub_cmd->is_suspend;
+
   return VK_SUCCESS;
 }
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.h b/src/imagination/vulkan/pvr_cmd_buffer.h
index a1cb5dcf2d9..075de7a9375 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.h
+++ b/src/imagination/vulkan/pvr_cmd_buffer.h
@@ -19,6 +19,7 @@
 #include "pvr_common.h"
 #include "pvr_framebuffer.h"
 #include "pvr_job_render.h"
+#include "pvr_hw_pass.h"
 #include "pvr_types.h"
 
 struct pvr_pds_upload;
@@ -113,7 +114,30 @@ struct pvr_transfer_cmd {
    struct pvr_cmd_buffer *cmd_buffer;
 };
 
+struct pvr_color_attachment_output_map {
+   uint32_t index_color;
+   uint32_t index_resolve;
+};
+
+struct pvr_dynamic_render_info {
+   /* Inputs */
+   struct pvr_render_pass_attachment *attachments;
+   uint32_t attachment_count;
+
+   /* Outputs */
+   struct pvr_color_attachment_output_map *color_attachments;
+   uint32_t color_attachment_count;
+
+   struct usc_mrt_setup *mrt_setup;
+
+   struct pvr_renderpass_hwsetup_render hw_render;
+};
+
 struct pvr_sub_cmd_gfx {
+   struct pvr_dynamic_render_info *dr_info;
+   struct pvr_renderpass_hwsetup_render *hw_render;
+   uint32_t attachment_count;
+
    const struct pvr_render_state *rstate;
 
    struct pvr_render_job job;
@@ -249,11 +273,16 @@ struct pvr_sub_cmd {
    enum pvr_sub_cmd_type type;
 
-   /* True if the sub_cmd is owned by this command buffer. False if taken from
-    * a secondary command buffer, in that case we are not supposed to free any
-    * resources associated with the sub_cmd.
-    */
-   bool owned;
+   struct {
+      /* True if the sub_cmd is owned by this command buffer. False if taken
+       * from a secondary command buffer; in that case we are not supposed to
+       * free any resources associated with the sub_cmd.
+       */
+      bool owned : 1;
+      bool is_dynamic_render : 1;
+      bool is_suspend : 1;
+      bool is_resume : 1;
+   };
 
    union {
       struct pvr_sub_cmd_gfx gfx;
@@ -267,6 +296,8 @@ struct pvr_render_pass_info {
    const struct pvr_render_pass *pass;
    struct pvr_framebuffer *framebuffer;
+   struct pvr_dynamic_render_info *dr_info;
+   struct pvr_renderpass_hwsetup_render *hw_render;
    struct pvr_render_state *rstate;
 
    uint32_t attachment_count;
@@ -274,6 +305,7 @@ struct pvr_render_pass_info {
    uint32_t subpass_idx;
    uint32_t current_hw_subpass;
+   uint32_t sample_count;
 
    VkRect2D render_area;
 
@@ -285,6 +317,10 @@ struct pvr_render_pass_info {
    bool process_empty_tiles;
    bool enable_bg_tag;
    uint32_t isp_userpass;
+
+   bool dynamic_render;
+   bool suspend;
+   bool resume;
 };
 
 struct pvr_ppp_state {
@@ -627,4 +663,8 @@ void pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
 const struct pvr_renderpass_hwsetup_subpass *
 pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
 
+struct pvr_renderpass_hwsetup_render *
+pvr_pass_info_get_hw_render(const struct pvr_render_pass_info *render_pass_info,
+                            uint32_t idx);
+
 #endif /* PVR_CMD_BUFFER_H */
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index d2b75a257dc..bc44bd725b0 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -991,6 +991,9 @@ pvr_create_device(struct pvr_physical_device *pdevice,
    device->global_cmd_buffer_submit_count = 0;
    device->global_queue_present_count = 0;
 
+   simple_mtx_init(&device->rs_mtx, mtx_plain);
+   list_inithead(&device->render_states);
+
    *pDevice = pvr_device_to_handle(device);
 
    return VK_SUCCESS;
@@ -1057,6 +1060,36 @@ err_out:
    return result;
 }
 
+void pvr_rstate_entry_add(struct pvr_device *device,
+                          struct pvr_render_state *rstate)
+{
+   simple_mtx_lock(&device->rs_mtx);
+   list_addtail(&rstate->link, &device->render_states);
+   simple_mtx_unlock(&device->rs_mtx);
+}
+
+void pvr_rstate_entry_remove(struct pvr_device *device,
+                             const struct pvr_render_state *rstate)
+{
+   simple_mtx_lock(&device->rs_mtx);
+   assert(rstate);
+
+   list_for_each_entry_safe (struct pvr_render_state,
+                             entry,
+                             &device->render_states,
+                             link) {
+      if (entry != rstate)
+         continue;
+
+      pvr_render_state_cleanup(device, rstate);
+      list_del(&entry->link);
+
+      vk_free(&device->vk.alloc, entry);
+   }
+
+   simple_mtx_unlock(&device->rs_mtx);
+}
+
 void
 pvr_destroy_device(struct pvr_device *device,
                    const VkAllocationCallbacks *pAllocator)
@@ -1064,6 +1097,19 @@ pvr_destroy_device(struct pvr_device *device,
    if (!device)
       return;
 
+   simple_mtx_lock(&device->rs_mtx);
+   list_for_each_entry_safe (struct pvr_render_state,
+                             rstate,
+                             &device->render_states,
+                             link) {
+      pvr_render_state_cleanup(device, rstate);
+      list_del(&rstate->link);
+
+      vk_free(&device->vk.alloc, rstate);
+   }
+   simple_mtx_unlock(&device->rs_mtx);
+   simple_mtx_destroy(&device->rs_mtx);
+
    pvr_border_color_table_finish(device);
    pvr_robustness_buffer_finish(device);
    pvr_spm_finish_scratch_buffer_store(device);
@@ -1929,6 +1975,7 @@ void pvr_render_state_cleanup(struct pvr_device *device,
    pvr_render_targets_fini(rstate->render_targets,
                            rstate->render_targets_count);
    pvr_bo_suballoc_free(rstate->ppp_state_bo);
+   vk_free(&device->vk.alloc, rstate->render_targets);
 }
 
 VkResult pvr_render_state_setup(
@@ -2128,6 +2175,7 @@ void pvr_DestroyFramebuffer(VkDevice _device,
       return;
 
    pvr_render_state_cleanup(device, framebuffer->rstate);
+   /* The render state is freed together with the framebuffer. */
 
    vk_object_base_finish(&framebuffer->base);
    vk_free2(&device->vk.alloc, pAllocator, framebuffer);
diff --git a/src/imagination/vulkan/pvr_device.h b/src/imagination/vulkan/pvr_device.h
index 5d229811f45..953bd1f34bb 100644
--- a/src/imagination/vulkan/pvr_device.h
+++ b/src/imagination/vulkan/pvr_device.h
@@ -142,6 +142,9 @@ struct pvr_device {
    struct vk_sync *presignaled_sync;
 
    struct pvr_border_color_table *border_color_table;
+
+   simple_mtx_t rs_mtx;
+   struct list_head render_states;
 };
 
 struct pvr_device_memory {
@@ -221,4 +224,10 @@ VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                             uint64_t code_alignment,
                             struct pvr_suballoc_bo **const pvr_bo_out);
 
+void pvr_rstate_entry_add(struct pvr_device *device,
+                          struct pvr_render_state *rstate);
+
+void pvr_rstate_entry_remove(struct pvr_device *device,
+                             const struct pvr_render_state *rstate);
+
 #endif /* PVR_DEVICE_H */
diff --git a/src/imagination/vulkan/pvr_framebuffer.h b/src/imagination/vulkan/pvr_framebuffer.h
index 0f911819fc2..ef14e6ca8a9 100644
--- a/src/imagination/vulkan/pvr_framebuffer.h
+++ b/src/imagination/vulkan/pvr_framebuffer.h
@@ -23,6 +23,8 @@ struct pvr_render_target {
 };
 
 struct pvr_render_state {
+   struct list_head link;
+
    uint32_t width;
    uint32_t height;
    uint32_t layers;
@@ -51,29 +53,7 @@ struct pvr_framebuffer {
    uint32_t attachment_count;
    struct pvr_image_view **attachments;
-
-#if 0
-   /* Saved information from pCreateInfo. */
-   uint32_t width;
-   uint32_t height;
-   uint32_t layers;
-
-   /* Derived and other state. */
-   struct pvr_suballoc_bo *ppp_state_bo;
-   /* PPP state size in dwords. */
-   size_t ppp_state_size;
-
-   uint32_t render_targets_count;
-   struct pvr_render_target *render_targets;
-
-   struct pvr_spm_scratch_buffer *scratch_buffer;
-
-   uint32_t render_count;
-   struct pvr_spm_eot_state *spm_eot_state_per_render;
-   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
-#else
    struct pvr_render_state *rstate;
-#endif
 };
 
 struct pvr_device;
diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h
index 1c2d5f5f5b4..3e7e42065a7 100644
--- a/src/imagination/vulkan/pvr_hw_pass.h
+++ b/src/imagination/vulkan/pvr_hw_pass.h
@@ -189,6 +189,8 @@ struct pvr_renderpass_hwsetup_render {
 
    bool requires_frag_pr;
 
+   bool multiview_enabled;
+
    /* View mask for multiview. */
    uint32_t view_mask;
diff --git a/src/imagination/vulkan/pvr_mrt.c b/src/imagination/vulkan/pvr_mrt.c
index e4d54b5827b..9ddf9d78c80 100644
--- a/src/imagination/vulkan/pvr_mrt.c
+++ b/src/imagination/vulkan/pvr_mrt.c
@@ -8,15 +8,20 @@
 #include "vk_log.h"
 
+#include "pvr_cmd_buffer.h"
 #include "pvr_csb.h"
 #include "pvr_device.h"
 #include "pvr_formats.h"
+#include "pvr_image.h"
+#include "pvr_mrt.h"
+#include "pvr_pass.h"
 #include "pvr_physical_device.h"
 
 #include "hwdef/rogue_hw_utils.h"
 #include "util/macros.h"
-
-#include "pvr_mrt.h"
+#include "vulkan/vulkan_core.h"
+#include "vulkan/runtime/vk_graphics_state.h"
+#include "vulkan/runtime/vk_image.h"
 
 /* Which parts of the output registers/a tile buffer are currently allocated.
  */
 struct pvr_mrt_alloc_mask {
@@ -237,6 +242,246 @@ pvr_destroy_mrt_setup(const struct pvr_device *device,
    vk_free(&device->vk.alloc, setup->mrt_resources);
 }
 
+static bool
+pvr_rendering_info_needs_load(const struct pvr_dynamic_render_info *dr_info)
+{
+   for (unsigned i = 0; i < dr_info->hw_render.color_init_count; i++) {
+      const uint32_t index = dr_info->hw_render.color_init[i].index;
+      if (index == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const VkAttachmentLoadOp op = dr_info->hw_render.color_init[i].op;
+      if (op == VK_ATTACHMENT_LOAD_OP_LOAD || op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+         return true;
+   }
+
+   return false;
+}
+
+static VkResult pvr_mrt_load_op_init(struct pvr_device *device,
+                                     const VkAllocationCallbacks *alloc,
+                                     const struct pvr_render_pass_info *rp_info,
+                                     struct pvr_load_op *load_op,
+                                     uint32_t view_idx)
+{
+   const struct pvr_dynamic_render_info *dr_info = rp_info->dr_info;
+   VkResult result;
+
+   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
+
+   assert(dr_info->hw_render.color_init_count <=
+          PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
+   for (unsigned i = 0; i < dr_info->hw_render.color_init_count; i++) {
+      const struct pvr_renderpass_colorinit *color_init =
+         &dr_info->hw_render.color_init[i];
+      const struct pvr_image *image;
+
+      assert(color_init->index < rp_info->attachment_count);
+      load_op->clears_loads_state.dest_vk_format[i] =
+         rp_info->attachments[color_init->index]->vk.view_format;
+
+      image = pvr_image_view_get_image(rp_info->attachments[color_init->index]);
+      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
+         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);
+
+      switch (color_init->op) {
+      case VK_ATTACHMENT_LOAD_OP_CLEAR:
+         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
+         break;
+      case VK_ATTACHMENT_LOAD_OP_LOAD:
+         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
+         break;
+      case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
+      case VK_ATTACHMENT_LOAD_OP_NONE:
+         break;
+      default:
+         UNREACHABLE("unsupported loadOp");
+      }
+   }
+
+   load_op->clears_loads_state.mrt_setup = &dr_info->hw_render.init_setup;
+
+   /* load_op points into the caller's load_op_state allocation, so there is
+    * nothing to free here on failure; the caller cleans up the whole state.
+    */
+   result = pvr_load_op_shader_generate(device, alloc, load_op);
+   if (result != VK_SUCCESS)
+      return result;
+
+   load_op->view_indices[0] = view_idx;
+   load_op->view_count = 1;
+
+   load_op->is_hw_object = true;
+   load_op->hw_render = &dr_info->hw_render;
+
+   return VK_SUCCESS;
+}
+
+static void pvr_load_op_fini(struct pvr_load_op *load_op)
+{
+   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
+   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
+   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
+}
+
+static void pvr_load_op_destroy(struct pvr_device *device,
+                                const VkAllocationCallbacks *allocator,
+                                struct pvr_load_op *load_op)
+{
+   pvr_load_op_fini(load_op);
+   vk_free2(&device->vk.alloc, allocator, load_op);
+}
+
+void
+pvr_mrt_load_op_state_cleanup(const struct pvr_device *device,
+                              const VkAllocationCallbacks *alloc,
+                              struct pvr_load_op_state *state)
+{
+   if (!state)
+      return;
+
+   while (state->load_op_count--) {
+      const uint32_t load_op_idx = state->load_op_count;
+      struct pvr_load_op *load_op = &state->load_ops[load_op_idx];
+
+      pvr_load_op_fini(load_op);
+   }
+
+   vk_free2(&device->vk.alloc, alloc, state);
+}
+
+static VkResult
+pvr_mrt_load_op_state_create(struct pvr_device *device,
+                             const VkAllocationCallbacks *alloc,
+                             const struct pvr_render_pass_info *rp_info,
+                             struct pvr_load_op_state **state)
+{
+   const struct pvr_dynamic_render_info *dr_info = rp_info->dr_info;
+   const uint32_t view_count = util_bitcount(dr_info->hw_render.view_mask);
+   struct pvr_load_op_state *load_op_state;
+   struct pvr_load_op *load_ops;
+   VkResult result;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &load_op_state, __typeof__(*load_op_state), 1);
+   vk_multialloc_add(&ma, &load_ops, __typeof__(*load_ops), view_count);
+
+   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   load_op_state->load_ops = load_ops;
+
+   u_foreach_bit (view_idx, dr_info->hw_render.view_mask) {
+      struct pvr_load_op *const load_op =
+         &load_op_state->load_ops[load_op_state->load_op_count];
+
+      result = pvr_mrt_load_op_init(device, alloc, rp_info, load_op, view_idx);
+      if (result != VK_SUCCESS)
+         goto err_load_op_state_cleanup;
+
+      load_op_state->load_op_count++;
+   }
+
+   *state = load_op_state;
+
+   return VK_SUCCESS;
+
+err_load_op_state_cleanup:
+   pvr_mrt_load_op_state_cleanup(device, alloc, load_op_state);
+
+   return result;
+}
+
+/* TODO: Can we guarantee that if we have at least one render target there will
+ * be a render target allocated as a REG?
+ */
+static inline bool pvr_needs_output_register_writes(
+   const struct usc_mrt_setup *setup)
+{
+   for (uint32_t i = 0; i < setup->num_render_targets; i++) {
+      struct usc_mrt_resource *mrt_resource =
+         &setup->mrt_resources[i];
+
+      if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG)
+         return true;
+   }
+
+   return false;
+}
+
+static inline VkResult pvr_mrt_add_missing_output_register_write(
+   struct usc_mrt_setup *setup,
+   const VkAllocationCallbacks *alloc)
+{
+   const uint32_t last = setup->num_render_targets;
+   struct usc_mrt_resource *mrt_resources;
+
+   if (pvr_needs_output_register_writes(setup))
+      return VK_SUCCESS;
+
+   setup->num_render_targets++;
+
+   mrt_resources = vk_realloc(alloc,
+                              setup->mrt_resources,
+                              setup->num_render_targets *
+                                 sizeof(*mrt_resources),
+                              8U,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+   if (!mrt_resources)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   setup->mrt_resources = mrt_resources;
+
+   mrt_resources[last].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
+   mrt_resources[last].reg.output_reg = 0U;
+   mrt_resources[last].reg.offset = 0U;
+   mrt_resources[last].intermediate_size = 4U;
+   mrt_resources[last].mrt_desc.intermediate_size = 4U;
+   mrt_resources[last].mrt_desc.priority = 0U;
+   mrt_resources[last].mrt_desc.valid_mask[0U] = ~0;
+   mrt_resources[last].mrt_desc.valid_mask[1U] = ~0;
+   mrt_resources[last].mrt_desc.valid_mask[2U] = ~0;
+   mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_mrt_load_ops_setup(struct pvr_cmd_buffer *cmd_buffer,
+                                const VkAllocationCallbacks *alloc,
+                                struct pvr_load_op_state **load_op_state)
+{
+   const struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   const struct pvr_dynamic_render_info *dr_info =
+      state->render_pass_info.dr_info;
+   struct pvr_device *device = cmd_buffer->device;
+   VkResult result = VK_SUCCESS;
+
+   if (dr_info->mrt_setup->num_tile_buffers) {
+      result = pvr_device_tile_buffer_ensure_cap(
+         device,
+         dr_info->mrt_setup->num_tile_buffers);
+
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   if (!pvr_rendering_info_needs_load(dr_info))
+      return VK_SUCCESS;
+
+   result =
+      pvr_mrt_add_missing_output_register_write(dr_info->mrt_setup,
+                                                alloc);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_mrt_load_op_state_create(device,
+                                         alloc,
+                                         &state->render_pass_info,
+                                         load_op_state);
+
+   return result;
+}
+
 VkResult pvr_pds_unitex_state_program_create_and_upload(
    struct pvr_device *device,
    const VkAllocationCallbacks *allocator,
@@ -413,3 +658,4 @@ err_free_usc_frag_prog_bo:
 
    return result;
 }
+
diff --git a/src/imagination/vulkan/pvr_mrt.h b/src/imagination/vulkan/pvr_mrt.h
index 178bb039bda..989d9b558ad 100644
--- a/src/imagination/vulkan/pvr_mrt.h
+++ b/src/imagination/vulkan/pvr_mrt.h
@@ -189,4 +189,15 @@ struct pvr_load_op_state {
    struct pvr_load_op *load_ops;
 };
 
+struct pvr_dynamic_render_info;
+struct pvr_cmd_buffer;
+
+VkResult pvr_mrt_load_ops_setup(struct pvr_cmd_buffer *cmd_buffer,
+                                const VkAllocationCallbacks *alloc,
+                                struct pvr_load_op_state **state);
+void
+pvr_mrt_load_op_state_cleanup(const struct pvr_device *device,
+                              const VkAllocationCallbacks *alloc,
+                              struct pvr_load_op_state *state);
+
 #endif
diff --git a/src/imagination/vulkan/pvr_pass.h b/src/imagination/vulkan/pvr_pass.h
index bcbe75b7a18..7ef8568978d 100644
--- a/src/imagination/vulkan/pvr_pass.h
+++ b/src/imagination/vulkan/pvr_pass.h
@@ -26,10 +26,14 @@ struct pvr_render_pass_attachment {
    VkAttachmentStoreOp store_op;
 
+   VkResolveModeFlagBits resolve_mode;
+
    VkAttachmentLoadOp stencil_load_op;
    VkAttachmentStoreOp stencil_store_op;
 
+   VkResolveModeFlagBits stencil_resolve_mode;
+
    VkFormat vk_format;
    uint32_t sample_count;
    VkImageLayout initial_layout;
@@ -39,7 +43,11 @@ struct pvr_render_pass_attachment {
    /* Can this surface be resolved by the PBE. */
    bool is_pbe_downscalable;
 
+   bool is_depth;
+   bool is_stencil;
+   bool need_eot;
+
    uint32_t resolve_target;
    uint32_t index;
 };
diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c
index a87f163bd8f..d375e31de3b 100644
--- a/src/imagination/vulkan/pvr_query.c
+++ b/src/imagination/vulkan/pvr_query.c
@@ -410,7 +410,7 @@ pvr_cmd_buffer_state_get_view_count(const struct pvr_cmd_buffer_state *state)
    const struct pvr_sub_cmd_gfx *gfx_sub_cmd = &state->current_sub_cmd->gfx;
    const uint32_t hw_render_idx = gfx_sub_cmd->hw_render_idx;
    const struct pvr_renderpass_hwsetup_render *hw_render =
-      &render_pass_info->pass->hw_setup->renders[hw_render_idx];
+      pvr_pass_info_get_hw_render(render_pass_info, hw_render_idx);
    const uint32_t view_count = util_bitcount(hw_render->view_mask);
 
    assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
diff --git a/src/imagination/vulkan/pvr_query.h b/src/imagination/vulkan/pvr_query.h
index 636ed493499..109c1cce040 100644
--- a/src/imagination/vulkan/pvr_query.h
+++ b/src/imagination/vulkan/pvr_query.h
@@ -37,6 +37,11 @@ struct pvr_query_pool {
 struct pvr_query_info {
    enum pvr_query_type type;
 
+   struct {
+      bool is_dynamic_render : 1;
+      bool is_suspend : 1;
+   };
+
    union {
       struct {
         uint32_t num_query_indices;
diff --git a/src/imagination/vulkan/pvr_query_compute.c b/src/imagination/vulkan/pvr_query_compute.c
index a925ef6aaba..be461128064 100644
--- a/src/imagination/vulkan/pvr_query_compute.c
+++ b/src/imagination/vulkan/pvr_query_compute.c
@@ -412,6 +412,9 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
    if (result != VK_SUCCESS)
       return result;
 
+   cmd_buffer->state.current_sub_cmd->is_dynamic_render = query_info->is_dynamic_render;
+   cmd_buffer->state.current_sub_cmd->is_suspend = query_info->is_suspend;
+
    switch (query_info->type) {
    case PVR_QUERY_TYPE_AVAILABILITY_WRITE:
       /* Adds a compute shader (fenced on the last 3D) that writes a non-zero