From a1002a6673bc77b279a79c8223c13db00be6a142 Mon Sep 17 00:00:00 2001 From: Luigi Santivetti Date: Thu, 1 Feb 2024 14:51:52 +0000 Subject: [PATCH] pvr: add initial driver support for VK_KHR_multiview Signed-off-by: Luigi Santivetti Acked-by: Erik Faye-Lund Part-of: --- src/imagination/common/pvr_limits.h | 1 + src/imagination/vulkan/pvr_blit.c | 9 +- src/imagination/vulkan/pvr_cmd_buffer.c | 305 +++++++++++---- src/imagination/vulkan/pvr_device.c | 6 +- src/imagination/vulkan/pvr_hw_pass.c | 4 + src/imagination/vulkan/pvr_hw_pass.h | 7 +- src/imagination/vulkan/pvr_job_render.c | 75 ++-- src/imagination/vulkan/pvr_job_render.h | 33 +- src/imagination/vulkan/pvr_pass.c | 495 ++++++++++++++++++++---- src/imagination/vulkan/pvr_pipeline.c | 6 +- src/imagination/vulkan/pvr_private.h | 43 +- src/imagination/vulkan/pvr_query.c | 24 +- src/imagination/vulkan/pvr_queue.c | 60 ++- 13 files changed, 851 insertions(+), 217 deletions(-) diff --git a/src/imagination/common/pvr_limits.h b/src/imagination/common/pvr_limits.h index 9b992639b1d..eb74fc82708 100644 --- a/src/imagination/common/pvr_limits.h +++ b/src/imagination/common/pvr_limits.h @@ -36,6 +36,7 @@ #define PVR_MAX_QUEUES 2U #define PVR_MAX_VIEWPORTS 1U #define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U +#define PVR_MAX_MULTIVIEW 6U #define PVR_MAX_PUSH_CONSTANTS_SIZE 128U diff --git a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c index 12c1b476064..2ffd4450376 100644 --- a/src/imagination/vulkan/pvr_blit.c +++ b/src/imagination/vulkan/pvr_blit.c @@ -1496,10 +1496,11 @@ pvr_get_max_layers_covering_target(VkRect2D target_rect, */ static inline bool pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info, + bool multiview_enabled, uint32_t rect_count, const VkClearRect *rects) { - if (!PVR_HAS_FEATURE(dev_info, gs_rta_support)) + if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) || multiview_enabled) return false; for (uint32_t i = 0; i < rect_count; i++) { @@ -1890,8 +1891,10 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, /* We'll be emitting to the control stream. */ sub_cmd->empty_cmd = false; - vs_has_rt_id_output = - pvr_clear_needs_rt_id_output(dev_info, rect_count, rects); + vs_has_rt_id_output = pvr_clear_needs_rt_id_output(dev_info, + pass->multiview_enabled, + rect_count, + rects); /* 4 because we're expecting the USC to output X, Y, Z, and W. 
*/ vs_output_size_in_bytes = PVR_DW_TO_BYTES(4); diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 5919e7962d9..052e1ee1954 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -634,7 +634,8 @@ err_csb_finish: static VkResult pvr_setup_texture_state_words( struct pvr_device *device, struct pvr_combined_image_sampler_descriptor *descriptor, - const struct pvr_image_view *image_view) + const struct pvr_image_view *image_view, + uint32_t view_index) { const struct pvr_image *image = vk_to_pvr_image(image_view->vk.image); struct pvr_texture_state_info info = { @@ -648,6 +649,7 @@ static VkResult pvr_setup_texture_state_words( .mip_levels = 1, .sample_count = image_view->vk.image->samples, .stride = image->physical_extent.width, + .offset = image->layer_size * view_index, .addr = image->dev_addr, }; const uint8_t *const swizzle = pvr_get_format_swizzle(info.format); @@ -682,6 +684,7 @@ static VkResult pvr_setup_texture_state_words( static VkResult pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, const struct pvr_load_op *load_op, + uint32_t view_index, pvr_dev_addr_t *const addr_out) { const struct pvr_render_pass_info *render_pass_info = @@ -725,7 +728,8 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, if (load_op->clears_loads_state.rt_load_mask & BITFIELD_BIT(i)) { result = pvr_setup_texture_state_words(cmd_buffer->device, &texture_states[texture_count], - image_view); + image_view, + view_index); if (result != VK_SUCCESS) return result; @@ -786,7 +790,8 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, result = pvr_setup_texture_state_words(cmd_buffer->device, &texture_states[texture_count], - image_view); + image_view, + view_index); if (result != VK_SUCCESS) return result; @@ -919,6 +924,7 @@ static VkResult pvr_load_op_pds_data_create_and_upload( static VkResult pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, const struct pvr_load_op *load_op, + uint32_t view_index, struct pvr_pds_upload *const pds_upload_out) { pvr_dev_addr_t constants_addr; @@ -926,6 +932,7 @@ pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, result = pvr_load_op_constants_create_and_upload(cmd_buffer, load_op, + view_index, &constants_addr); if (result != VK_SUCCESS) return result; @@ -964,6 +971,63 @@ static void pvr_pds_bgnd_pack_state( } } +static inline VkResult pvr_load_op_state_data_create_and_upload_for_view( + struct pvr_cmd_buffer *cmd_buffer, + const struct pvr_load_op *load_op, + uint32_t view_index, + uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS]) +{ + struct pvr_pds_upload load_op_program; + VkResult result; + + /* FIXME: Should we free the PDS pixel event data or let it be freed + * when the pool gets emptied? 
+ */ + result = pvr_load_op_data_create_and_upload(cmd_buffer, + load_op, + view_index, + &load_op_program); + if (result != VK_SUCCESS) + return result; + + pvr_pds_bgnd_pack_state(load_op, &load_op_program, pds_reg_values); + + return VK_SUCCESS; +} + +static VkResult pvr_load_op_state_data_create_and_upload( + struct pvr_cmd_buffer *cmd_buffer, + const struct pvr_load_op_state *load_op_state, + struct pvr_view_state *view_state) +{ + for (uint32_t i = 0; i < load_op_state->load_op_count; i++) { + const struct pvr_load_op *load_op = &load_op_state->load_ops[i]; + uint32_t view_index = load_op->view_indices[0]; + uint64_t *pds_reg_values; + VkResult result; + + pds_reg_values = view_state->view[view_index].pds_bgnd_reg_values; + result = + pvr_load_op_state_data_create_and_upload_for_view(cmd_buffer, + load_op, + view_index, + pds_reg_values); + if (result != VK_SUCCESS) + return result; + + pds_reg_values = view_state->view[view_index].pr_pds_bgnd_reg_values; + result = + pvr_load_op_state_data_create_and_upload_for_view(cmd_buffer, + load_op, + view_index, + pds_reg_values); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + /** * \brief Calculates the stride in pixels based on the pitch in bytes and pixel * format. @@ -991,7 +1055,8 @@ static void pvr_setup_pbe_state( const bool down_scale, const uint32_t samples, uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], - uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) + uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS], + uint32_t view_index) { const struct pvr_image *image = pvr_image_view_get_image(iview); uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch; @@ -1033,9 +1098,10 @@ static void pvr_setup_pbe_state( /* FIXME: Should we have an inline function to return the address of a mip * level? 
*/ - surface_params.addr = - PVR_DEV_ADDR_OFFSET(image->vma->dev_addr, - image->mip_levels[iview->vk.base_mip_level].offset); + surface_params.addr = PVR_DEV_ADDR_OFFSET( + image->vma->dev_addr, + image->layer_size * view_index + + image->mip_levels[iview->vk.base_mip_level].offset); if (!iview->vk.storage.z_slice_offset) { surface_params.addr = @@ -1381,6 +1447,7 @@ static void pvr_setup_emit_state(const struct pvr_device_info *dev_info, const struct pvr_renderpass_hwsetup_render *hw_render, struct pvr_render_pass_info *render_pass_info, + uint32_t view_index, struct pvr_emit_state *emit_state) { assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS); @@ -1450,7 +1517,8 @@ pvr_setup_emit_state(const struct pvr_device_info *dev_info, surface->need_resolve, samples, emit_state->pbe_cs_words[emit_state->emit_count], - emit_state->pbe_reg_words[emit_state->emit_count]); + emit_state->pbe_reg_words[emit_state->emit_count], + view_index); emit_state->emit_count += 1; } } @@ -1486,7 +1554,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, const struct pvr_renderpass_hwsetup_render *hw_render = &render_pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx]; struct pvr_render_job *job = &sub_cmd->job; - struct pvr_pds_upload pds_pixel_event_program; struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer; struct pvr_spm_bgobj_state *spm_bgobj_state = &framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx]; @@ -1495,6 +1562,12 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, struct pvr_render_target *render_target; VkResult result; + /* Unless for barrier_{store,load}, where the index defaults to zero, the + * view index associated with a gfx job is known and set only at submission + * time. + */ + job->view_state.view_index = 0; + if (sub_cmd->barrier_store) { /* Store to the SPM scratch buffer. 
*/ @@ -1514,15 +1587,46 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, memcpy(job->pbe_reg_words, &spm_eot_state->pbe_reg_words, sizeof(job->pbe_reg_words)); - job->pds_pixel_event_data_offset = + + /* Configure the job view state for a barrier store */ + assert(!job->view_state.view_index); + + job->view_state.view[0].pds_pixel_event_data_offset = spm_eot_state->pixel_event_program_data_offset; + job->view_state.force_pds_pixel_event_data_offset_zero = true; } else { + struct pvr_pds_upload pds_pixel_event_program; struct pvr_emit_state emit_state = { 0 }; memset(emit_state.tile_buffer_ids, ~0, sizeof(emit_state.tile_buffer_ids)); - pvr_setup_emit_state(dev_info, hw_render, render_pass_info, &emit_state); + u_foreach_bit (view_idx, hw_render->view_mask) { + pvr_setup_emit_state(dev_info, + hw_render, + render_pass_info, + view_idx, + &emit_state); + + unsigned pixel_output_width = + pvr_pass_get_pixel_output_width(render_pass_info->pass, + sub_cmd->hw_render_idx, + dev_info); + + result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( + cmd_buffer, + emit_state.emit_count, + emit_state.pbe_cs_words[0], + emit_state.tile_buffer_ids, + pixel_output_width, + &pds_pixel_event_program); + if (result != VK_SUCCESS) + return result; + + /* Configure the job view state */ + job->view_state.view[view_idx].pds_pixel_event_data_offset = + pds_pixel_event_program.data_offset; + } job->z_only_render = !hw_render->eot_surface_count && !sub_cmd->frag_has_side_effects && @@ -1531,23 +1635,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, memcpy(job->pbe_reg_words, emit_state.pbe_reg_words, sizeof(job->pbe_reg_words)); - - unsigned pixel_output_width = - pvr_pass_get_pixel_output_width(render_pass_info->pass, - sub_cmd->hw_render_idx, - dev_info); - - result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( - cmd_buffer, - emit_state.emit_count, - emit_state.pbe_cs_words[0], - emit_state.tile_buffer_ids, - pixel_output_width, - &pds_pixel_event_program); - if (result != VK_SUCCESS) - return result; - - job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset; } if (sub_cmd->barrier_load) { @@ -1556,45 +1643,45 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, /* Load the previously stored render from the SPM scratch buffer. 
*/ - STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == + STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pds_bgnd_reg_values) == ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); - typed_memcpy(job->pds_bgnd_reg_values, + typed_memcpy(job->view_state.view[0].pds_bgnd_reg_values, spm_bgobj_state->pds_reg_values, ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); - STATIC_ASSERT(ARRAY_SIZE(job->pr_pds_bgnd_reg_values) == - ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); - typed_memcpy(job->pr_pds_bgnd_reg_values, + STATIC_ASSERT( + ARRAY_SIZE(job->view_state.view[0].pr_pds_bgnd_reg_values) == + ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); + typed_memcpy(job->view_state.view[0].pr_pds_bgnd_reg_values, spm_bgobj_state->pds_reg_values, ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); - } else if (hw_render->load_op) { - const struct pvr_load_op *load_op = hw_render->load_op; - struct pvr_pds_upload load_op_program; + + /* Configure the job view state for a barrier load */ + assert(!job->view_state.view_index); + job->view_state.force_pds_bgnd_reg_values_zero = true; + } else if (hw_render->load_op_state) { + const struct pvr_load_op_state *load_op_state = hw_render->load_op_state; + + /* We always have at least 1 bit set in the view_mask */ + assert(load_op_state->load_op_count); /* Recalculate Background Object(s). */ - /* FIXME: Should we free the PDS pixel event data or let it be freed - * when the pool gets emptied? - */ - result = pvr_load_op_data_create_and_upload(cmd_buffer, - load_op, - &load_op_program); + result = pvr_load_op_state_data_create_and_upload(cmd_buffer, + load_op_state, + &job->view_state); if (result != VK_SUCCESS) return result; job->enable_bg_tag = render_pass_info->enable_bg_tag; job->process_empty_tiles = render_pass_info->process_empty_tiles; - - pvr_pds_bgnd_pack_state(load_op, - &load_op_program, - job->pds_bgnd_reg_values); } if (!hw_render->requires_frag_pr) { memcpy(job->pr_pbe_reg_words, job->pbe_reg_words, sizeof(job->pbe_reg_words)); - job->pr_pds_pixel_event_data_offset = job->pds_pixel_event_data_offset; + job->view_state.use_pds_pixel_event_data_offset = true; } else { memcpy(job->pr_pbe_reg_words, &spm_eot_state->pbe_reg_words, @@ -1606,7 +1693,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, render_target = pvr_get_render_target(render_pass_info->pass, framebuffer, sub_cmd->hw_render_idx); - job->rt_dataset = render_target->rt_dataset; + job->view_state.rt_datasets = &render_target->rt_dataset[0]; job->ctrl_stream_addr = pvr_csb_get_start_address(&sub_cmd->control_stream); @@ -2097,6 +2184,9 @@ pvr_compute_generate_idfwdf(struct pvr_cmd_buffer *cmd_buffer, pvr_compute_generate_control_stream(csb, sub_cmd, &info); } +/* TODO: This can be pre-packed and uploaded directly. Would that provide any + * speed up? 
+ */ void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd_compute *const sub_cmd, bool deallocate_shareds) @@ -2416,6 +2506,17 @@ pvr_cmd_uses_deferred_cs_cmds(const struct pvr_cmd_buffer *const cmd_buffer) deferred_control_stream_flags; } +static inline uint32_t +pvr_render_pass_info_get_view_mask(const struct pvr_render_pass_info *rp_info) +{ + const uint32_t hw_render_idx = rp_info->current_hw_subpass; + const struct pvr_render_pass *pass = rp_info->pass; + const struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[hw_render_idx]; + + return hw_render->view_mask; +} + VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, enum pvr_sub_cmd_type type) { @@ -2468,6 +2569,8 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass; sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer; sub_cmd->gfx.empty_cmd = true; + sub_cmd->gfx.view_mask = + pvr_render_pass_info_get_view_mask(&state->render_pass_info); if (state->vis_test_enabled) sub_cmd->gfx.query_pool = state->query_pool; @@ -2892,40 +2995,63 @@ static VkResult pvr_cmd_buffer_attachments_setup( return VK_SUCCESS; } -static VkResult pvr_render_targets_init(struct pvr_device *device, - struct pvr_render_pass *pass, - struct pvr_framebuffer *framebuffer) +static inline VkResult pvr_render_targets_datasets_create( + struct pvr_device *device, + struct pvr_framebuffer *framebuffer, + const struct pvr_renderpass_hwsetup_render *hw_render, + struct pvr_render_target *render_target) { const struct pvr_device_info *const dev_info = &device->pdevice->dev_info; const uint32_t layers = PVR_HAS_FEATURE(dev_info, gs_rta_support) ? framebuffer->layers : 1; + pthread_mutex_lock(&render_target->mutex); + + u_foreach_bit (view_idx, hw_render->view_mask) { + struct pvr_rt_dataset *rt_dataset; + VkResult result; + + if (render_target->valid_mask & BITFIELD_BIT(view_idx)) + continue; + + result = pvr_render_target_dataset_create(device, + framebuffer->width, + framebuffer->height, + hw_render->sample_count, + layers, + &rt_dataset); + if (result != VK_SUCCESS) { + pvr_render_targets_datasets_destroy(render_target); + pthread_mutex_unlock(&render_target->mutex); + return result; + } + + render_target->valid_mask |= BITFIELD_BIT(view_idx); + render_target->rt_dataset[view_idx] = rt_dataset; + } + + pthread_mutex_unlock(&render_target->mutex); + + return VK_SUCCESS; +} + +static VkResult pvr_render_targets_init(struct pvr_device *device, + struct pvr_render_pass *pass, + struct pvr_framebuffer *framebuffer) +{ for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { struct pvr_render_target *render_target = pvr_get_render_target(pass, framebuffer, i); + const struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[i]; + VkResult result; - pthread_mutex_lock(&render_target->mutex); - - if (!render_target->valid) { - const struct pvr_renderpass_hwsetup_render *hw_render = - &pass->hw_setup->renders[i]; - VkResult result; - - result = pvr_render_target_dataset_create(device, - framebuffer->width, - framebuffer->height, - hw_render->sample_count, - layers, - &render_target->rt_dataset); - if (result != VK_SUCCESS) { - pthread_mutex_unlock(&render_target->mutex); - return result; - } - - render_target->valid = true; - } - - pthread_mutex_unlock(&render_target->mutex); + result = pvr_render_targets_datasets_create(device, + framebuffer, + hw_render, + render_target); 
+ if (result != VK_SUCCESS) + return result; } return VK_SUCCESS; @@ -3213,10 +3339,11 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer, pvr_csb_clear_relocation_mark(csb); } -static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer, - struct pvr_sub_cmd_gfx *sub_cmd, - struct pvr_load_op *load_op, - uint32_t isp_userpass) +static VkResult pvr_cs_write_load_op_for_view(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_gfx *sub_cmd, + struct pvr_load_op *load_op, + uint32_t view_index, + uint32_t isp_userpass) { const struct pvr_device *device = cmd_buffer->device; struct pvr_static_clear_ppp_template template = @@ -3228,6 +3355,7 @@ static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer, result = pvr_load_op_data_create_and_upload(cmd_buffer, load_op, + view_index, &shareds_update_program); if (result != VK_SUCCESS) return result; @@ -3295,6 +3423,29 @@ static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer, return VK_SUCCESS; } +static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_gfx *sub_cmd, + struct pvr_load_op *load_op, + uint32_t isp_userpass) +{ + assert(load_op->view_count); + + for (uint32_t i = 0; i < load_op->view_count; i++) { + const uint32_t view_index = load_op->view_indices[i]; + VkResult result; + + result = pvr_cs_write_load_op_for_view(cmd_buffer, + sub_cmd, + load_op, + view_index, + isp_userpass); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBeginInfo, const VkSubpassBeginInfo *pSubpassBeginInfo) diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 513d9e2b6ce..c9c40a5277e 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -3016,11 +3016,7 @@ static void pvr_render_targets_fini(struct pvr_render_target *render_targets, uint32_t render_targets_count) { for (uint32_t i = 0; i < render_targets_count; i++) { - if (render_targets[i].valid) { - pvr_render_target_dataset_destroy(render_targets[i].rt_dataset); - render_targets[i].valid = false; - } - + pvr_render_targets_datasets_destroy(&render_targets[i]); pthread_mutex_destroy(&render_targets[i].mutex); } } diff --git a/src/imagination/vulkan/pvr_hw_pass.c b/src/imagination/vulkan/pvr_hw_pass.c index 8b33b9f0991..ace19794c96 100644 --- a/src/imagination/vulkan/pvr_hw_pass.c +++ b/src/imagination/vulkan/pvr_hw_pass.c @@ -1877,6 +1877,9 @@ pvr_can_combine_with_render(const struct pvr_device_info *dev_info, sp_dsts->color = NULL; new_alloc->tile_buffers = NULL; + if (ctx->hw_render && (ctx->hw_render->view_mask != subpass->view_mask)) + return false; + /* The hardware doesn't support replicating the stencil, so we need to store * the depth to memory if a stencil attachment is used as an input * attachment. @@ -2060,6 +2063,7 @@ pvr_merge_subpass(const struct pvr_device *device, ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE; ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE; ctx->hw_render->sample_count = input_subpass->sample_count; + ctx->hw_render->view_mask = input_subpass->view_mask; } /* Allocate a new subpass in the in-progress render. 
*/ diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h index b542ab0ad52..06941e038f3 100644 --- a/src/imagination/vulkan/pvr_hw_pass.h +++ b/src/imagination/vulkan/pvr_hw_pass.h @@ -255,9 +255,12 @@ struct pvr_renderpass_hwsetup_render { /* true if this HW render has lasting effects on its attachments. */ bool has_side_effects; - struct pvr_load_op *load_op; - bool requires_frag_pr; + + /* View mask for multiview. */ + uint32_t view_mask; + + struct pvr_load_op_state *load_op_state; }; struct pvr_renderpass_hw_map { diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index a477b132884..ac57b792e29 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -992,9 +992,11 @@ static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr += pvr_cmd_length(CR_PPP_CTRL); pvr_csb_pack (stream_ptr, CR_TE_PSG, value) { + struct pvr_rt_dataset *rt_dataset = + job->view_state.rt_datasets[job->view_state.view_index]; value.completeonterminate = job->geometry_terminate; - value.region_stride = job->rt_dataset->rgn_headers_stride / + value.region_stride = rt_dataset->rgn_headers_stride / ROGUE_CR_TE_PSG_REGION_STRIDE_UNIT_SIZE; value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942); @@ -1015,9 +1017,9 @@ static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx, } stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0); - /* clang-format off */ - pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value); - /* clang-format on */ + pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) { + value.idx = job->view_state.view_index; + } stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX); state->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)state->fw_stream; @@ -1070,7 +1072,8 @@ pvr_geom_state_flags_init(const struct pvr_render_job *const job, struct pvr_winsys_geometry_state_flags *flags) { *flags = (struct pvr_winsys_geometry_state_flags){ - .is_first_geometry = !job->rt_dataset->need_frag, + .is_first_geometry = + !job->view_state.rt_datasets[job->view_state.view_index]->need_frag, .is_last_geometry = job->geometry_terminate, .use_single_core = job->frag_uses_atomic_ops, }; @@ -1147,7 +1150,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, const struct pvr_device_runtime_info *dev_runtime_info = &pdevice->dev_runtime_info; const struct pvr_device_info *dev_info = &pdevice->dev_info; - const struct pvr_rt_dataset *rt_dataset = job->rt_dataset; + const struct pvr_rt_dataset *rt_dataset = + job->view_state.rt_datasets[job->view_state.view_index]; const enum ROGUE_CR_ISP_AA_MODE_TYPE isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples); struct pvr_rt_mtile_info tiling_info = { 0 }; @@ -1155,6 +1159,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, enum ROGUE_CR_ZLS_FORMAT_TYPE zload_format = ROGUE_CR_ZLS_FORMAT_TYPE_F32Z; uint32_t *stream_ptr = (uint32_t *)state->fw_stream; uint32_t *stream_len_ptr = stream_ptr; + uint32_t view_index; uint32_t pixel_ctl; uint32_t isp_ctl; @@ -1296,20 +1301,28 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, } stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM); - STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == + STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pds_bgnd_reg_values) == ROGUE_NUM_CR_PDS_BGRND_WORDS); - STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t)); + STATIC_ASSERT(sizeof(job->view_state.view[0].pds_bgnd_reg_values[0]) == + 
sizeof(uint64_t)); + + if (job->view_state.force_pds_bgnd_reg_values_zero) + view_index = 0; + else + view_index = job->view_state.view_index; + memcpy(stream_ptr, - job->pds_bgnd_reg_values, - sizeof(job->pds_bgnd_reg_values)); + job->view_state.view[view_index].pds_bgnd_reg_values, + sizeof(job->view_state.view[view_index].pds_bgnd_reg_values)); stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64; - STATIC_ASSERT(ARRAY_SIZE(job->pr_pds_bgnd_reg_values) == + STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pr_pds_bgnd_reg_values) == ROGUE_NUM_CR_PDS_BGRND_WORDS); - STATIC_ASSERT(sizeof(job->pr_pds_bgnd_reg_values[0]) == sizeof(uint64_t)); + STATIC_ASSERT(sizeof(job->view_state.view[0].pr_pds_bgnd_reg_values[0]) == + sizeof(uint64_t)); memcpy(stream_ptr, - job->pr_pds_bgnd_reg_values, - sizeof(job->pr_pds_bgnd_reg_values)); + job->view_state.view[view_index].pr_pds_bgnd_reg_values, + sizeof(job->view_state.view[view_index].pr_pds_bgnd_reg_values)); stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64; #undef DWORDS_PER_U64 @@ -1445,9 +1458,9 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM); } - /* clang-format off */ - pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value); - /* clang-format on */ + pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) { + value.idx = job->view_state.view_index; + } stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX); /* Make sure that the pvr_frag_km_...() function is returning the correct @@ -1456,8 +1469,14 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream == pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info)); + if (job->view_state.force_pds_pixel_event_data_offset_zero) + view_index = 0; + else + view_index = job->view_state.view_index; + pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) { - value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset); + value.addr = PVR_DEV_ADDR( + job->view_state.view[view_index].pds_pixel_event_data_offset); } stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA); @@ -1599,6 +1618,14 @@ static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state( pvr_frag_km_stream_pbe_reg_words_offset(dev_info); const uint32_t eot_data_addr_byte_offset = pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info); + const uint32_t view_index = + job->view_state.force_pds_pixel_event_data_offset_zero + ? 0 + : job->view_state.view_index; + const uint32_t pr_pds_pixel_event_data_offset = + job->view_state.use_pds_pixel_event_data_offset + ? 
job->view_state.view[view_index].pds_pixel_event_data_offset + : job->pr_pds_pixel_event_data_offset; /* Massive copy :( */ *state = *frag; @@ -1613,10 +1640,11 @@ static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state( assert(state->fw_stream_len >= eot_data_addr_byte_offset + PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA))); + pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset], CR_EVENT_PIXEL_PDS_DATA, eot_pds_data) { - eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset); + eot_pds_data.addr = PVR_DEV_ADDR(pr_pds_pixel_event_data_offset); } } @@ -1629,8 +1657,10 @@ static void pvr_render_job_ws_submit_info_init( { memset(submit_info, 0, sizeof(*submit_info)); - submit_info->rt_dataset = job->rt_dataset->ws_rt_dataset; - submit_info->rt_data_idx = job->rt_dataset->rt_data_idx; + submit_info->rt_dataset = + job->view_state.rt_datasets[job->view_state.view_index]->ws_rt_dataset; + submit_info->rt_data_idx = + job->view_state.rt_datasets[job->view_state.view_index]->rt_data_idx; submit_info->frame_num = ctx->device->global_queue_present_count; submit_info->job_num = ctx->device->global_cmd_buffer_submit_count; @@ -1671,7 +1701,8 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag) { - struct pvr_rt_dataset *rt_dataset = job->rt_dataset; + struct pvr_rt_dataset *rt_dataset = + job->view_state.rt_datasets[job->view_state.view_index]; struct pvr_winsys_render_submit_info submit_info; struct pvr_device *device = ctx->device; VkResult result; diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h index 66343dc20e5..50c0761841f 100644 --- a/src/imagination/vulkan/pvr_job_render.h +++ b/src/imagination/vulkan/pvr_job_render.h @@ -71,8 +71,6 @@ struct pvr_rt_mtile_info { * (although it doesn't subclass). */ struct pvr_render_job { - struct pvr_rt_dataset *rt_dataset; - struct { bool run_frag : 1; bool geometry_terminate : 1; @@ -88,7 +86,7 @@ struct pvr_render_job { bool z_only_render : 1; }; - uint32_t pds_pixel_event_data_offset; + /* PDS pixel event for partial renders do not depend on the view index. */ uint32_t pr_pds_pixel_event_data_offset; pvr_dev_addr_t ctrl_stream_addr; @@ -154,8 +152,33 @@ struct pvr_render_job { "CR_PDS_BGRND3_SIZEINFO cannot be stored in uint64_t"); static_assert(ROGUE_NUM_CR_PDS_BGRND_WORDS == 3, "Cannot store all CR_PDS_BGRND words"); - uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; - uint64_t pr_pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + + struct pvr_view_state { + struct { + uint32_t pds_pixel_event_data_offset; + uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + uint64_t pr_pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + } view[PVR_MAX_MULTIVIEW]; + + /* True if pds_pixel_event_data_offset should be taken from the first + * element of the view array. Otherwise view_index should be used. + */ + bool force_pds_pixel_event_data_offset_zero : 1; + + /* True if a partial render job uses the same EOT program data for a + * pixel event as the fragment job and not from the scratch buffer. + */ + bool use_pds_pixel_event_data_offset : 1; + + /* True if first_pds_bgnd_reg_values should be taken from the first + * element of the view array. Otherwise view_index should be used. 
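+       * In pvr_frag_state_stream_init() this selects which view's
+       * pds_bgnd_reg_values / pr_pds_bgnd_reg_values get copied into the
+       * fragment firmware stream.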
+ */ + bool force_pds_bgnd_reg_values_zero : 1; + + struct pvr_rt_dataset **rt_datasets; + + uint32_t view_index; + } view_state; }; void pvr_rt_mtile_info_init(const struct pvr_device_info *dev_info, diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c index 7464a25f52a..a220d626597 100644 --- a/src/imagination/vulkan/pvr_pass.c +++ b/src/imagination/vulkan/pvr_pass.c @@ -384,32 +384,90 @@ pvr_subpass_load_op_init(struct pvr_device *device, return result; } + load_op->view_count = 0; + u_foreach_bit (view_idx, hw_render->view_mask) { + load_op->view_indices[load_op->view_count] = view_idx; + load_op->view_count++; + } + hw_render->subpasses[hw_subpass_idx].load_op = load_op; return VK_SUCCESS; } -static VkResult -pvr_render_load_op_init(struct pvr_device *device, - const VkAllocationCallbacks *allocator, - const struct pvr_render_pass *pass, - struct pvr_renderpass_hwsetup_render *hw_render) +struct pvr_per_view_attachment_first_use_info { + uint32_t *first_subpass[PVR_MAX_MULTIVIEW]; + uint32_t *first_subpass_memory; +}; + +/** + * \brief Returns true if a clear op is needed instead of the hw render reported + * load op load. + * + * The hw render isn't aware of multiview renders so it thinks we're reusing the + * attachment of a previous subpass even if it's the first time the attachment + * is used in the render pass, so a clear op gets reported as a load op load + * instead. + */ +/* FIXME: Investigate whether we can change the HW render code so it reports + * the correct load operation. This will mean we can get rid of struct + * pvr_per_view_attachment_first_use_info and struct pvr_load_op_state. + * Instead we'll be able to have a single render struct load_op like we do for + * subpasses. + */ +static bool pvr_render_load_op_multiview_load_should_be_clear( + const struct pvr_render_pass *pass, + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t hw_render_index, + const struct pvr_renderpass_colorinit *color_init, + const struct pvr_per_view_attachment_first_use_info *first_use_info, + uint32_t view_index) { - VkResult result; + uint32_t first_use_view_index; - struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc, - allocator, - sizeof(*load_op), - 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!load_op) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + if (!pass->multiview_enabled) + return false; + /* Check we have a load op load to see if we might need to correct the hw + * render. + */ + if (color_init->op != VK_ATTACHMENT_LOAD_OP_LOAD) + return false; + + first_use_view_index = + first_use_info->first_subpass[view_index][color_init->index]; + + /* Check that we're looking at the render where the attachment is used for + * the first time. + */ + if (first_use_view_index != hw_render_index) + return false; + + /* Check that the original load op was a clear op. 
*/ + if (pass->attachments[color_init->index].load_op != + VK_ATTACHMENT_LOAD_OP_CLEAR) { + return false; + } + + return true; +} + +static VkResult pvr_render_load_op_init( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_load_op *const load_op, + const struct pvr_render_pass *pass, + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t hw_render_index, + uint32_t view_index, + const struct pvr_per_view_attachment_first_use_info *first_use_info) +{ load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG; assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS); for (uint32_t i = 0; i < hw_render->color_init_count; i++) { struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i]; + bool multiview_load_op_should_be_clear = false; assert(color_init->index < pass->attachment_count); load_op->clears_loads_state.dest_vk_format[i] = @@ -418,37 +476,115 @@ pvr_render_load_op_init(struct pvr_device *device, if (pass->attachments[color_init->index].sample_count > 1) load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i); - if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD) - load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i); - else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR) + multiview_load_op_should_be_clear = + pvr_render_load_op_multiview_load_should_be_clear(pass, + hw_render, + hw_render_index, + color_init, + first_use_info, + view_index); + + if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR || + multiview_load_op_should_be_clear) { load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i); + } else if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD) { + load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i); + } } load_op->is_hw_object = true; load_op->hw_render = hw_render; load_op->clears_loads_state.mrt_setup = &hw_render->init_setup; + load_op->view_indices[0] = view_index; + load_op->view_count = 1; - result = pvr_load_op_shader_generate(device, allocator, load_op); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, allocator, load_op); - return result; - } + return pvr_load_op_shader_generate(device, allocator, load_op); +} - hw_render->load_op = load_op; - - return VK_SUCCESS; +static void pvr_load_op_fini(struct pvr_load_op *load_op) +{ + pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo); + pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo); + pvr_bo_suballoc_free(load_op->usc_frag_prog_bo); } static void pvr_load_op_destroy(struct pvr_device *device, const VkAllocationCallbacks *allocator, struct pvr_load_op *load_op) { - pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo); - pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo); - pvr_bo_suballoc_free(load_op->usc_frag_prog_bo); + pvr_load_op_fini(load_op); vk_free2(&device->vk.alloc, allocator, load_op); } +static void +pvr_render_load_op_state_destroy(struct pvr_device *device, + const VkAllocationCallbacks *pAllocator, + struct pvr_load_op_state *load_op_state) +{ + if (!load_op_state) + return; + + while (load_op_state->load_op_count--) { + const uint32_t load_op_idx = load_op_state->load_op_count; + struct pvr_load_op *load_op = &load_op_state->load_ops[load_op_idx]; + + pvr_load_op_fini(load_op); + } + + vk_free2(&device->vk.alloc, pAllocator, load_op_state); +} + +static VkResult pvr_render_load_op_state_create( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + const struct pvr_render_pass *pass, + const struct pvr_renderpass_hwsetup_render *hw_render, 
+ uint32_t hw_render_index, + const struct pvr_per_view_attachment_first_use_info *first_use_info, + struct pvr_load_op_state **const load_op_state_out) +{ + const uint32_t view_count = util_bitcount(hw_render->view_mask); + struct pvr_load_op_state *load_op_state; + struct pvr_load_op *load_ops; + VkResult result; + + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &load_op_state, __typeof__(*load_op_state), 1); + vk_multialloc_add(&ma, &load_ops, __typeof__(*load_ops), view_count); + + if (!vk_multialloc_zalloc(&ma, allocator, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + load_op_state->load_ops = load_ops; + + u_foreach_bit (view_idx, hw_render->view_mask) { + struct pvr_load_op *const load_op = + &load_op_state->load_ops[load_op_state->load_op_count]; + + result = pvr_render_load_op_init(device, + allocator, + load_op, + pass, + hw_render, + hw_render_index, + view_idx, + first_use_info); + if (result != VK_SUCCESS) + goto err_load_op_state_destroy; + + load_op_state->load_op_count++; + } + + *load_op_state_out = load_op_state; + + return VK_SUCCESS; + +err_load_op_state_destroy: + pvr_render_load_op_state_destroy(device, allocator, load_op_state); + + return result; +} + #define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info) \ ({ \ int __ret = PVR_MAX_TILE_BUFFER_COUNT; \ @@ -486,29 +622,94 @@ pvr_is_load_op_needed(const struct pvr_render_pass *pass, return false; } -static void -pvr_render_pass_load_ops_cleanup(struct pvr_device *device, - const VkAllocationCallbacks *pAllocator, - struct pvr_render_pass *pass) +static VkResult pvr_per_view_attachment_first_use_info_init( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_render_pass *pass, + struct pvr_per_view_attachment_first_use_info *first_use_info) { - if (!pass) - return; + size_t alloc_size; - for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { - struct pvr_renderpass_hwsetup_render *hw_render = - &pass->hw_setup->renders[i]; + if (!pass->attachment_count) { + memset(first_use_info, 0, sizeof(*first_use_info)); - for (uint32_t j = 0; j < hw_render->subpass_count; j++) { - if (hw_render->subpasses[j].load_op) { - pvr_load_op_destroy(device, - pAllocator, - hw_render->subpasses[j].load_op); + return VK_SUCCESS; + } + + STATIC_ASSERT(ARRAY_SIZE(first_use_info->first_subpass) == + PVR_MAX_MULTIVIEW); + + alloc_size = + sizeof(first_use_info->first_subpass_memory[0]) * pass->attachment_count; + alloc_size *= ARRAY_SIZE(first_use_info->first_subpass); + + first_use_info->first_subpass_memory = + vk_zalloc2(&device->vk.alloc, + allocator, + alloc_size, + 4, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!first_use_info->first_subpass_memory) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + +#define PVR_SUBPASS_INVALID (~0U) + for (uint32_t i = 0; i < ARRAY_SIZE(first_use_info->first_subpass); i++) { + first_use_info->first_subpass[i] = + &first_use_info->first_subpass_memory[i * pass->attachment_count]; + + for (uint32_t j = 0; j < pass->attachment_count; j++) + first_use_info->first_subpass[i][j] = PVR_SUBPASS_INVALID; + } + + for (uint32_t subpass_idx = 0; subpass_idx < pass->subpass_count; + subpass_idx++) { + struct pvr_render_subpass *const subpass = &pass->subpasses[subpass_idx]; + + u_foreach_bit (view_idx, subpass->view_mask) { + for (uint32_t i = 0; i < subpass->color_count; i++) { + const uint32_t attach_idx = subpass->color_attachments[i]; + uint32_t *first_use = + &first_use_info->first_subpass[view_idx][attach_idx]; + + if 
(attach_idx < pass->attachment_count && + *first_use == PVR_SUBPASS_INVALID) { + *first_use = subpass_idx; + } + } + + for (uint32_t i = 0; i < subpass->input_count; i++) { + const uint32_t input_attach_idx = + subpass->input_attachments[i].attachment_idx; + uint32_t *first_use = + &first_use_info->first_subpass[view_idx][input_attach_idx]; + + if (input_attach_idx < pass->attachment_count && + *first_use == PVR_SUBPASS_INVALID) { + *first_use = subpass_idx; + } + } + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const uint32_t ds_attach_idx = subpass->depth_stencil_attachment; + uint32_t *first_use = + &first_use_info->first_subpass[view_idx][ds_attach_idx]; + + if (*first_use == PVR_SUBPASS_INVALID) + *first_use = subpass_idx; } } - - if (hw_render->load_op) - pvr_load_op_destroy(device, pAllocator, hw_render->load_op); } +#undef PVR_SUBPASS_INVALID + + return VK_SUCCESS; +} + +static inline void pvr_per_view_attachment_first_use_info_fini( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_per_view_attachment_first_use_info *first_use_info) +{ + vk_free2(&device->vk.alloc, allocator, first_use_info->first_subpass_memory); } static inline VkResult pvr_render_add_missing_output_register_write( @@ -551,55 +752,173 @@ static inline VkResult pvr_render_add_missing_output_register_write( return VK_SUCCESS; } +static inline void +pvr_subpass_load_op_cleanup(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t subpass_count) +{ + while (subpass_count--) { + const uint32_t subpass_idx = subpass_count; + + if (hw_render->subpasses[subpass_idx].load_op) { + pvr_load_op_destroy(device, + allocator, + hw_render->subpasses[subpass_idx].load_op); + } + } +} + +static inline VkResult +pvr_subpass_load_op_setup(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_render_pass *pass, + struct pvr_renderpass_hwsetup_render *hw_render) +{ + for (uint32_t i = 0; i < hw_render->subpass_count; i++) { + VkResult result; + + if (!pvr_is_load_op_needed(pass, hw_render, i)) + continue; + + result = pvr_subpass_load_op_init(device, allocator, pass, hw_render, i); + if (result != VK_SUCCESS) { + /* pvr_subpass_load_op_setup() is responsible for cleaning + * up all load_ops created in this loop for this hw_render. 
+ */ + pvr_subpass_load_op_cleanup(device, allocator, hw_render, i); + return result; + } + } + + return VK_SUCCESS; +} + +static inline VkResult pvr_hw_render_load_ops_setup( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_render_pass *pass, + struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t hw_render_idx, + struct pvr_per_view_attachment_first_use_info *first_use_info) +{ + VkResult result; + + if (hw_render->tile_buffers_count) { + result = pvr_device_tile_buffer_ensure_cap( + device, + hw_render->tile_buffers_count, + hw_render->eot_setup.tile_buffer_size); + if (result != VK_SUCCESS) + return result; + } + + assert(!hw_render->load_op_state); + + if (hw_render->color_init_count != 0U) { + struct pvr_load_op_state *load_op_state = NULL; + + result = + pvr_render_add_missing_output_register_write(hw_render, allocator); + if (result != VK_SUCCESS) + return result; + + result = pvr_render_load_op_state_create(device, + allocator, + pass, + hw_render, + hw_render_idx, + first_use_info, + &load_op_state); + if (result != VK_SUCCESS) + return result; + + hw_render->load_op_state = load_op_state; + } + + result = pvr_subpass_load_op_setup(device, allocator, pass, hw_render); + if (result != VK_SUCCESS) { + /* pvr_hw_render_load_ops_setup() is responsible for cleaning up only + * one load_op_state for this hw_render. + */ + pvr_render_load_op_state_destroy(device, + allocator, + hw_render->load_op_state); + return result; + } + + return VK_SUCCESS; +} + +static void +pvr_render_pass_load_ops_cleanup(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_render_pass *pass, + uint32_t hw_render_count) +{ + while (hw_render_count--) { + const uint32_t hw_render_idx = hw_render_count; + struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[hw_render_idx]; + + pvr_subpass_load_op_cleanup(device, + allocator, + hw_render, + hw_render->subpass_count); + pvr_render_load_op_state_destroy(device, + allocator, + hw_render->load_op_state); + } +} + static VkResult pvr_render_pass_load_ops_setup(struct pvr_device *device, const VkAllocationCallbacks *allocator, struct pvr_render_pass *pass) { + struct pvr_per_view_attachment_first_use_info first_use_info; + uint32_t hw_render_idx; VkResult result; - for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { + result = pvr_per_view_attachment_first_use_info_init(device, + allocator, + pass, + &first_use_info); + if (result != VK_SUCCESS) + goto err_return; + + for (hw_render_idx = 0; hw_render_idx < pass->hw_setup->render_count; + hw_render_idx++) { struct pvr_renderpass_hwsetup_render *hw_render = - &pass->hw_setup->renders[i]; + &pass->hw_setup->renders[hw_render_idx]; - if (hw_render->tile_buffers_count) { - result = pvr_device_tile_buffer_ensure_cap( - device, - hw_render->tile_buffers_count, - hw_render->eot_setup.tile_buffer_size); - if (result != VK_SUCCESS) - goto err_load_op_cleanup; - } - - assert(!hw_render->load_op); - - if (hw_render->color_init_count != 0U) { - result = - pvr_render_add_missing_output_register_write(hw_render, allocator); - if (result != VK_SUCCESS) - goto err_load_op_cleanup; - - result = pvr_render_load_op_init(device, allocator, pass, hw_render); - if (result != VK_SUCCESS) - goto err_load_op_cleanup; - } - - for (uint32_t j = 0; j < hw_render->subpass_count; j++) { - if (!pvr_is_load_op_needed(pass, hw_render, j)) - continue; - - result = - pvr_subpass_load_op_init(device, allocator, pass, hw_render, j); - if (result != 
VK_SUCCESS) - goto err_load_op_cleanup; - } + result = pvr_hw_render_load_ops_setup(device, + allocator, + pass, + hw_render, + hw_render_idx, + &first_use_info); + if (result != VK_SUCCESS) + goto err_pvr_render_pass_load_ops_cleanup; } + pvr_per_view_attachment_first_use_info_fini(device, + allocator, + &first_use_info); + return VK_SUCCESS; -err_load_op_cleanup: - pvr_render_pass_load_ops_cleanup(device, allocator, pass); +err_pvr_render_pass_load_ops_cleanup: + /* pvr_render_pass_load_ops_setup() is responsible for cleaning + * up all load_ops created in this loop for each hw_render. + */ + pvr_render_pass_load_ops_cleanup(device, allocator, pass, hw_render_idx); + pvr_per_view_attachment_first_use_info_fini(device, + allocator, + &first_use_info); + +err_return: return result; } @@ -718,6 +1037,13 @@ VkResult pvr_CreateRenderPass2(VkDevice _device, } } + /* Multiview is considered enabled for all subpasses when the viewMask + * of them all isn't 0. Assume this now and assert later that it holds + * for each subpass viewMask. + */ + pass->multiview_enabled = pass->subpass_count && + pCreateInfo->pSubpasses[0].viewMask; + /* Assign reference pointers to lists, and fill in the attachments list, we * need to re-walk the dependencies array later to fill the per-subpass * dependencies lists in. @@ -727,6 +1053,12 @@ VkResult pvr_CreateRenderPass2(VkDevice _device, struct pvr_render_subpass *subpass = &pass->subpasses[i]; subpass->pipeline_bind_point = desc->pipelineBindPoint; + subpass->view_mask = desc->viewMask; + + assert(!pass->multiview_enabled || subpass->view_mask); + + if (!pass->multiview_enabled) + subpass->view_mask = 1; /* From the Vulkan spec. 1.3.265 * VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872: @@ -873,7 +1205,10 @@ void pvr_DestroyRenderPass(VkDevice _device, if (!pass) return; - pvr_render_pass_load_ops_cleanup(device, allocator, pass); + pvr_render_pass_load_ops_cleanup(device, + allocator, + pass, + pass->hw_setup->render_count); pvr_destroy_renderpass_hwsetup(allocator, pass->hw_setup); vk_object_base_finish(&pass->base); vk_free2(&device->vk.alloc, pAllocator, pass); diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index e7cd5b8504d..49673af035b 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -2834,11 +2834,7 @@ pvr_create_renderpass_state(const VkGraphicsPipelineCreateInfo *const info) return (struct vk_render_pass_state){ .attachments = attachments, - - /* TODO: This is only needed for VK_KHR_create_renderpass2 (or core 1.2), - * which is not currently supported. - */ - .view_mask = 0, + .view_mask = subpass->view_mask, }; } diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index a8f0ace6c7d..4223ca4ed6d 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -518,6 +518,8 @@ struct pvr_sub_cmd_gfx { bool wait_on_previous_transfer; bool has_depth_feedback; + + uint32_t view_mask; }; struct pvr_sub_cmd_compute { @@ -1009,11 +1011,11 @@ struct pvr_query_info { }; struct pvr_render_target { - struct pvr_rt_dataset *rt_dataset; + struct pvr_rt_dataset *rt_dataset[PVR_MAX_MULTIVIEW]; pthread_mutex_t mutex; - bool valid; + uint32_t valid_mask; }; struct pvr_framebuffer { @@ -1101,6 +1103,9 @@ struct pvr_render_subpass { uint32_t isp_userpass; VkPipelineBindPoint pipeline_bind_point; + + /* View mask for multiview. 
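+    * Set to 1 by pvr_CreateRenderPass2() when the render pass is not
+    * multiview.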
*/ + uint32_t view_mask; }; struct pvr_render_pass { @@ -1123,6 +1128,13 @@ struct pvr_render_pass { /* The maximum number of tile buffers to use in any subpass. */ uint32_t max_tilebuffer_count; + + /* VkSubpassDescription2::viewMask or 1 when non-multiview + * + * To determine whether multiview is enabled, check + * pvr_render_pass::multiview_enabled. + */ + bool multiview_enabled; }; /* Max render targets for the clears loads state in load op. @@ -1166,6 +1178,10 @@ struct pvr_load_op { const struct usc_mrt_setup *mrt_setup; } clears_loads_state; + + uint32_t view_indices[PVR_MAX_MULTIVIEW]; + + uint32_t view_count; }; #define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits) \ @@ -1185,6 +1201,15 @@ CHECK_MASK_SIZE(pvr_load_op, #undef CHECK_MASK_SIZE +struct pvr_load_op_state { + uint32_t load_op_count; + + /* Load op array indexed by HW render view (not by the index in the view + * mask). + */ + struct pvr_load_op *load_ops; +}; + uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( const struct pvr_device_info *dev_info, const struct pvr_device_runtime_info *dev_runtime_info, @@ -1428,6 +1453,20 @@ void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer, const struct pvr_renderpass_hwsetup_subpass * pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass); +static inline void +pvr_render_targets_datasets_destroy(struct pvr_render_target *render_target) +{ + u_foreach_bit (valid_idx, render_target->valid_mask) { + struct pvr_rt_dataset *rt_dataset = render_target->rt_dataset[valid_idx]; + + if (rt_dataset && render_target->valid_mask & BITFIELD_BIT(valid_idx)) + pvr_render_target_dataset_destroy(rt_dataset); + + render_target->rt_dataset[valid_idx] = NULL; + render_target->valid_mask &= ~BITFIELD_BIT(valid_idx); + } +} + VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer, vk.base, VkCommandBuffer, diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c index f8b65113eb6..0c67a6df042 100644 --- a/src/imagination/vulkan/pvr_query.c +++ b/src/imagination/vulkan/pvr_query.c @@ -394,6 +394,24 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, }; } +static inline const uint32_t +pvr_cmd_buffer_state_get_view_count(const struct pvr_cmd_buffer_state *state) +{ + const struct pvr_render_pass_info *render_pass_info = + &state->render_pass_info; + const struct pvr_sub_cmd_gfx *gfx_sub_cmd = &state->current_sub_cmd->gfx; + const uint32_t hw_render_idx = gfx_sub_cmd->hw_render_idx; + const struct pvr_renderpass_hwsetup_render *hw_render = + &render_pass_info->pass->hw_setup->renders[hw_render_idx]; + const uint32_t view_count = util_bitcount(hw_render->view_mask); + + assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS); + /* hw_render view masks have 1 bit set at least. 
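+    * Non-multiview passes get a subpass view_mask of 1, so the derived
+    * hw_render view_mask is never zero.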
*/ + assert(view_count); + + return view_count; +} + void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, @@ -401,6 +419,7 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer, { VK_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + uint32_t view_count = 1; VK_FROM_HANDLE(pvr_query_pool, pool, queryPool); PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); @@ -435,6 +454,8 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer, state->current_sub_cmd->gfx.barrier_store = false; state->current_sub_cmd->gfx.query_pool = pool; } + + view_count = pvr_cmd_buffer_state_get_view_count(state); } state->query_pool = pool; @@ -443,7 +464,8 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer, state->dirty.vis_test = true; /* Add the index to the list for this render. */ - util_dynarray_append(&state->query_indices, __typeof__(query), query); + for (uint32_t i = 0; i < view_count; i++) + util_dynarray_append(&state->query_indices, __typeof__(query), query); } void pvr_CmdEndQuery(VkCommandBuffer commandBuffer, diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c index 90f57016fdb..4fee75407be 100644 --- a/src/imagination/vulkan/pvr_queue.c +++ b/src/imagination/vulkan/pvr_queue.c @@ -213,16 +213,23 @@ static void pvr_update_job_syncs(struct pvr_device *device, queue->last_job_signal_sync[submitted_job_type] = new_signal_sync; } -static VkResult pvr_process_graphics_cmd(struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_cmd_buffer *cmd_buffer, - struct pvr_sub_cmd_gfx *sub_cmd) +static VkResult +pvr_process_graphics_cmd_for_view(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_gfx *sub_cmd, + uint32_t view_index) { pvr_dev_addr_t original_ctrl_stream_addr = { 0 }; + struct pvr_render_job *job = &sub_cmd->job; struct vk_sync *geom_signal_sync; struct vk_sync *frag_signal_sync = NULL; VkResult result; + job->ds.addr = + PVR_DEV_ADDR_OFFSET(job->ds.addr, job->ds.stride * view_index); + job->view_state.view_index = view_index; + result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, @@ -231,7 +238,7 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device, if (result != VK_SUCCESS) return result; - if (sub_cmd->job.run_frag) { + if (job->run_frag) { result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, @@ -254,11 +261,11 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device, * and if geometry_terminate is false this kick can't have a fragment * stage without another terminating geometry kick. */ - assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag); + assert(job->geometry_terminate && job->run_frag); /* First submit must not touch fragment work. 
*/ - sub_cmd->job.geometry_terminate = false; - sub_cmd->job.run_frag = false; + job->geometry_terminate = false; + job->run_frag = false; result = pvr_render_job_submit(queue->gfx_ctx, @@ -268,20 +275,19 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device, NULL, NULL); - sub_cmd->job.geometry_terminate = true; - sub_cmd->job.run_frag = true; + job->geometry_terminate = true; + job->run_frag = true; if (result != VK_SUCCESS) goto err_destroy_frag_sync; - original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr; + original_ctrl_stream_addr = job->ctrl_stream_addr; /* Second submit contains only a trivial control stream to terminate the * geometry work. */ assert(sub_cmd->terminate_ctrl_stream); - sub_cmd->job.ctrl_stream_addr = - sub_cmd->terminate_ctrl_stream->vma->dev_addr; + job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr; } result = pvr_render_job_submit(queue->gfx_ctx, @@ -292,14 +298,14 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device, frag_signal_sync); if (original_ctrl_stream_addr.addr > 0) - sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr; + job->ctrl_stream_addr = original_ctrl_stream_addr; if (result != VK_SUCCESS) goto err_destroy_frag_sync; pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM); - if (sub_cmd->job.run_frag) + if (job->run_frag) pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG); /* FIXME: DoShadowLoadOrStore() */ @@ -315,6 +321,30 @@ err_destroy_geom_sync: return result; } +static VkResult pvr_process_graphics_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_gfx *sub_cmd) +{ + const pvr_dev_addr_t ds_addr = sub_cmd->job.ds.addr; + + u_foreach_bit (view_idx, sub_cmd->view_mask) { + VkResult result; + + result = pvr_process_graphics_cmd_for_view(device, + queue, + cmd_buffer, + sub_cmd, + view_idx); + if (result != VK_SUCCESS) + return result; + } + + sub_cmd->job.ds.addr = ds_addr; + + return VK_SUCCESS; +} + static VkResult pvr_process_compute_cmd(struct pvr_device *device, struct pvr_queue *queue, struct pvr_sub_cmd_compute *sub_cmd)