From 9d48088428bfe52d234319d2ec9c5b01d56edbc8 Mon Sep 17 00:00:00 2001
From: Peter Quayle
Date: Tue, 6 May 2025 18:30:30 +0200
Subject: [PATCH] pvr: add view index support for vertex shaders

Add a PDS program that writes a view index to ptemp PVR_PTEMP_VIEW_INDEX and
upload one instance of it per view at device creation.

When a graphics sub command has multiview enabled, an additional control
stream is built with one entry per view. Each entry is a PDS state update
selecting the view index init program of its view, followed by a link to the
sub command's control stream; the job submitted for a view starts from the
entry of that view.

Vertex PDS programs flagged with PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED
write the view index ptemp to view_index_register with a DOUTW.

Signed-off-by: Peter Quayle
Co-authored-by: Simon Perretta
Acked-by: Erik Faye-Lund
Part-of:
---
 src/imagination/vulkan/pds/pvr_pds.c          | 66 ++++++++++++++
 src/imagination/vulkan/pds/pvr_pds.h          | 17 ++++
 src/imagination/vulkan/pds/pvr_pipeline_pds.c | 28 ++++++
 src/imagination/vulkan/pvr_cmd_buffer.c       | 90 +++++++++++++++++++
 src/imagination/vulkan/pvr_device.c           | 87 +++++++++++++++++-
 src/imagination/vulkan/pvr_private.h          |  8 ++
 src/imagination/vulkan/pvr_queue.c            |  5 ++
 7 files changed, 300 insertions(+), 1 deletion(-)

diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c
index 19c2e37823f..60f581807ed 100644
--- a/src/imagination/vulkan/pds/pvr_pds.c
+++ b/src/imagination/vulkan/pds/pvr_pds.c
@@ -4569,6 +4569,72 @@ uint32_t *pvr_pds_generate_stream_out_terminate_program(
    return NULL;
 }
 
+/* Generates the PDS program which initialises the view index ptemp.
+ *
+ * The program loads program->view_index and zero into two temps, adds them
+ * into ptemp PVR_PTEMP_VIEW_INDEX and halts.
+ *
+ * PDS_GENERATE_SIZES writes nothing to buffer and instead stores the code size
+ * (in bytes), data size and temp count in program. PDS_GENERATE_CODE_SEGMENT
+ * writes the instructions to buffer and stores the temp count in program. Any
+ * other mode is a no-op, this program having no data segment.
+ *
+ * Returns buffer advanced past the words written to it, if any.
+ */
+uint32_t *pvr_pds_generate_view_index_init_program(
+   struct pvr_pds_view_index_init_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
+   uint32_t temps_used = 0;
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
+       gen_mode == PDS_GENERATE_SIZES) {
+      const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
+#define APPEND(X)                    \
+   if (encode) {                     \
+      *buffer = X;                   \
+      buffer++;                      \
+   } else {                          \
+      code_size += sizeof(uint32_t); \
+   }
+
+      uint32_t view_index_temp = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+      uint32_t zero_temp = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+
+      /* Load the view index into a view_index_temp. */
+      APPEND(
+         pvr_pds_inst_encode_limm(0, view_index_temp, program->view_index, 0));
+
+      /* Load the zero into a zero_temp. */
+      APPEND(pvr_pds_inst_encode_limm(0, zero_temp, 0, 0));
+
+      /* Copy the temp into ptemp. */
+      APPEND(pvr_pds_inst_encode_add32(
+         0,
+         0,
+         0,
+         view_index_temp,
+         zero_temp,
+         PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER + PVR_PTEMP_VIEW_INDEX));
+
+      APPEND(pvr_pds_inst_encode_halt(0));
+#undef APPEND
+
+      program->temps_used = temps_used;
+   }
+
+   if (gen_mode == PDS_GENERATE_SIZES) {
+      program->code_size = code_size;
+      program->data_size = data_size;
+   }
+
+   return buffer;
+}
+
 /* DrawArrays works in several steps:
  *
  * 1) load data from draw_indirect buffer
diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h
index 3d9fac760e6..4bb87161262 100644
--- a/src/imagination/vulkan/pds/pvr_pds.h
+++ b/src/imagination/vulkan/pds/pvr_pds.h
@@ -517,6 +517,14 @@ struct pvr_pds_stream_out_terminate_program {
    uint32_t stream_out_terminate_pds_code_size;
 };
 
+struct pvr_pds_view_index_init_program {
+   uint16_t view_index;
+
+   uint32_t data_size;
+   uint32_t code_size;
+   uint32_t temps_used;
+};
+
 /* Structure representing the PDS compute shader program.
  * This structure describes the USC code and compute buffers required
  * by the PDS compute task loading program
@@ -815,6 +823,13 @@ uint32_t *pvr_pds_generate_stream_out_terminate_program(
    enum pvr_pds_generate_mode gen_mode,
    const struct pvr_device_info *dev_info);
 
+#define PVR_PTEMP_VIEW_INDEX 4U
+
+uint32_t *pvr_pds_generate_view_index_init_program(
+   struct pvr_pds_view_index_init_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode);
+
 /* Structure representing DrawIndirect PDS programs. */
 struct pvr_pds_drawindirect_program {
    /* --- Input to pvr_pds_drawindirect_program --- */
@@ -965,6 +980,7 @@ struct pvr_pds_descriptor_program_input {
 
 #define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)
 #define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)
+#define PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED BITFIELD_BIT(7U)
 
 #define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
 
@@ -1002,6 +1018,7 @@ struct pvr_pds_vertex_primary_program_input {
    uint16_t base_instance_register;
    uint16_t base_vertex_register;
    uint16_t draw_index_register;
+   uint16_t view_index_register;
 };
 
 #define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)
diff --git a/src/imagination/vulkan/pds/pvr_pipeline_pds.c b/src/imagination/vulkan/pds/pvr_pipeline_pds.c
index 7a7acc51ea6..c48475e3c08 100644
--- a/src/imagination/vulkan/pds/pvr_pipeline_pds.c
+++ b/src/imagination/vulkan/pds/pvr_pipeline_pds.c
@@ -459,6 +459,7 @@ void pvr_pds_generate_vertex_primary_program(
    uint32_t write_base_instance_control = ~0;
    uint32_t write_base_vertex_control = ~0;
    uint32_t pvr_write_draw_index_control = ~0;
+   uint32_t write_view_index_control = ~0;
 
    uint32_t ddmad_count = 0;
    uint32_t doutw_count = 0;
@@ -497,6 +498,8 @@ void pvr_pds_generate_vertex_primary_program(
                       PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
    pvr_debug_pds_flag(input_program->flags,
                       PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED);
    pvr_debug(" ");
 
    pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
@@ -607,6 +610,12 @@ void pvr_pds_generate_vertex_primary_program(
       }
    }
 
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED) {
+      doutw_count++;
+      write_view_index_control =
+         pvr_find_constant(const_usage, RESERVE_32BIT, "View index DOUTW Ctrl");
+   }
+
    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
       /* Load absolute instance id into uiInstanceIdTemp. */
       PVR_PDS_MODE_TOGGLE(
@@ -1451,6 +1460,25 @@ void pvr_pds_generate_vertex_primary_program(
       }
    }
 
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED) {
+      bool last_dma = (++running_dma_count == total_dma_count);
+      uint32_t data_mask = (PVR_PTEMP_VIEW_INDEX & 1) ? 0x2 : 0x1;
+
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_encode_direct_write(
+            &entry_write_state,
+            last_dma,
+            false,
+            R64_C(write_view_index_control),
+            R64_P(PVR_PTEMP_VIEW_INDEX >> 1),
+            data_mask,
+            input_program->view_index_register,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+            dev_info));
+   }
+
    doutu_address_entry =
       pvr_prepare_next_pds_const_map_entry(&entry_write_state,
                                            sizeof(*doutu_address_entry));
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index 06408ceb3f8..e2553dbadbd 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -98,6 +98,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
          util_dynarray_fini(&sub_cmd->gfx.sec_query_indices);
          pvr_csb_finish(&sub_cmd->gfx.control_stream);
          pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.terminate_ctrl_stream);
+         pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.multiview_ctrl_stream);
          pvr_bo_suballoc_free(sub_cmd->gfx.depth_bias_bo);
          pvr_bo_suballoc_free(sub_cmd->gfx.scissor_bo);
          break;
@@ -2242,6 +2243,83 @@ pvr_cmd_buffer_process_deferred_clears(struct pvr_cmd_buffer *cmd_buffer)
    return VK_SUCCESS;
 }
 
+/* Builds the multiview control stream: for each of the PVR_MAX_MULTIVIEW
+ * views, a PDS state update selecting that view's view index init program
+ * followed by a stream link to addr.
+ *
+ * On success *bo is set to the baked control stream and *stride to the length
+ * of a single view's entry, as a sum of pvr_cmd_length() values.
+ */
+static VkResult
+pvr_csb_gfx_build_view_index_ctrl_stream(struct pvr_device *const device,
+                                         pvr_dev_addr_t addr,
+                                         struct pvr_bo **bo,
+                                         uint32_t *stride)
+{
+   struct list_head bo_list;
+   struct pvr_csb csb;
+   VkResult result;
+
+   pvr_csb_init(device, PVR_CMD_STREAM_TYPE_GRAPHICS, &csb);
+
+   for (uint32_t i = 0; i < PVR_MAX_MULTIVIEW; ++i) {
+      struct pvr_pds_view_index_init_program *program =
+         &device->view_index_init_info[i];
+
+      pvr_csb_set_relocation_mark(&csb);
+
+      pvr_csb_emit (&csb, VDMCTRL_PDS_STATE0, state_update0) {
+         state_update0.block_type = ROGUE_VDMCTRL_BLOCK_TYPE_PDS_STATE_UPDATE;
+         state_update0.dm_target = ROGUE_VDMCTRL_DM_TARGET_VDM;
+         state_update0.usc_target = ROGUE_VDMCTRL_USC_TARGET_ALL;
+         state_update0.usc_common_size = 0;
+         state_update0.usc_unified_size = 0;
+         state_update0.pds_temp_size = program->temps_used;
+         state_update0.pds_data_size = DIV_ROUND_UP(
+            PVR_DW_TO_BYTES(device->view_index_init_programs[i].data_size),
+            ROGUE_VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+      }
+
+      pvr_csb_emit (&csb, VDMCTRL_PDS_STATE1, state_update1) {
+         state_update1.pds_data_addr =
+            device->view_index_init_programs[i].pvr_bo->dev_addr;
+         state_update1.sd_type = ROGUE_VDMCTRL_SD_TYPE_PDS;
+         state_update1.sd_next_type = ROGUE_VDMCTRL_SD_TYPE_USC;
+      }
+
+      pvr_csb_emit (&csb, VDMCTRL_PDS_STATE2, state_update2) {
+         state_update2.pds_code_addr.addr =
+            device->view_index_init_programs[i].pvr_bo->dev_addr.addr +
+            PVR_DW_TO_BYTES(device->view_index_init_programs[i].data_size);
+      }
+
+      pvr_csb_clear_relocation_mark(&csb);
+
+      pvr_csb_emit_link(&csb, addr, false);
+   }
+
+   result = pvr_csb_bake(&csb, &bo_list);
+   if (result != VK_SUCCESS)
+      goto err_csb_finish;
+
+   assert(list_is_singular(&bo_list));
+   *bo = list_first_entry(&bo_list, struct pvr_bo, link);
+
+   /* This needs to be kept in sync with the instructions emitted above. */
+   *stride = pvr_cmd_length(VDMCTRL_PDS_STATE0) +
+             pvr_cmd_length(VDMCTRL_PDS_STATE1) +
+             pvr_cmd_length(VDMCTRL_PDS_STATE2) +
+             pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
+             pvr_cmd_length(VDMCTRL_STREAM_LINK1);
+
+   return VK_SUCCESS;
+
+err_csb_finish:
+   pvr_csb_finish(&csb);
+
+   return result;
+}
+
 VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
 {
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
@@ -2329,6 +2407,16 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
       if (result != VK_SUCCESS)
          return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
 
+      if (gfx_sub_cmd->multiview_enabled) {
+         result = pvr_csb_gfx_build_view_index_ctrl_stream(
+            device,
+            pvr_csb_get_start_address(&gfx_sub_cmd->control_stream),
+            &gfx_sub_cmd->multiview_ctrl_stream,
+            &gfx_sub_cmd->multiview_ctrl_stream_stride);
+         if (result != VK_SUCCESS)
+            return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
+      }
+
       result = pvr_sub_cmd_gfx_job_init(&device->pdevice->dev_info,
                                         cmd_buffer,
                                         gfx_sub_cmd);
@@ -2571,6 +2659,8 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
       sub_cmd->gfx.empty_cmd = true;
       sub_cmd->gfx.view_mask =
          pvr_render_pass_info_get_view_mask(&state->render_pass_info);
+      sub_cmd->gfx.multiview_enabled =
+         state->render_pass_info.pass->multiview_enabled;
 
       if (state->vis_test_enabled)
          sub_cmd->gfx.query_pool = state->query_pool;
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index c9c40a5277e..82c0d5f9c5a 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -1896,6 +1896,79 @@ err_free_nop_usc_bo:
    return result;
 }
 
+/* Generates and uploads the view index init PDS program of each of the
+ * PVR_MAX_MULTIVIEW views, filling in device->view_index_init_info[] and
+ * device->view_index_init_programs[].
+ *
+ * On failure the view index init programs are freed before returning.
+ */
+static VkResult
+pvr_device_init_view_index_init_programs(struct pvr_device *device)
+{
+   uint32_t *staging_buffer = NULL;
+   VkResult result = VK_SUCCESS;
+
+   for (unsigned i = 0; i < PVR_MAX_MULTIVIEW; ++i) {
+      uint32_t *new_staging_buffer;
+      uint32_t staging_buffer_size;
+      struct pvr_pds_view_index_init_program *program =
+         &device->view_index_init_info[i];
+
+      program->view_index = i;
+
+      pvr_pds_generate_view_index_init_program(program,
+                                               NULL,
+                                               PDS_GENERATE_SIZES);
+
+      staging_buffer_size = program->data_size + program->code_size;
+
+      /* Only replace staging_buffer once the reallocation has succeeded; on
+       * failure the previous allocation is still live and is freed below.
+       */
+      new_staging_buffer = vk_realloc(&device->vk.alloc,
+                                      staging_buffer,
+                                      staging_buffer_size,
+                                      8U,
+                                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!new_staging_buffer) {
+         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+         break;
+      }
+
+      staging_buffer = new_staging_buffer;
+
+      pvr_pds_generate_view_index_init_program(program,
+                                               staging_buffer,
+                                               PDS_GENERATE_DATA_SEGMENT);
+      pvr_pds_generate_view_index_init_program(
+         program,
+         &staging_buffer[program->data_size],
+         PDS_GENERATE_CODE_SEGMENT);
+
+      result =
+         pvr_gpu_upload_pds(device,
+                            (program->data_size == 0 ? NULL : staging_buffer),
+                            program->data_size / sizeof(uint32_t),
+                            16U,
+                            &staging_buffer[program->data_size],
+                            program->code_size / sizeof(uint32_t),
+                            16U,
+                            16U,
+                            &device->view_index_init_programs[i]);
+
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   if (result != VK_SUCCESS)
+      for (uint32_t u = 0; u < PVR_MAX_MULTIVIEW; ++u)
+         pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
+
+   return result;
+}
+
 static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
 {
    simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);
@@ -2104,10 +2177,14 @@ VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
    if (result != VK_SUCCESS)
       goto err_pvr_free_compute_fence;
 
-   result = pvr_device_create_compute_query_programs(device);
+   result = pvr_device_init_view_index_init_programs(device);
    if (result != VK_SUCCESS)
       goto err_pvr_free_compute_empty;
 
+   result = pvr_device_create_compute_query_programs(device);
+   if (result != VK_SUCCESS)
+      goto err_pvr_free_view_index;
+
    result = pvr_device_init_compute_idfwdf_state(device);
    if (result != VK_SUCCESS)
       goto err_pvr_destroy_compute_query_programs;
@@ -2177,6 +2254,10 @@ err_pvr_destroy_compute_query_programs:
+err_pvr_free_view_index:
+   for (uint32_t u = 0; u < PVR_MAX_MULTIVIEW; ++u)
+      pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
+
 err_pvr_free_compute_empty:
    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
 
 err_pvr_free_compute_fence:
    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
 
@@ -2229,6 +2310,10 @@ void pvr_DestroyDevice(VkDevice _device,
    pvr_device_finish_compute_idfwdf_state(device);
    pvr_device_destroy_compute_query_programs(device);
    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
+
+   for (uint32_t u = 0; u < PVR_MAX_MULTIVIEW; ++u)
+      pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
+
    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
    pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
    pvr_bo_suballoc_free(device->nop_program.usc);
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index 4223ca4ed6d..724d03063f7 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -214,6 +214,10 @@ struct pvr_device {
       struct pvr_suballoc_bo *usc;
    } nop_program;
 
+   struct pvr_pds_view_index_init_program
+      view_index_init_info[PVR_MAX_MULTIVIEW];
+   struct pvr_pds_upload view_index_init_programs[PVR_MAX_MULTIVIEW];
+
    /* Issue Data Fence, Wait for Data Fence state. */
    struct {
       uint32_t usc_shareds;
@@ -482,6 +486,9 @@ struct pvr_sub_cmd_gfx {
    /* Control stream builder object */
    struct pvr_csb control_stream;
 
+   struct pvr_bo *multiview_ctrl_stream;
+   uint32_t multiview_ctrl_stream_stride;
+
    /* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
    struct pvr_bo *terminate_ctrl_stream;
 
@@ -520,6 +527,7 @@ struct pvr_sub_cmd_gfx {
    bool has_depth_feedback;
 
    uint32_t view_mask;
+   bool multiview_enabled;
 };
 
 struct pvr_sub_cmd_compute {
diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c
index 5453edb3c6e..1045bea6f09 100644
--- a/src/imagination/vulkan/pvr_queue.c
+++ b/src/imagination/vulkan/pvr_queue.c
@@ -288,6 +288,11 @@ pvr_process_graphics_cmd_for_view(struct pvr_device *device,
        */
       assert(sub_cmd->terminate_ctrl_stream);
       job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
+   } else if (sub_cmd->multiview_enabled) {
+      original_ctrl_stream_addr = job->ctrl_stream_addr;
+      job->ctrl_stream_addr.addr =
+         sub_cmd->multiview_ctrl_stream->vma->dev_addr.addr +
+         (view_index * PVR_DW_TO_BYTES(sub_cmd->multiview_ctrl_stream_stride));
    }
 
    result = pvr_render_job_submit(queue->gfx_ctx,