From a4d7b343c59620c9f4223b27a5217a8b38fa6fb4 Mon Sep 17 00:00:00 2001 From: Ashish Chauhan Date: Tue, 22 Oct 2024 10:03:06 +0100 Subject: [PATCH] pvr: Add support for gpu multicore MC1 configurations Signed-off-by: Ashish Chauhan Acked-by: Erik Faye-Lund Part-of: --- .../include/hwdef/rogue_hw_utils.h | 4 +++ src/imagination/vulkan/pvr_device.c | 21 +++++++++--- src/imagination/vulkan/pvr_hw_pass.c | 1 + src/imagination/vulkan/pvr_job_compute.c | 4 +-- src/imagination/vulkan/pvr_job_context.c | 34 ++++++++++++------- src/imagination/vulkan/pvr_job_render.c | 11 +++--- 6 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h index 52d8b67882e..cbe281d93de 100644 --- a/src/imagination/include/hwdef/rogue_hw_utils.h +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -306,6 +306,10 @@ rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info) const uint32_t cdm_context_resume_buffer_stride = ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size); + /* + * TODO: Optimise buffer size based on the core_count, + * not max_num_cores + */ return cdm_context_resume_buffer_stride * max_num_cores; } diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 78dd9b3bd6f..84be6c0d1c5 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -301,6 +301,8 @@ static bool pvr_physical_device_get_properties( struct vk_properties *const properties) { const struct pvr_device_info *const dev_info = &pdevice->dev_info; + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; /* Default value based on the minimum value found in all existing cores. */ const uint32_t max_multisample = @@ -527,11 +529,20 @@ static bool pvr_physical_device_get_properties( .uniformTexelBufferOffsetSingleTexelAlignment = false, }; - snprintf(properties->deviceName, - sizeof(properties->deviceName), - "PowerVR %s %s", - dev_info->ident.series_name, - dev_info->ident.public_name); + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + snprintf(properties->deviceName, + sizeof(properties->deviceName), + "PowerVR %s %s MC%u", + dev_info->ident.series_name, + dev_info->ident.public_name, + dev_runtime_info->core_count); + } else { + snprintf(properties->deviceName, + sizeof(properties->deviceName), + "PowerVR %s %s", + dev_info->ident.series_name, + dev_info->ident.public_name); + } return true; } diff --git a/src/imagination/vulkan/pvr_hw_pass.c b/src/imagination/vulkan/pvr_hw_pass.c index eac7f445e55..8ed561eedce 100644 --- a/src/imagination/vulkan/pvr_hw_pass.c +++ b/src/imagination/vulkan/pvr_hw_pass.c @@ -357,6 +357,7 @@ pvr_get_tile_buffer_size_per_core(const struct pvr_device *device) uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device) { /* On a multicore system duplicate the buffer for each core. */ + /* TODO: Optimise tile buffer size to use core_count, not max_num_cores. */ return pvr_get_tile_buffer_size_per_core(device) * rogue_get_max_num_cores(&device->pdevice->dev_info); } diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c index 2ddf5046e3f..f65b802ab59 100644 --- a/src/imagination/vulkan/pvr_job_compute.c +++ b/src/imagination/vulkan/pvr_job_compute.c @@ -126,8 +126,8 @@ pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx, } if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { - pvr_finishme( - "Emit execute_count when feature gpu_multicore_support is present"); + if (device->pdevice->dev_runtime_info.core_count > 1) + pvr_finishme("Emit execute_count, core_count is greater than one"); *stream_ptr = 0; stream_ptr++; } diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c index f213555159a..ae7156c49d9 100644 --- a/src/imagination/vulkan/pvr_job_context.c +++ b/src/imagination/vulkan/pvr_job_context.c @@ -278,6 +278,7 @@ pvr_render_job_pt_programs_cleanup(struct pvr_device *device, } static void pvr_pds_ctx_sr_program_setup( + uint32_t core_count, bool cc_enable, uint64_t usc_program_upload_offset, uint8_t usc_temps, @@ -286,17 +287,22 @@ static void pvr_pds_ctx_sr_program_setup( { /* The PDS task is the same for stores and loads. */ *program_out = (struct pvr_pds_shared_storing_program){ - .cc_enable = cc_enable, - .doutw_control = { - .dest_store = PDS_UNIFIED_STORE, - .num_const64 = 2, - .doutw_data = { - [0] = sr_addr.addr, - [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE, - }, - .last_instruction = false, - }, - }; + .cc_enable = cc_enable, + .doutw_control = { + .dest_store = PDS_UNIFIED_STORE, + .num_const64 = 2, + .doutw_data = { + [0] = sr_addr.addr, + [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE, + }, + .last_instruction = false, + }, + }; + + if (core_count > 1) { + pvr_finishme( + "Handle LLS_USC_SHARED_REGS_BUFFER_SIZE in DOUTW data_control"); + } pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control, usc_program_upload_offset, @@ -330,7 +336,8 @@ static VkResult pvr_pds_render_ctx_sr_program_create_and_upload( ASSERTED uint32_t *buffer_end; uint32_t code_offset; - pvr_pds_ctx_sr_program_setup(false, + pvr_pds_ctx_sr_program_setup(device->pdevice->dev_runtime_info.core_count, + false, usc_program_upload_offset, usc_temps, sr_addr, @@ -389,7 +396,8 @@ static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload( uint32_t *buffer_ptr; uint32_t code_offset; - pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421), + pvr_pds_ctx_sr_program_setup(device->pdevice->dev_runtime_info.core_count, + PVR_HAS_ERN(dev_info, 35421), usc_program_upload_offset, usc_temps, sr_addr, diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index 4fb6f208e08..1af084e00ce 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1342,6 +1342,9 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, */ value.dbias_is_int = PVR_HAS_ERN(dev_info, 42307) && pvr_zls_format_type_is_int(job->ds.zls_format); + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) + value.skip_init_hdrs = true; } /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be * possible to fully pack CR_ISP_CTL above rather than having to OR in part @@ -1399,8 +1402,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA); if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { - pvr_finishme( - "Emit isp_oclqry_stride when feature gpu_multicore_support is present"); + if (device->pdevice->dev_runtime_info.core_count > 1) + pvr_finishme("Emit isp_oclqry_stride, core_count is greater than one"); *stream_ptr = 0; stream_ptr++; } @@ -1429,8 +1432,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr++; if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { - pvr_finishme( - "Emit execute_count when feature gpu_multicore_support is present"); + if (device->pdevice->dev_runtime_info.core_count > 1) + pvr_finishme("Emit execute_count core_count is greater than one"); *stream_ptr = 0; stream_ptr++; }