diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c index de43bb72aac..404e80df71a 100644 --- a/src/imagination/vulkan/pvr_job_context.c +++ b/src/imagination/vulkan/pvr_job_context.c @@ -36,6 +36,7 @@ #include "pvr_device.h" #include "pvr_job_context.h" #include "pvr_macros.h" +#include "pvr_mrt.h" #include "pvr_pass.h" #include "pvr_pds.h" #include "pvr_physical_device.h" diff --git a/src/imagination/vulkan/pvr_mrt.c b/src/imagination/vulkan/pvr_mrt.c index 8f6c25de9fc..e4d54b5827b 100644 --- a/src/imagination/vulkan/pvr_mrt.c +++ b/src/imagination/vulkan/pvr_mrt.c @@ -8,6 +8,7 @@ #include "vk_log.h" +#include "pvr_csb.h" #include "pvr_device.h" #include "pvr_formats.h" #include "pvr_physical_device.h" @@ -235,3 +236,180 @@ pvr_destroy_mrt_setup(const struct pvr_device *device, vk_free(&device->vk.alloc, setup->mrt_resources); } + +VkResult pvr_pds_unitex_state_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + uint32_t texture_kicks, + uint32_t uniform_kicks, + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_pds_pixel_shader_sa_program program = { + .num_texture_dma_kicks = texture_kicks, + .num_uniform_dma_kicks = uniform_kicks, + }; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program); + + staging_buffer_size = PVR_DW_TO_BYTES(program.code_size); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8U, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer); + + /* FIXME: Figure out the define for alignment of 16. */ + result = pvr_gpu_upload_pds(device, + NULL, + 0U, + 0U, + staging_buffer, + program.code_size, + 16U, + 16U, + pds_upload_out); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, staging_buffer); + return result; + } + + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return VK_SUCCESS; +} + +static VkResult pvr_pds_fragment_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + pco_shader *fs, + struct pvr_suballoc_bo *shader_bo, + struct pvr_pds_upload *pds_frag_prog, + bool msaa) +{ + struct pvr_pds_kickusc_program program = { 0 }; + pco_data *fs_data = pco_shader_data(fs); + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + const pvr_dev_addr_t exec_addr = + PVR_DEV_ADDR_OFFSET(shader_bo->dev_addr, fs_data->common.entry_offset); + + /* Note this is not strictly required to be done before calculating the + * staging_buffer_size in this particular case. It can also be done after + * allocating the buffer. The size from pvr_pds_kick_usc() is constant. + */ + pvr_pds_setup_doutu(&program.usc_task_control, + exec_addr.addr, + fs_data->common.temps, + msaa ? 
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL + : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, + fs_data->fs.uses.phase_change); + + pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES); + + staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_kick_usc(&program, + staging_buffer, + 0, + false, + PDS_GENERATE_CODEDATA_SEGMENTS); + + /* FIXME: Figure out the define for alignment of 16. */ + result = pvr_gpu_upload_pds(device, + &staging_buffer[0], + program.data_size, + 16, + &staging_buffer[program.data_size], + program.code_size, + 16, + 16, + pds_frag_prog); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, staging_buffer); + return result; + } + + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return VK_SUCCESS; +} + +VkResult +pvr_load_op_shader_generate(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_load_op *load_op) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = pvr_get_slc_cache_line_size(dev_info); + + pco_shader *loadop = pvr_uscgen_loadop(device->pdevice->pco_ctx, load_op); + + VkResult result = pvr_gpu_upload_usc(device, + pco_shader_binary_data(loadop), + pco_shader_binary_size(loadop), + cache_line_size, + &load_op->usc_frag_prog_bo); + + if (result != VK_SUCCESS) { + ralloc_free(loadop); + return result; + } + + const bool msaa = load_op->clears_loads_state.unresolved_msaa_mask & + load_op->clears_loads_state.rt_load_mask; + + result = + pvr_pds_fragment_program_create_and_upload(device, + allocator, + loadop, + load_op->usc_frag_prog_bo, + &load_op->pds_frag_prog, + msaa); + + load_op->temps_count = pco_shader_data(loadop)->common.temps; + ralloc_free(loadop); + + if (result != VK_SUCCESS) + goto err_free_usc_frag_prog_bo; + + /* Manually hard coding `texture_kicks` to 1 since we'll pack everything into + * one buffer to be DMAed. See `pvr_load_op_data_create_and_upload()`, where + * we upload the buffer and upload the code section. 
+ */ + result = pvr_pds_unitex_state_program_create_and_upload( + device, + allocator, + 1U, + 0U, + &load_op->pds_tex_state_prog); + if (result != VK_SUCCESS) + goto err_free_pds_frag_prog; + + return VK_SUCCESS; + +err_free_pds_frag_prog: + pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo); + +err_free_usc_frag_prog_bo: + pvr_bo_suballoc_free(load_op->usc_frag_prog_bo); + + return result; +} diff --git a/src/imagination/vulkan/pvr_mrt.h b/src/imagination/vulkan/pvr_mrt.h index dbd88bf28c4..178bb039bda 100644 --- a/src/imagination/vulkan/pvr_mrt.h +++ b/src/imagination/vulkan/pvr_mrt.h @@ -168,6 +168,18 @@ CHECK_MASK_SIZE(pvr_load_op, #undef CHECK_MASK_SIZE +VkResult pvr_pds_unitex_state_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + uint32_t texture_kicks, + uint32_t uniform_kicks, + struct pvr_pds_upload *const pds_upload_out); + +VkResult pvr_load_op_shader_generate( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_load_op *load_op); + struct pvr_load_op_state { uint32_t load_op_count; diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c index 7b357fa61ae..80928e1461c 100644 --- a/src/imagination/vulkan/pvr_pass.c +++ b/src/imagination/vulkan/pvr_pass.c @@ -137,183 +137,6 @@ static inline bool pvr_has_output_register_writes( return false; } -VkResult pvr_pds_unitex_state_program_create_and_upload( - struct pvr_device *device, - const VkAllocationCallbacks *allocator, - uint32_t texture_kicks, - uint32_t uniform_kicks, - struct pvr_pds_upload *const pds_upload_out) -{ - struct pvr_pds_pixel_shader_sa_program program = { - .num_texture_dma_kicks = texture_kicks, - .num_uniform_dma_kicks = uniform_kicks, - }; - uint32_t staging_buffer_size; - uint32_t *staging_buffer; - VkResult result; - - pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program); - - staging_buffer_size = PVR_DW_TO_BYTES(program.code_size); - - staging_buffer = vk_alloc2(&device->vk.alloc, - allocator, - staging_buffer_size, - 8U, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!staging_buffer) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer); - - /* FIXME: Figure out the define for alignment of 16. */ - result = pvr_gpu_upload_pds(device, - NULL, - 0U, - 0U, - staging_buffer, - program.code_size, - 16U, - 16U, - pds_upload_out); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, allocator, staging_buffer); - return result; - } - - vk_free2(&device->vk.alloc, allocator, staging_buffer); - - return VK_SUCCESS; -} - -static VkResult pvr_pds_fragment_program_create_and_upload( - struct pvr_device *device, - const VkAllocationCallbacks *allocator, - pco_shader *fs, - struct pvr_suballoc_bo *shader_bo, - struct pvr_pds_upload *pds_frag_prog, - bool msaa) -{ - struct pvr_pds_kickusc_program program = { 0 }; - pco_data *fs_data = pco_shader_data(fs); - uint32_t staging_buffer_size; - uint32_t *staging_buffer; - VkResult result; - - const pvr_dev_addr_t exec_addr = - PVR_DEV_ADDR_OFFSET(shader_bo->dev_addr, fs_data->common.entry_offset); - - /* Note this is not strictly required to be done before calculating the - * staging_buffer_size in this particular case. It can also be done after - * allocating the buffer. The size from pvr_pds_kick_usc() is constant. - */ - pvr_pds_setup_doutu(&program.usc_task_control, - exec_addr.addr, - fs_data->common.temps, - msaa ? 
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL - : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, - fs_data->fs.uses.phase_change); - - pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES); - - staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size); - - staging_buffer = vk_alloc2(&device->vk.alloc, - allocator, - staging_buffer_size, - 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!staging_buffer) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - pvr_pds_kick_usc(&program, - staging_buffer, - 0, - false, - PDS_GENERATE_CODEDATA_SEGMENTS); - - /* FIXME: Figure out the define for alignment of 16. */ - result = pvr_gpu_upload_pds(device, - &staging_buffer[0], - program.data_size, - 16, - &staging_buffer[program.data_size], - program.code_size, - 16, - 16, - pds_frag_prog); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, allocator, staging_buffer); - return result; - } - - vk_free2(&device->vk.alloc, allocator, staging_buffer); - - return VK_SUCCESS; -} - -static VkResult -pvr_load_op_shader_generate(struct pvr_device *device, - const VkAllocationCallbacks *allocator, - struct pvr_load_op *load_op) -{ - const struct pvr_device_info *dev_info = &device->pdevice->dev_info; - const uint32_t cache_line_size = pvr_get_slc_cache_line_size(dev_info); - - pco_shader *loadop = pvr_uscgen_loadop(device->pdevice->pco_ctx, load_op); - - VkResult result = pvr_gpu_upload_usc(device, - pco_shader_binary_data(loadop), - pco_shader_binary_size(loadop), - cache_line_size, - &load_op->usc_frag_prog_bo); - - if (result != VK_SUCCESS) { - ralloc_free(loadop); - return result; - } - - const bool msaa = load_op->clears_loads_state.unresolved_msaa_mask & - load_op->clears_loads_state.rt_load_mask; - - result = - pvr_pds_fragment_program_create_and_upload(device, - allocator, - loadop, - load_op->usc_frag_prog_bo, - &load_op->pds_frag_prog, - msaa); - - load_op->temps_count = pco_shader_data(loadop)->common.temps; - ralloc_free(loadop); - - if (result != VK_SUCCESS) - goto err_free_usc_frag_prog_bo; - - /* Manually hard coding `texture_kicks` to 1 since we'll pack everything into - * one buffer to be DMAed. See `pvr_load_op_data_create_and_upload()`, where - * we upload the buffer and upload the code section. - */ - result = pvr_pds_unitex_state_program_create_and_upload( - device, - allocator, - 1U, - 0U, - &load_op->pds_tex_state_prog); - if (result != VK_SUCCESS) - goto err_free_pds_frag_prog; - - return VK_SUCCESS; - -err_free_pds_frag_prog: - pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo); - -err_free_usc_frag_prog_bo: - pvr_bo_suballoc_free(load_op->usc_frag_prog_bo); - - return result; -} - /* TODO: pvr_subpass_load_op_init() and pvr_render_load_op_init() are quite * similar. See if we can dedup them? */ diff --git a/src/imagination/vulkan/pvr_pass.h b/src/imagination/vulkan/pvr_pass.h index 5a3a66f44c9..bcbe75b7a18 100644 --- a/src/imagination/vulkan/pvr_pass.h +++ b/src/imagination/vulkan/pvr_pass.h @@ -122,11 +122,4 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass, VkRenderPass, VK_OBJECT_TYPE_RENDER_PASS) -VkResult pvr_pds_unitex_state_program_create_and_upload( - struct pvr_device *device, - const VkAllocationCallbacks *allocator, - uint32_t texture_kicks, - uint32_t uniform_kicks, - struct pvr_pds_upload *const pds_upload_out); - #endif /* PVR_PASS */
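
For reference, here is a minimal caller sketch (not part of this patch) showing how code outside pvr_mrt.c is expected to use the now-exported helper via the new pvr_mrt.h declaration. It assumes a struct pvr_load_op that has already been allocated and populated elsewhere; pvr_example_setup_load_op() and pvr_example_later_setup_step() are hypothetical names used purely for illustration, while pvr_load_op_shader_generate(), pvr_bo_suballoc_free() and the pvr_load_op members are taken from the code above.

#include "pvr_mrt.h"

/* Hypothetical caller; only the pvr_load_op_shader_generate() call and the
 * cleanup of its uploads are grounded in this patch.
 */
static VkResult pvr_example_setup_load_op(struct pvr_device *device,
                                          const VkAllocationCallbacks *allocator,
                                          struct pvr_load_op *load_op)
{
   VkResult result;

   /* Uploads the USC fragment program plus the PDS fragment and
    * texture-state programs for this load op.
    */
   result = pvr_load_op_shader_generate(device, allocator, load_op);
   if (result != VK_SUCCESS)
      return result;

   /* Hypothetical follow-on step standing in for whatever the real caller
    * does next with the load op.
    */
   result = pvr_example_later_setup_step(load_op);
   if (result != VK_SUCCESS)
      goto err_free_load_op_programs;

   return VK_SUCCESS;

err_free_load_op_programs:
   /* Release the uploads in reverse order, mirroring the error paths inside
    * pvr_load_op_shader_generate() itself.
    */
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
   return result;
}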