pvr: move load_op_shader_generate to pvr_mrt

Signed-off-by: Ella Stanforth <ella@igalia.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38744>
Ella Stanforth authored 2025-10-13 16:13:39 +01:00, committed by Marge Bot
parent b076f8170e
commit 0bf47f0435
5 changed files with 191 additions and 184 deletions


@@ -36,6 +36,7 @@
#include "pvr_device.h"
#include "pvr_job_context.h"
#include "pvr_macros.h"
#include "pvr_mrt.h"
#include "pvr_pass.h"
#include "pvr_pds.h"
#include "pvr_physical_device.h"


@@ -8,6 +8,7 @@
#include "vk_log.h"
#include "pvr_csb.h"
#include "pvr_device.h"
#include "pvr_formats.h"
#include "pvr_physical_device.h"
@@ -235,3 +236,180 @@ pvr_destroy_mrt_setup(const struct pvr_device *device,
   vk_free(&device->vk.alloc, setup->mrt_resources);
}

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   pco_shader *fs,
   struct pvr_suballoc_bo *shader_bo,
   struct pvr_pds_upload *pds_frag_prog,
   bool msaa)
{
   struct pvr_pds_kickusc_program program = { 0 };
   pco_data *fs_data = pco_shader_data(fs);
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   const pvr_dev_addr_t exec_addr =
      PVR_DEV_ADDR_OFFSET(shader_bo->dev_addr, fs_data->common.entry_offset);

   /* Note this is not strictly required to be done before calculating the
    * staging_buffer_size in this particular case. It can also be done after
    * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
    */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       exec_addr.addr,
                       fs_data->common.temps,
                       msaa ? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL
                            : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       fs_data->fs.uses.phase_change);

   pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_kick_usc(&program,
                    staging_buffer,
                    0,
                    false,
                    PDS_GENERATE_CODEDATA_SEGMENTS);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               pds_frag_prog);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

VkResult
pvr_load_op_shader_generate(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = pvr_get_slc_cache_line_size(dev_info);

   pco_shader *loadop = pvr_uscgen_loadop(device->pdevice->pco_ctx, load_op);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pco_shader_binary_data(loadop),
                                        pco_shader_binary_size(loadop),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS) {
      ralloc_free(loadop);
      return result;
   }

   const bool msaa = load_op->clears_loads_state.unresolved_msaa_mask &
                     load_op->clears_loads_state.rt_load_mask;

   result =
      pvr_pds_fragment_program_create_and_upload(device,
                                                 allocator,
                                                 loadop,
                                                 load_op->usc_frag_prog_bo,
                                                 &load_op->pds_frag_prog,
                                                 msaa);

   load_op->temps_count = pco_shader_data(loadop)->common.temps;
   ralloc_free(loadop);

   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   /* Manually hard coding `texture_kicks` to 1 since we'll pack everything into
    * one buffer to be DMAed. See `pvr_load_op_data_create_and_upload()`, where
    * we upload the buffer and upload the code section.
    */
   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}
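
For context, a minimal sketch of how the relocated entry point might be driven by a caller. This is illustrative only and not part of this change: it assumes a struct pvr_load_op that has already been populated elsewhere, and pvr_load_op_programs_demo() is a hypothetical helper whose teardown simply mirrors the error-unwind order inside pvr_load_op_shader_generate() above.

/* Hypothetical caller sketch (not part of this commit): generate the load-op
 * USC/PDS programs for an already-initialized pvr_load_op, then release them.
 */
static VkResult pvr_load_op_programs_demo(struct pvr_device *device,
                                          const VkAllocationCallbacks *allocator,
                                          struct pvr_load_op *load_op)
{
   VkResult result = pvr_load_op_shader_generate(device, allocator, load_op);
   if (result != VK_SUCCESS)
      return result;

   /* ... record work that references load_op->pds_frag_prog,
    * load_op->pds_tex_state_prog and load_op->usc_frag_prog_bo ...
    */

   /* Assumed teardown: pds_tex_state_prog.pvr_bo by analogy with
    * pds_frag_prog.pvr_bo, then the same two frees as the error path above.
    */
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return VK_SUCCESS;
}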


@@ -168,6 +168,18 @@ CHECK_MASK_SIZE(pvr_load_op,
#undef CHECK_MASK_SIZE

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_load_op_shader_generate(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   struct pvr_load_op *load_op);

struct pvr_load_op_state {
   uint32_t load_op_count;


@@ -137,183 +137,6 @@ static inline bool pvr_has_output_register_writes(
   return false;
}

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   pco_shader *fs,
   struct pvr_suballoc_bo *shader_bo,
   struct pvr_pds_upload *pds_frag_prog,
   bool msaa)
{
   struct pvr_pds_kickusc_program program = { 0 };
   pco_data *fs_data = pco_shader_data(fs);
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   const pvr_dev_addr_t exec_addr =
      PVR_DEV_ADDR_OFFSET(shader_bo->dev_addr, fs_data->common.entry_offset);

   /* Note this is not strictly required to be done before calculating the
    * staging_buffer_size in this particular case. It can also be done after
    * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
    */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       exec_addr.addr,
                       fs_data->common.temps,
                       msaa ? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL
                            : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       fs_data->fs.uses.phase_change);

   pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_kick_usc(&program,
                    staging_buffer,
                    0,
                    false,
                    PDS_GENERATE_CODEDATA_SEGMENTS);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               pds_frag_prog);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

static VkResult
pvr_load_op_shader_generate(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = pvr_get_slc_cache_line_size(dev_info);

   pco_shader *loadop = pvr_uscgen_loadop(device->pdevice->pco_ctx, load_op);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pco_shader_binary_data(loadop),
                                        pco_shader_binary_size(loadop),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS) {
      ralloc_free(loadop);
      return result;
   }

   const bool msaa = load_op->clears_loads_state.unresolved_msaa_mask &
                     load_op->clears_loads_state.rt_load_mask;

   result =
      pvr_pds_fragment_program_create_and_upload(device,
                                                 allocator,
                                                 loadop,
                                                 load_op->usc_frag_prog_bo,
                                                 &load_op->pds_frag_prog,
                                                 msaa);

   load_op->temps_count = pco_shader_data(loadop)->common.temps;
   ralloc_free(loadop);

   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   /* Manually hard coding `texture_kicks` to 1 since we'll pack everything into
    * one buffer to be DMAed. See `pvr_load_op_data_create_and_upload()`, where
    * we upload the buffer and upload the code section.
    */
   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}

/* TODO: pvr_subpass_load_op_init() and pvr_render_load_op_init() are quite
 * similar. See if we can dedup them?
 */


@@ -122,11 +122,4 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
                               VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out);

#endif /* PVR_PASS */