pvr: Refactor pvr_compute_pipeline and pvr_compute_pipeline_shader

This brings the two structs more into line with their graphics
counterparts, and removes the extra member access previously required
to access almost all members of pvr_compute_pipeline.

Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20159>
This commit is contained in:
Matt Coster 2022-11-18 15:57:07 +00:00 committed by Marge Bot
parent 327c906424
commit 293230faf1
5 changed files with 86 additions and 90 deletions

View file

@ -2960,7 +2960,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_csb *csb = &sub_cmd->control_stream;
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
const uint32_t const_shared_reg_count =
pipeline->state.shader.const_shared_reg_count;
pipeline->shader_state.const_shared_reg_count;
struct pvr_compute_kernel_info info;
/* No shared regs, no need to use an allocation kernel. */
@ -2986,9 +2986,9 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
* allocation of the local/common store shared registers. Use the
* pre-uploaded empty PDS program in this instance.
*/
if (pipeline->state.descriptor.pds_info.code_size_in_dwords) {
if (pipeline->descriptor_state.pds_info.code_size_in_dwords) {
uint32_t pds_data_size_in_dwords =
pipeline->state.descriptor.pds_info.data_size_in_dwords;
pipeline->descriptor_state.pds_info.data_size_in_dwords;
info.pds_data_offset = state->pds_compute_descriptor_data_offset;
info.pds_data_size =
@ -2996,8 +2996,8 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE));
/* Check that we have uploaded the code section. */
assert(pipeline->state.descriptor.pds_code.code_size);
info.pds_code_offset = pipeline->state.descriptor.pds_code.code_offset;
assert(pipeline->descriptor_state.pds_code.code_size);
info.pds_code_offset = pipeline->descriptor_state.pds_code.code_offset;
} else {
const struct pvr_pds_upload *program = &device->pds_compute_empty_program;
@ -3179,8 +3179,9 @@ static void pvr_compute_update_kernel(
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
struct pvr_csb *csb = &sub_cmd->control_stream;
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
const struct pvr_pds_info *program_info =
&pipeline->state.primary_program_info;
const struct pvr_compute_shader_state *shader_state =
&pipeline->shader_state;
const struct pvr_pds_info *program_info = &pipeline->primary_program_info;
struct pvr_compute_kernel_info info = {
.indirect_buffer_addr = indirect_addr,
@ -3192,13 +3193,13 @@ static void pvr_compute_update_kernel(
.pds_data_size =
DIV_ROUND_UP(program_info->data_size_in_dwords << 2U,
PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
.pds_data_offset = pipeline->state.primary_program.data_offset,
.pds_code_offset = pipeline->state.primary_program.code_offset,
.pds_data_offset = pipeline->primary_program.data_offset,
.pds_code_offset = pipeline->primary_program.code_offset,
.sd_type = PVRX(CDMCTRL_SD_TYPE_NONE),
.usc_unified_size =
DIV_ROUND_UP(pipeline->state.shader.input_register_count << 2U,
DIV_ROUND_UP(shader_state->input_register_count << 2U,
PVRX(CDMCTRL_KERNEL0_USC_UNIFIED_SIZE_UNIT_SIZE)),
/* clang-format off */
@ -3210,7 +3211,7 @@ static void pvr_compute_update_kernel(
/* clang-format on */
};
uint32_t work_size = pipeline->state.shader.work_size;
uint32_t work_size = shader_state->work_size;
uint32_t coeff_regs;
if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
@ -3218,7 +3219,7 @@ static void pvr_compute_update_kernel(
*/
coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
} else {
coeff_regs = pipeline->state.shader.coefficient_register_count;
coeff_regs = shader_state->coefficient_register_count;
}
info.usc_common_size =
@ -3228,9 +3229,9 @@ static void pvr_compute_update_kernel(
/* Use a whole slot per workgroup. */
work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK);
coeff_regs += pipeline->state.shader.const_shared_reg_count;
coeff_regs += shader_state->const_shared_reg_count;
if (pipeline->state.shader.const_shared_reg_count > 0)
if (shader_state->const_shared_reg_count > 0)
info.sd_type = PVRX(CDMCTRL_SD_TYPE_USC);
work_size =
@ -3270,8 +3271,8 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
sub_cmd = &state->current_sub_cmd->compute;
sub_cmd->uses_atomic_ops |= compute_pipeline->state.shader.uses_atomic_ops;
sub_cmd->uses_barrier |= compute_pipeline->state.shader.uses_barrier;
sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
/* TODO: Add a dirty push constants mask in the cmd_buffer state and
@ -3280,7 +3281,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
pvr_finishme("Add support for push constants.");
}
if (compute_pipeline->state.shader.uses_num_workgroups) {
if (compute_pipeline->shader_state.uses_num_workgroups) {
struct pvr_bo *num_workgroups_bo;
result = pvr_cmd_buffer_upload_general(cmd_buffer,
@ -3293,7 +3294,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
result = pvr_setup_descriptor_mappings(
cmd_buffer,
PVR_STAGE_ALLOCATION_COMPUTE,
&compute_pipeline->state.descriptor,
&compute_pipeline->descriptor_state,
&num_workgroups_bo->vma->dev_addr,
&state->pds_compute_descriptor_data_offset);
if (result != VK_SUCCESS)
@ -3305,7 +3306,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
result = pvr_setup_descriptor_mappings(
cmd_buffer,
PVR_STAGE_ALLOCATION_COMPUTE,
&compute_pipeline->state.descriptor,
&compute_pipeline->descriptor_state,
NULL,
&state->pds_compute_descriptor_data_offset);
if (result != VK_SUCCESS)
@ -3343,8 +3344,8 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_COMPUTE);
sub_cmd = &state->current_sub_cmd->compute;
sub_cmd->uses_atomic_ops |= compute_pipeline->state.shader.uses_atomic_ops;
sub_cmd->uses_barrier |= compute_pipeline->state.shader.uses_barrier;
sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
/* TODO: Add a dirty push constants mask in the cmd_buffer state and
@ -3353,11 +3354,11 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
pvr_finishme("Add support for push constants.");
}
if (compute_pipeline->state.shader.uses_num_workgroups) {
if (compute_pipeline->shader_state.uses_num_workgroups) {
result = pvr_setup_descriptor_mappings(
cmd_buffer,
PVR_STAGE_ALLOCATION_COMPUTE,
&compute_pipeline->state.descriptor,
&compute_pipeline->descriptor_state,
&indirect_addr,
&state->pds_compute_descriptor_data_offset);
if (result != VK_SUCCESS)
@ -3369,7 +3370,7 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
result = pvr_setup_descriptor_mappings(
cmd_buffer,
PVR_STAGE_ALLOCATION_COMPUTE,
&compute_pipeline->state.descriptor,
&compute_pipeline->descriptor_state,
NULL,
&state->pds_compute_descriptor_data_offset);
if (result != VK_SUCCESS)

View file

@ -76,7 +76,7 @@ static const struct pvr_hard_coding_data {
size_t shader_size;
/* Note that the bo field will be unused. */
const struct pvr_compute_pipeline_shader_state shader_info;
const struct pvr_compute_shader_state shader_info;
const struct pvr_hard_code_compute_build_info build_info;
} compute;
@ -185,7 +185,7 @@ pvr_get_hard_coding_data(const struct pvr_device_info *const dev_info)
VkResult pvr_hard_code_compute_pipeline(
struct pvr_device *const device,
struct pvr_compute_pipeline_shader_state *const shader_state_out,
struct pvr_compute_shader_state *const shader_state_out,
struct pvr_hard_code_compute_build_info *const build_info_out)
{
const uint32_t cache_line_size =

View file

@ -38,7 +38,7 @@
* This should eventually be deleted as the compiler becomes more capable.
*/
struct pvr_compute_pipeline_shader_state;
struct pvr_compute_shader_state;
struct pvr_device;
struct pvr_fragment_shader_state;
struct pvr_hard_coding_data;
@ -80,7 +80,7 @@ bool pvr_hard_code_shader_required(const struct pvr_device_info *const dev_info)
VkResult pvr_hard_code_compute_pipeline(
struct pvr_device *const device,
struct pvr_compute_pipeline_shader_state *const shader_state_out,
struct pvr_compute_shader_state *const shader_state_out,
struct pvr_hard_code_compute_build_info *const build_info_out);
/* Returns a mask of MESA_SHADER_* (gl_shader_stage) indicating which stage

View file

@ -1065,7 +1065,7 @@ static VkResult pvr_compute_pipeline_compile(
struct pvr_hard_code_compute_build_info build_info;
result = pvr_hard_code_compute_pipeline(device,
&compute_pipeline->state.shader,
&compute_pipeline->shader_state,
&build_info);
if (result != VK_SUCCESS)
return result;
@ -1110,7 +1110,7 @@ static VkResult pvr_compute_pipeline_compile(
&explicit_const_usage,
compute_pipeline->base.layout,
PVR_STAGE_ALLOCATION_COMPUTE,
&compute_pipeline->state.descriptor);
&compute_pipeline->descriptor_state);
if (result != VK_SUCCESS)
goto err_free_shader;
@ -1121,21 +1121,21 @@ static VkResult pvr_compute_pipeline_compile(
work_group_input_regs,
barrier_coefficient,
usc_temps,
compute_pipeline->state.shader.bo->vma->dev_addr,
&compute_pipeline->state.primary_program,
&compute_pipeline->state.primary_program_info);
compute_pipeline->shader_state.bo->vma->dev_addr,
&compute_pipeline->primary_program,
&compute_pipeline->primary_program_info);
if (result != VK_SUCCESS)
goto err_free_descriptor_program;
/* If the workgroup ID is required, then we require the base workgroup
* variant of the PDS compute program as well.
*/
compute_pipeline->state.flags.base_workgroup =
compute_pipeline->flags.base_workgroup =
work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
if (compute_pipeline->state.flags.base_workgroup) {
if (compute_pipeline->flags.base_workgroup) {
result = pvr_pds_compute_base_workgroup_variant_program_init(
device,
allocator,
@ -1143,8 +1143,8 @@ static VkResult pvr_compute_pipeline_compile(
work_group_input_regs,
barrier_coefficient,
usc_temps,
compute_pipeline->state.shader.bo->vma->dev_addr,
&compute_pipeline->state.primary_base_workgroup_variant_program);
compute_pipeline->shader_state.bo->vma->dev_addr,
&compute_pipeline->primary_base_workgroup_variant_program);
if (result != VK_SUCCESS)
goto err_destroy_compute_program;
}
@ -1152,17 +1152,16 @@ static VkResult pvr_compute_pipeline_compile(
return VK_SUCCESS;
err_destroy_compute_program:
pvr_pds_compute_program_destroy(
device,
allocator,
&compute_pipeline->state.primary_program,
&compute_pipeline->state.primary_program_info);
pvr_pds_compute_program_destroy(device,
allocator,
&compute_pipeline->primary_program,
&compute_pipeline->primary_program_info);
err_free_descriptor_program:
pvr_bo_free(device, compute_pipeline->state.descriptor.pds_code.pvr_bo);
pvr_bo_free(device, compute_pipeline->descriptor_state.pds_code.pvr_bo);
err_free_shader:
pvr_bo_free(device, compute_pipeline->state.shader.bo);
pvr_bo_free(device, compute_pipeline->shader_state.bo);
return result;
}
@ -1235,22 +1234,21 @@ static void pvr_compute_pipeline_destroy(
const VkAllocationCallbacks *const allocator,
struct pvr_compute_pipeline *const compute_pipeline)
{
if (compute_pipeline->state.flags.base_workgroup) {
if (compute_pipeline->flags.base_workgroup) {
pvr_pds_compute_base_workgroup_variant_program_finish(
device,
allocator,
&compute_pipeline->state.primary_base_workgroup_variant_program);
&compute_pipeline->primary_base_workgroup_variant_program);
}
pvr_pds_compute_program_destroy(
device,
allocator,
&compute_pipeline->state.primary_program,
&compute_pipeline->state.primary_program_info);
pvr_pds_compute_program_destroy(device,
allocator,
&compute_pipeline->primary_program,
&compute_pipeline->primary_program_info);
pvr_pds_descriptor_program_destroy(device,
allocator,
&compute_pipeline->state.descriptor);
pvr_bo_free(device, compute_pipeline->state.shader.bo);
&compute_pipeline->descriptor_state);
pvr_bo_free(device, compute_pipeline->shader_state.bo);
pvr_pipeline_finish(&compute_pipeline->base);

View file

@ -1162,6 +1162,21 @@ struct pvr_pipeline_stage_state {
bool empty_program;
};
/* Per-pipeline state describing the compiled compute shader binary and the
 * resource counts the command buffer needs when emitting dispatch kernels.
 */
struct pvr_compute_shader_state {
/* Pointer to a buffer object that contains the shader binary. */
struct pvr_bo *bo;
/* Shader contains atomic ops; ORed into the compute sub-command's
 * uses_atomic_ops flag at dispatch time.
 */
bool uses_atomic_ops;
/* Shader contains barriers; ORed into the sub-command's uses_barrier flag. */
bool uses_barrier;
/* E.g. GLSL shader uses gl_NumWorkGroups. */
bool uses_num_workgroups;
/* Constant shared registers used by the shader. When non-zero, a shared
 * register allocation kernel is emitted before the dispatch.
 */
uint32_t const_shared_reg_count;
/* Input registers; determines the kernel's USC unified store size. */
uint32_t input_register_count;
/* Workgroup size in invocations; clamped against
 * ROGUE_MAX_INSTANCES_PER_TASK when sizing coefficient registers.
 */
uint32_t work_size;
/* Coefficient registers used per instance when the workgroup fits in a
 * single task; otherwise the device maximum is used instead.
 */
uint32_t coefficient_register_count;
};
struct pvr_vertex_shader_state {
/* Pointer to a buffer object that contains the shader binary. */
struct pvr_bo *bo;
@ -1207,46 +1222,28 @@ struct pvr_pipeline {
struct pvr_compute_pipeline {
struct pvr_pipeline base;
struct pvr_compute_shader_state shader_state;
struct {
/* TODO: Change this to be an anonymous struct once the shader hardcoding
* is removed.
uint32_t base_workgroup : 1;
} flags;
struct pvr_stage_allocation_descriptor_state descriptor_state;
struct pvr_pds_upload primary_program;
struct pvr_pds_info primary_program_info;
struct pvr_pds_base_workgroup_program {
struct pvr_pds_upload code_upload;
uint32_t *data_section;
/* Offset within the PDS data section at which the base workgroup id
* resides.
*/
struct pvr_compute_pipeline_shader_state {
/* Pointer to a buffer object that contains the shader binary. */
struct pvr_bo *bo;
uint32_t base_workgroup_data_patching_offset;
bool uses_atomic_ops;
bool uses_barrier;
/* E.g. GLSL shader uses gl_NumWorkGroups. */
bool uses_num_workgroups;
uint32_t const_shared_reg_count;
uint32_t input_register_count;
uint32_t work_size;
uint32_t coefficient_register_count;
} shader;
struct {
uint32_t base_workgroup : 1;
} flags;
struct pvr_stage_allocation_descriptor_state descriptor;
struct pvr_pds_upload primary_program;
struct pvr_pds_info primary_program_info;
struct pvr_pds_base_workgroup_program {
struct pvr_pds_upload code_upload;
uint32_t *data_section;
/* Offset within the PDS data section at which the base workgroup id
* resides.
*/
uint32_t base_workgroup_data_patching_offset;
struct pvr_pds_info info;
} primary_base_workgroup_variant_program;
} state;
struct pvr_pds_info info;
} primary_base_workgroup_variant_program;
};
struct pvr_graphics_pipeline {