mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 01:38:06 +02:00
pvr: Refactor pvr_compute_pipeline and pvr_compute_pipeline_shader
This brings the two structs more into line with their graphics counterparts, and removes the extra member access previously required to access almost all members of pvr_compute_pipeline. Signed-off-by: Matt Coster <matt.coster@imgtec.com> Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20159>
This commit is contained in:
parent
327c906424
commit
293230faf1
5 changed files with 86 additions and 90 deletions
|
|
@ -2960,7 +2960,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
|||
struct pvr_csb *csb = &sub_cmd->control_stream;
|
||||
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
|
||||
const uint32_t const_shared_reg_count =
|
||||
pipeline->state.shader.const_shared_reg_count;
|
||||
pipeline->shader_state.const_shared_reg_count;
|
||||
struct pvr_compute_kernel_info info;
|
||||
|
||||
/* No shared regs, no need to use an allocation kernel. */
|
||||
|
|
@ -2986,9 +2986,9 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
|||
* allocation of the local/common store shared registers. Use the
|
||||
* pre-uploaded empty PDS program in this instance.
|
||||
*/
|
||||
if (pipeline->state.descriptor.pds_info.code_size_in_dwords) {
|
||||
if (pipeline->descriptor_state.pds_info.code_size_in_dwords) {
|
||||
uint32_t pds_data_size_in_dwords =
|
||||
pipeline->state.descriptor.pds_info.data_size_in_dwords;
|
||||
pipeline->descriptor_state.pds_info.data_size_in_dwords;
|
||||
|
||||
info.pds_data_offset = state->pds_compute_descriptor_data_offset;
|
||||
info.pds_data_size =
|
||||
|
|
@ -2996,8 +2996,8 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
|||
PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE));
|
||||
|
||||
/* Check that we have uploaded the code section. */
|
||||
assert(pipeline->state.descriptor.pds_code.code_size);
|
||||
info.pds_code_offset = pipeline->state.descriptor.pds_code.code_offset;
|
||||
assert(pipeline->descriptor_state.pds_code.code_size);
|
||||
info.pds_code_offset = pipeline->descriptor_state.pds_code.code_offset;
|
||||
} else {
|
||||
const struct pvr_pds_upload *program = &device->pds_compute_empty_program;
|
||||
|
||||
|
|
@ -3179,8 +3179,9 @@ static void pvr_compute_update_kernel(
|
|||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_csb *csb = &sub_cmd->control_stream;
|
||||
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
|
||||
const struct pvr_pds_info *program_info =
|
||||
&pipeline->state.primary_program_info;
|
||||
const struct pvr_compute_shader_state *shader_state =
|
||||
&pipeline->shader_state;
|
||||
const struct pvr_pds_info *program_info = &pipeline->primary_program_info;
|
||||
|
||||
struct pvr_compute_kernel_info info = {
|
||||
.indirect_buffer_addr = indirect_addr,
|
||||
|
|
@ -3192,13 +3193,13 @@ static void pvr_compute_update_kernel(
|
|||
.pds_data_size =
|
||||
DIV_ROUND_UP(program_info->data_size_in_dwords << 2U,
|
||||
PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
|
||||
.pds_data_offset = pipeline->state.primary_program.data_offset,
|
||||
.pds_code_offset = pipeline->state.primary_program.code_offset,
|
||||
.pds_data_offset = pipeline->primary_program.data_offset,
|
||||
.pds_code_offset = pipeline->primary_program.code_offset,
|
||||
|
||||
.sd_type = PVRX(CDMCTRL_SD_TYPE_NONE),
|
||||
|
||||
.usc_unified_size =
|
||||
DIV_ROUND_UP(pipeline->state.shader.input_register_count << 2U,
|
||||
DIV_ROUND_UP(shader_state->input_register_count << 2U,
|
||||
PVRX(CDMCTRL_KERNEL0_USC_UNIFIED_SIZE_UNIT_SIZE)),
|
||||
|
||||
/* clang-format off */
|
||||
|
|
@ -3210,7 +3211,7 @@ static void pvr_compute_update_kernel(
|
|||
/* clang-format on */
|
||||
};
|
||||
|
||||
uint32_t work_size = pipeline->state.shader.work_size;
|
||||
uint32_t work_size = shader_state->work_size;
|
||||
uint32_t coeff_regs;
|
||||
|
||||
if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
|
||||
|
|
@ -3218,7 +3219,7 @@ static void pvr_compute_update_kernel(
|
|||
*/
|
||||
coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
|
||||
} else {
|
||||
coeff_regs = pipeline->state.shader.coefficient_register_count;
|
||||
coeff_regs = shader_state->coefficient_register_count;
|
||||
}
|
||||
|
||||
info.usc_common_size =
|
||||
|
|
@ -3228,9 +3229,9 @@ static void pvr_compute_update_kernel(
|
|||
/* Use a whole slot per workgroup. */
|
||||
work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||
|
||||
coeff_regs += pipeline->state.shader.const_shared_reg_count;
|
||||
coeff_regs += shader_state->const_shared_reg_count;
|
||||
|
||||
if (pipeline->state.shader.const_shared_reg_count > 0)
|
||||
if (shader_state->const_shared_reg_count > 0)
|
||||
info.sd_type = PVRX(CDMCTRL_SD_TYPE_USC);
|
||||
|
||||
work_size =
|
||||
|
|
@ -3270,8 +3271,8 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||
|
||||
sub_cmd = &state->current_sub_cmd->compute;
|
||||
|
||||
sub_cmd->uses_atomic_ops |= compute_pipeline->state.shader.uses_atomic_ops;
|
||||
sub_cmd->uses_barrier |= compute_pipeline->state.shader.uses_barrier;
|
||||
sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
|
||||
sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
|
||||
|
||||
if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
/* TODO: Add a dirty push constants mask in the cmd_buffer state and
|
||||
|
|
@ -3280,7 +3281,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||
pvr_finishme("Add support for push constants.");
|
||||
}
|
||||
|
||||
if (compute_pipeline->state.shader.uses_num_workgroups) {
|
||||
if (compute_pipeline->shader_state.uses_num_workgroups) {
|
||||
struct pvr_bo *num_workgroups_bo;
|
||||
|
||||
result = pvr_cmd_buffer_upload_general(cmd_buffer,
|
||||
|
|
@ -3293,7 +3294,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||
result = pvr_setup_descriptor_mappings(
|
||||
cmd_buffer,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor,
|
||||
&compute_pipeline->descriptor_state,
|
||||
&num_workgroups_bo->vma->dev_addr,
|
||||
&state->pds_compute_descriptor_data_offset);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -3305,7 +3306,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||
result = pvr_setup_descriptor_mappings(
|
||||
cmd_buffer,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor,
|
||||
&compute_pipeline->descriptor_state,
|
||||
NULL,
|
||||
&state->pds_compute_descriptor_data_offset);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -3343,8 +3344,8 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_COMPUTE);
|
||||
|
||||
sub_cmd = &state->current_sub_cmd->compute;
|
||||
sub_cmd->uses_atomic_ops |= compute_pipeline->state.shader.uses_atomic_ops;
|
||||
sub_cmd->uses_barrier |= compute_pipeline->state.shader.uses_barrier;
|
||||
sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
|
||||
sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
|
||||
|
||||
if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
/* TODO: Add a dirty push constants mask in the cmd_buffer state and
|
||||
|
|
@ -3353,11 +3354,11 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
pvr_finishme("Add support for push constants.");
|
||||
}
|
||||
|
||||
if (compute_pipeline->state.shader.uses_num_workgroups) {
|
||||
if (compute_pipeline->shader_state.uses_num_workgroups) {
|
||||
result = pvr_setup_descriptor_mappings(
|
||||
cmd_buffer,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor,
|
||||
&compute_pipeline->descriptor_state,
|
||||
&indirect_addr,
|
||||
&state->pds_compute_descriptor_data_offset);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -3369,7 +3370,7 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
result = pvr_setup_descriptor_mappings(
|
||||
cmd_buffer,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor,
|
||||
&compute_pipeline->descriptor_state,
|
||||
NULL,
|
||||
&state->pds_compute_descriptor_data_offset);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ static const struct pvr_hard_coding_data {
|
|||
size_t shader_size;
|
||||
|
||||
/* Note that the bo field will be unused. */
|
||||
const struct pvr_compute_pipeline_shader_state shader_info;
|
||||
const struct pvr_compute_shader_state shader_info;
|
||||
|
||||
const struct pvr_hard_code_compute_build_info build_info;
|
||||
} compute;
|
||||
|
|
@ -185,7 +185,7 @@ pvr_get_hard_coding_data(const struct pvr_device_info *const dev_info)
|
|||
|
||||
VkResult pvr_hard_code_compute_pipeline(
|
||||
struct pvr_device *const device,
|
||||
struct pvr_compute_pipeline_shader_state *const shader_state_out,
|
||||
struct pvr_compute_shader_state *const shader_state_out,
|
||||
struct pvr_hard_code_compute_build_info *const build_info_out)
|
||||
{
|
||||
const uint32_t cache_line_size =
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
* This should eventually be deleted as the compiler becomes more capable.
|
||||
*/
|
||||
|
||||
struct pvr_compute_pipeline_shader_state;
|
||||
struct pvr_compute_shader_state;
|
||||
struct pvr_device;
|
||||
struct pvr_fragment_shader_state;
|
||||
struct pvr_hard_coding_data;
|
||||
|
|
@ -80,7 +80,7 @@ bool pvr_hard_code_shader_required(const struct pvr_device_info *const dev_info)
|
|||
|
||||
VkResult pvr_hard_code_compute_pipeline(
|
||||
struct pvr_device *const device,
|
||||
struct pvr_compute_pipeline_shader_state *const shader_state_out,
|
||||
struct pvr_compute_shader_state *const shader_state_out,
|
||||
struct pvr_hard_code_compute_build_info *const build_info_out);
|
||||
|
||||
/* Returns a mask of MESA_SHADER_* (gl_shader_stage) indicating which stage
|
||||
|
|
|
|||
|
|
@ -1065,7 +1065,7 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
struct pvr_hard_code_compute_build_info build_info;
|
||||
|
||||
result = pvr_hard_code_compute_pipeline(device,
|
||||
&compute_pipeline->state.shader,
|
||||
&compute_pipeline->shader_state,
|
||||
&build_info);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
@ -1110,7 +1110,7 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
&explicit_const_usage,
|
||||
compute_pipeline->base.layout,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor);
|
||||
&compute_pipeline->descriptor_state);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_shader;
|
||||
|
||||
|
|
@ -1121,21 +1121,21 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
usc_temps,
|
||||
compute_pipeline->state.shader.bo->vma->dev_addr,
|
||||
&compute_pipeline->state.primary_program,
|
||||
&compute_pipeline->state.primary_program_info);
|
||||
compute_pipeline->shader_state.bo->vma->dev_addr,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_descriptor_program;
|
||||
|
||||
/* If the workgroup ID is required, then we require the base workgroup
|
||||
* variant of the PDS compute program as well.
|
||||
*/
|
||||
compute_pipeline->state.flags.base_workgroup =
|
||||
compute_pipeline->flags.base_workgroup =
|
||||
work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
|
||||
work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
|
||||
work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
|
||||
|
||||
if (compute_pipeline->state.flags.base_workgroup) {
|
||||
if (compute_pipeline->flags.base_workgroup) {
|
||||
result = pvr_pds_compute_base_workgroup_variant_program_init(
|
||||
device,
|
||||
allocator,
|
||||
|
|
@ -1143,8 +1143,8 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
usc_temps,
|
||||
compute_pipeline->state.shader.bo->vma->dev_addr,
|
||||
&compute_pipeline->state.primary_base_workgroup_variant_program);
|
||||
compute_pipeline->shader_state.bo->vma->dev_addr,
|
||||
&compute_pipeline->primary_base_workgroup_variant_program);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_destroy_compute_program;
|
||||
}
|
||||
|
|
@ -1152,17 +1152,16 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
return VK_SUCCESS;
|
||||
|
||||
err_destroy_compute_program:
|
||||
pvr_pds_compute_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&compute_pipeline->state.primary_program,
|
||||
&compute_pipeline->state.primary_program_info);
|
||||
pvr_pds_compute_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
|
||||
err_free_descriptor_program:
|
||||
pvr_bo_free(device, compute_pipeline->state.descriptor.pds_code.pvr_bo);
|
||||
pvr_bo_free(device, compute_pipeline->descriptor_state.pds_code.pvr_bo);
|
||||
|
||||
err_free_shader:
|
||||
pvr_bo_free(device, compute_pipeline->state.shader.bo);
|
||||
pvr_bo_free(device, compute_pipeline->shader_state.bo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
@ -1235,22 +1234,21 @@ static void pvr_compute_pipeline_destroy(
|
|||
const VkAllocationCallbacks *const allocator,
|
||||
struct pvr_compute_pipeline *const compute_pipeline)
|
||||
{
|
||||
if (compute_pipeline->state.flags.base_workgroup) {
|
||||
if (compute_pipeline->flags.base_workgroup) {
|
||||
pvr_pds_compute_base_workgroup_variant_program_finish(
|
||||
device,
|
||||
allocator,
|
||||
&compute_pipeline->state.primary_base_workgroup_variant_program);
|
||||
&compute_pipeline->primary_base_workgroup_variant_program);
|
||||
}
|
||||
|
||||
pvr_pds_compute_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&compute_pipeline->state.primary_program,
|
||||
&compute_pipeline->state.primary_program_info);
|
||||
pvr_pds_compute_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
pvr_pds_descriptor_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->state.descriptor);
|
||||
pvr_bo_free(device, compute_pipeline->state.shader.bo);
|
||||
&compute_pipeline->descriptor_state);
|
||||
pvr_bo_free(device, compute_pipeline->shader_state.bo);
|
||||
|
||||
pvr_pipeline_finish(&compute_pipeline->base);
|
||||
|
||||
|
|
|
|||
|
|
@ -1162,6 +1162,21 @@ struct pvr_pipeline_stage_state {
|
|||
bool empty_program;
|
||||
};
|
||||
|
||||
struct pvr_compute_shader_state {
|
||||
/* Pointer to a buffer object that contains the shader binary. */
|
||||
struct pvr_bo *bo;
|
||||
|
||||
bool uses_atomic_ops;
|
||||
bool uses_barrier;
|
||||
/* E.g. GLSL shader uses gl_NumWorkGroups. */
|
||||
bool uses_num_workgroups;
|
||||
|
||||
uint32_t const_shared_reg_count;
|
||||
uint32_t input_register_count;
|
||||
uint32_t work_size;
|
||||
uint32_t coefficient_register_count;
|
||||
};
|
||||
|
||||
struct pvr_vertex_shader_state {
|
||||
/* Pointer to a buffer object that contains the shader binary. */
|
||||
struct pvr_bo *bo;
|
||||
|
|
@ -1207,46 +1222,28 @@ struct pvr_pipeline {
|
|||
struct pvr_compute_pipeline {
|
||||
struct pvr_pipeline base;
|
||||
|
||||
struct pvr_compute_shader_state shader_state;
|
||||
|
||||
struct {
|
||||
/* TODO: Change this to be an anonymous struct once the shader hardcoding
|
||||
* is removed.
|
||||
uint32_t base_workgroup : 1;
|
||||
} flags;
|
||||
|
||||
struct pvr_stage_allocation_descriptor_state descriptor_state;
|
||||
|
||||
struct pvr_pds_upload primary_program;
|
||||
struct pvr_pds_info primary_program_info;
|
||||
|
||||
struct pvr_pds_base_workgroup_program {
|
||||
struct pvr_pds_upload code_upload;
|
||||
|
||||
uint32_t *data_section;
|
||||
/* Offset within the PDS data section at which the base workgroup id
|
||||
* resides.
|
||||
*/
|
||||
struct pvr_compute_pipeline_shader_state {
|
||||
/* Pointer to a buffer object that contains the shader binary. */
|
||||
struct pvr_bo *bo;
|
||||
uint32_t base_workgroup_data_patching_offset;
|
||||
|
||||
bool uses_atomic_ops;
|
||||
bool uses_barrier;
|
||||
/* E.g. GLSL shader uses gl_NumWorkGroups. */
|
||||
bool uses_num_workgroups;
|
||||
|
||||
uint32_t const_shared_reg_count;
|
||||
uint32_t input_register_count;
|
||||
uint32_t work_size;
|
||||
uint32_t coefficient_register_count;
|
||||
} shader;
|
||||
|
||||
struct {
|
||||
uint32_t base_workgroup : 1;
|
||||
} flags;
|
||||
|
||||
struct pvr_stage_allocation_descriptor_state descriptor;
|
||||
|
||||
struct pvr_pds_upload primary_program;
|
||||
struct pvr_pds_info primary_program_info;
|
||||
|
||||
struct pvr_pds_base_workgroup_program {
|
||||
struct pvr_pds_upload code_upload;
|
||||
|
||||
uint32_t *data_section;
|
||||
/* Offset within the PDS data section at which the base workgroup id
|
||||
* resides.
|
||||
*/
|
||||
uint32_t base_workgroup_data_patching_offset;
|
||||
|
||||
struct pvr_pds_info info;
|
||||
} primary_base_workgroup_variant_program;
|
||||
} state;
|
||||
struct pvr_pds_info info;
|
||||
} primary_base_workgroup_variant_program;
|
||||
};
|
||||
|
||||
struct pvr_graphics_pipeline {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue