From 6d4b376a9b2bb23bbd3855db56f7a06b2284dedb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 30 Jan 2024 19:37:44 +0100 Subject: [PATCH] panvk: Lower sysvals to push uniforms Signed-off-by: Boris Brezillon Reviewed-by: Mary Guillemard Part-of: --- src/panfrost/vulkan/panvk_cmd_buffer.h | 1 - src/panfrost/vulkan/panvk_pipeline.h | 4 -- src/panfrost/vulkan/panvk_shader.h | 6 +-- src/panfrost/vulkan/panvk_vX_cmd_buffer.c | 44 ++++++------------- src/panfrost/vulkan/panvk_vX_pipeline.c | 14 +----- .../vulkan/panvk_vX_pipeline_layout.c | 14 +++--- src/panfrost/vulkan/panvk_vX_shader.c | 31 +++++++------ 7 files changed, 35 insertions(+), 79 deletions(-) diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.h b/src/panfrost/vulkan/panvk_cmd_buffer.h index 0f028a92561..9b622cb97d7 100644 --- a/src/panfrost/vulkan/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/panvk_cmd_buffer.h @@ -97,7 +97,6 @@ struct panvk_descriptor_state { struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS]; struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS]; } dyn; - mali_ptr sysvals_ptr; mali_ptr ubos; mali_ptr textures; mali_ptr samplers; diff --git a/src/panfrost/vulkan/panvk_pipeline.h b/src/panfrost/vulkan/panvk_pipeline.h index 781475d0434..3285a277ad6 100644 --- a/src/panfrost/vulkan/panvk_pipeline.h +++ b/src/panfrost/vulkan/panvk_pipeline.h @@ -70,10 +70,6 @@ struct panvk_pipeline { /* shader stage bit is set of the stage accesses storage images */ uint32_t img_access_mask; - struct { - unsigned ubo_idx; - } sysvals[MESA_SHADER_STAGES]; - unsigned tls_size; unsigned wls_size; diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index d5c40da6bee..3f2c6453773 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -20,9 +20,6 @@ #include "panvk_macros.h" #include "panvk_pipeline_layout.h" -#define PANVK_SYSVAL_UBO_INDEX 0 -#define PANVK_NUM_BUILTIN_UBOS 1 - struct nir_shader; struct pan_blend_state; struct panvk_device; @@ -56,7 +53,6 @@ struct panvk_sysvals { struct panvk_shader { struct pan_shader_info info; struct util_dynarray binary; - unsigned sysval_ubo; struct pan_compute_dim local_size; bool has_img_access; }; @@ -68,7 +64,7 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev, struct panvk_shader *panvk_per_arch(shader_create)( struct panvk_device *dev, gl_shader_stage stage, const VkPipelineShaderStageCreateInfo *stage_info, - const struct panvk_pipeline_layout *layout, unsigned sysval_ubo, + const struct panvk_pipeline_layout *layout, struct pan_blend_state *blend_state, bool static_blend_constants, const VkAllocationCallbacks *alloc); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c index 2de9b2a0f2c..f26b601505f 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c @@ -307,13 +307,13 @@ panvk_cmd_prepare_draw_sysvals( sysvals->first_vertex = draw->offset_start; sysvals->base_vertex = base_vertex; sysvals->base_instance = draw->first_instance; - bind_point_state->desc_state.sysvals_ptr = 0; + bind_point_state->desc_state.push_uniforms = 0; } if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) { memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants, sizeof(cmdbuf->state.blend.constants)); - bind_point_state->desc_state.sysvals_ptr = 0; + bind_point_state->desc_state.push_uniforms = 0; } if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) { @@ -321,42 +321,31 @@ panvk_cmd_prepare_draw_sysvals( &sysvals->viewport_scale); panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, &sysvals->viewport_offset); - bind_point_state->desc_state.sysvals_ptr = 0; + bind_point_state->desc_state.push_uniforms = 0; } } -static void -panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf, - struct panvk_cmd_bind_point_state *bind_point_state) -{ - struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; - - if (desc_state->sysvals_ptr) - return; - - struct panfrost_ptr sysvals = pan_pool_alloc_aligned( - &cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16); - memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals)); - desc_state->sysvals_ptr = sysvals.gpu; -} - static void panvk_cmd_prepare_push_uniforms( struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; - const struct panvk_pipeline *pipeline = bind_point_state->pipeline; - if (!pipeline->layout->push_constants.size || desc_state->push_uniforms) + if (desc_state->push_uniforms) return; struct panfrost_ptr push_uniforms = pan_pool_alloc_aligned( - &cmdbuf->desc_pool.base, - ALIGN_POT(pipeline->layout->push_constants.size, 16), 16); + &cmdbuf->desc_pool.base, 512, 16); + /* The first half is used for push constants. */ memcpy(push_uniforms.cpu, cmdbuf->push_constants, - pipeline->layout->push_constants.size); + sizeof(cmdbuf->push_constants)); + + /* The second half is used for sysvals. */ + memcpy((uint8_t *)push_uniforms.cpu + 256, &desc_state->sysvals, + sizeof(desc_state->sysvals)); + desc_state->push_uniforms = push_uniforms.gpu; } @@ -443,18 +432,12 @@ panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf, if (!ubo_count || desc_state->ubos) return; - panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state); panvk_cmd_prepare_dyn_ssbos(cmdbuf, bind_point_state); struct panfrost_ptr ubos = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, ubo_count, UNIFORM_BUFFER); struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu; - pan_pack(&ubo_descs[PANVK_SYSVAL_UBO_INDEX], UNIFORM_BUFFER, cfg) { - cfg.pointer = desc_state->sysvals_ptr; - cfg.entries = DIV_ROUND_UP(sizeof(desc_state->sysvals), 16); - } - for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { const struct panvk_descriptor_set_layout *set_layout = vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); @@ -1759,7 +1742,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x; sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y; sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z; - desc_state->sysvals_ptr = 0; + desc_state->push_uniforms = 0; panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); dispatch.tsd = batch->tls.gpu; @@ -2132,7 +2115,6 @@ panvk_per_arch(CmdBindDescriptorSets)( * TODO: we could be smarter by checking which part of the pipeline layout * are compatible with the previouly bound descriptor sets. */ - descriptors_state->sysvals_ptr = 0; descriptors_state->ubos = 0; descriptors_state->textures = 0; descriptors_state->samplers = 0; diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c b/src/panfrost/vulkan/panvk_vX_pipeline.c index 24c7705313a..395a27f7f3d 100644 --- a/src/panfrost/vulkan/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/panvk_vX_pipeline.c @@ -142,7 +142,7 @@ panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder, shader = panvk_per_arch(shader_create)( builder->device, stage, stage_info, builder->layout, - PANVK_SYSVAL_UBO_INDEX, &pipeline->blend.state, + &pipeline->blend.state, panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_BLEND_CONSTANTS), builder->alloc); @@ -223,16 +223,6 @@ panvk_pipeline_builder_alloc_static_state_bo( } } -static void -panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline, - gl_shader_stage stage) -{ - const struct panvk_shader *shader = builder->shaders[stage]; - - pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo; -} - static void panvk_pipeline_builder_emit_non_fs_rsd( const struct pan_shader_info *shader_info, mali_ptr shader_ptr, void *rsd) @@ -467,8 +457,6 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, pipeline->rsds[i] = gpu_rsd; } - panvk_pipeline_builder_init_sysvals(builder, pipeline, i); - if (i == MESA_SHADER_COMPUTE) pipeline->cs.local_size = shader->local_size; } diff --git a/src/panfrost/vulkan/panvk_vX_pipeline_layout.c b/src/panfrost/vulkan/panvk_vX_pipeline_layout.c index 7fd401807ff..3f180a5802b 100644 --- a/src/panfrost/vulkan/panvk_vX_pipeline_layout.c +++ b/src/panfrost/vulkan/panvk_vX_pipeline_layout.c @@ -117,14 +117,10 @@ unsigned panvk_per_arch(pipeline_layout_ubo_start)( const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic) { - unsigned offset = PANVK_NUM_BUILTIN_UBOS; - if (is_dynamic) - offset += layout->num_ubos + layout->sets[set].dyn_ubo_offset; - else - offset += layout->sets[set].ubo_offset; + return layout->num_ubos + layout->sets[set].dyn_ubo_offset; - return offset; + return layout->sets[set].ubo_offset; } unsigned @@ -150,14 +146,14 @@ unsigned panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)( const struct panvk_pipeline_layout *layout) { - return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos; + return layout->num_ubos + layout->num_dyn_ubos; } unsigned panvk_per_arch(pipeline_layout_total_ubo_count)( const struct panvk_pipeline_layout *layout) { - return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos + + return layout->num_ubos + layout->num_dyn_ubos + (layout->num_dyn_ssbos ? 1 : 0); } @@ -165,5 +161,5 @@ unsigned panvk_per_arch(pipeline_layout_dyn_ubos_offset)( const struct panvk_pipeline_layout *layout) { - return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos; + return layout->num_ubos; } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index c406e2d385b..5c103564ebd 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -51,13 +51,14 @@ #include "vk_util.h" static nir_def * -load_sysval_from_ubo(nir_builder *b, nir_intrinsic_instr *intr, unsigned offset) +load_sysval_from_push_const(nir_builder *b, nir_intrinsic_instr *intr, + unsigned offset) { - return nir_load_ubo(b, intr->def.num_components, intr->def.bit_size, - nir_imm_int(b, PANVK_SYSVAL_UBO_INDEX), - nir_imm_int(b, offset), - .align_mul = intr->def.bit_size / 8, .align_offset = 0, - .range_base = offset, .range = intr->def.bit_size / 8); + return nir_load_push_constant( + b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0), + /* Push constants are placed first, and then come the sysvals. */ + .base = offset + 256, + .range = intr->def.num_components * intr->def.bit_size / 8); } struct sysval_options { @@ -81,25 +82,25 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data) #define SYSVAL(name) offsetof(struct panvk_sysvals, name) switch (intr->intrinsic) { case nir_intrinsic_load_num_workgroups: - val = load_sysval_from_ubo(b, intr, SYSVAL(num_work_groups)); + val = load_sysval_from_push_const(b, intr, SYSVAL(num_work_groups)); break; case nir_intrinsic_load_workgroup_size: - val = load_sysval_from_ubo(b, intr, SYSVAL(local_group_size)); + val = load_sysval_from_push_const(b, intr, SYSVAL(local_group_size)); break; case nir_intrinsic_load_viewport_scale: - val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_scale)); + val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_scale)); break; case nir_intrinsic_load_viewport_offset: - val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_offset)); + val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_offset)); break; case nir_intrinsic_load_first_vertex: - val = load_sysval_from_ubo(b, intr, SYSVAL(first_vertex)); + val = load_sysval_from_push_const(b, intr, SYSVAL(first_vertex)); break; case nir_intrinsic_load_base_vertex: - val = load_sysval_from_ubo(b, intr, SYSVAL(base_vertex)); + val = load_sysval_from_push_const(b, intr, SYSVAL(base_vertex)); break; case nir_intrinsic_load_base_instance: - val = load_sysval_from_ubo(b, intr, SYSVAL(base_instance)); + val = load_sysval_from_push_const(b, intr, SYSVAL(base_instance)); break; case nir_intrinsic_load_blend_const_color_rgba: if (opts->static_blend_constants) { @@ -112,7 +113,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data) val = nir_build_imm(b, 4, 32, constants); } else { - val = load_sysval_from_ubo(b, intr, SYSVAL(blend_constants)); + val = load_sysval_from_push_const(b, intr, SYSVAL(blend_constants)); } break; @@ -206,7 +207,6 @@ struct panvk_shader * panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage, const VkPipelineShaderStageCreateInfo *stage_info, const struct panvk_pipeline_layout *layout, - unsigned sysval_ubo, struct pan_blend_state *blend_state, bool static_blend_constants, const VkAllocationCallbacks *alloc) @@ -383,7 +383,6 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage, if (shader->has_img_access) shader->info.attribute_count += layout->num_imgs; - shader->sysval_ubo = sysval_ubo; shader->local_size.x = nir->info.workgroup_size[0]; shader->local_size.y = nir->info.workgroup_size[1]; shader->local_size.z = nir->info.workgroup_size[2];