diff --git a/src/panfrost/.clang-format b/src/panfrost/.clang-format index e93f6682233..de6ae574573 100644 --- a/src/panfrost/.clang-format +++ b/src/panfrost/.clang-format @@ -85,5 +85,6 @@ ForEachMacros: [ 'pan_section_pack', 'pan_unpack', 'panvk_cs_reg_upd_ctx', + 'panvk_shader_foreach_variant', 'u_foreach_bit', ] diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index dce9139b290..777ba65c210 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -334,7 +334,7 @@ out: VkResult panvk_per_arch(meta_get_copy_desc_job)( - struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, const struct panvk_descriptor_state *desc_state, const struct panvk_shader_desc_state *shader_desc_state, uint32_t attrib_buf_idx_offset, struct pan_ptr *job_desc) diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index 90df1e2152f..cb0b0621ea5 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -465,8 +465,9 @@ void panvk_per_arch(cmd_inherit_render_state)( static inline void panvk_per_arch(calculate_task_axis_and_increment)( - const struct panvk_shader *shader, struct panvk_physical_device *phys_dev, - unsigned *task_axis, unsigned *task_increment) + const struct panvk_shader_variant *shader, + struct panvk_physical_device *phys_dev, unsigned *task_axis, + unsigned *task_increment) { /* Pick the task_axis and task_increment to maximize thread * utilization. 
*/ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index c756b91d71e..86a5e2d11c8 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -43,7 +43,8 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) const struct panvk_descriptor_state *desc_state = &cmdbuf->state.compute.desc_state; - const struct panvk_shader *cs = cmdbuf->state.compute.shader; + const struct panvk_shader_variant *cs = + panvk_shader_only_variant(cmdbuf->state.compute.shader); uint32_t desc_count = cs->desc_info.dyn_bufs.count + 1; struct pan_ptr driver_set = panvk_cmd_alloc_dev_mem( cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); @@ -67,10 +68,9 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) } uint64_t -panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_shader *shader, - const struct pan_compute_dim *dim, - bool indirect) +panvk_per_arch(cmd_dispatch_prepare_tls)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + const struct pan_compute_dim *dim, bool indirect) { struct panvk_physical_device *phys_dev = to_panvk_physical_device(cmdbuf->vk.base.device->physical); @@ -122,7 +122,8 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf, static void cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) { - const struct panvk_shader *shader = cmdbuf->state.compute.shader; + const struct panvk_shader_variant *shader = + panvk_shader_only_variant(cmdbuf->state.compute.shader); VkResult result; /* If there's no compute shader, we can skip the dispatch. 
*/ @@ -170,8 +171,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) if (result != VK_SUCCESS) return; - result = panvk_per_arch(cmd_prepare_push_uniforms)( - cmdbuf, cmdbuf->state.compute.shader, 1); + result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, shader, 1); if (result != VK_SUCCESS) return; @@ -355,7 +355,8 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, uint32_t groupCountY, uint32_t groupCountZ) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - const struct panvk_shader *shader = cmdbuf->state.compute.shader; + const struct panvk_shader_variant *shader = + panvk_shader_only_variant(cmdbuf->state.compute.shader); struct panvk_dispatch_info info = { .wg_base = {baseGroupX, baseGroupY, baseGroupZ}, .direct.wg_count = {groupCountX, groupCountY, groupCountZ}, diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 205957aa234..da8f61bf4b4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -269,7 +269,8 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf, return VK_SUCCESS; struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_vertex_input_state *vi = dyns->vi; @@ -357,8 +358,10 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf, static uint32_t get_varying_slots(const struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + 
panvk_shader_only_variant(get_fs(cmdbuf)); uint32_t varying_slots = 0; if (fs) { @@ -378,7 +381,8 @@ emit_varying_descs(const struct panvk_cmd_buffer *cmdbuf, /* Assumes 16 byte slots. We could do better. */ uint32_t varying_size = varying_slots * 16; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); for (uint32_t i = 0; i < varying_slots; i++) { const struct pan_shader_varying *var = &fs->info.varyings.input[i]; @@ -406,7 +410,8 @@ static VkResult prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) { struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader); const struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; /* If the shader is using LD_VAR_BUF[_IMM], we do not have to set up @@ -578,8 +583,10 @@ static VkResult update_tls(struct panvk_cmd_buffer *cmdbuf) { struct panvk_tls_state *state = &cmdbuf->state.tls; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader); struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); @@ -807,7 +814,8 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) static inline uint64_t get_vs_all_spd(const struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); assert(vs); const struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia; @@ -819,7 +827,8 @@ get_vs_all_spd(const struct panvk_cmd_buffer *cmdbuf) 
static inline uint64_t get_vs_pos_spd(const struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); assert(vs); const struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia; @@ -854,12 +863,16 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) * On v13+, the point size default to 1.0f. */ #if PAN_ARCH < 13 - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: - if (cmdbuf->state.gfx.vs.shader->info.vs.writes_point_size) + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: { + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + + if (vs->info.vs.writes_point_size) return; primitive_size = 1.0f; break; + } #endif case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: @@ -1572,7 +1585,8 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *draw) { struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); bool upd_res_table = false; @@ -1623,7 +1637,8 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *draw) static VkResult prepare_fs(struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; struct cs_builder *b = @@ -1659,8 +1674,10 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf, { struct cs_builder *b = 
panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); VkResult result; if (gfx_state_dirty(cmdbuf, VS_PUSH_UNIFORMS)) { @@ -1732,7 +1749,8 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs) const struct vk_rasterization_state *rs = &dyns->rs; bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable; bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); struct pan_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL); if (!zsd.gpu) @@ -1862,7 +1880,8 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf, { struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); bool dcd2_dirty = fs_user_dirty(cmdbuf) || dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) || @@ -2046,8 +2065,10 @@ static void set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_input_assembly_state *ia = &dyns->ia; @@ -2128,8 +2149,10 @@ get_tiler_flags_override(struct panvk_draw_info *draw) static VkResult prepare_draw(struct 
panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; bool idvs = vs->info.vs.idvs; VkResult result; @@ -2260,7 +2283,8 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct cs_tracing_ctx *tracing_ctx = &cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].tracing; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); VkResult result; @@ -2423,7 +2447,8 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, { const struct cs_tracing_ctx *tracing_ctx = &cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].tracing; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); VkResult result; diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index 1222d7fd4e3..10ff557fcc2 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -28,7 +28,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_physical_device *phys_dev = to_panvk_physical_device(dev->vk.physical); - const struct panvk_shader *shader = + const struct panvk_shader_variant *shader = 
panvk_per_arch(precomp_cache_get)(dev->precomp_cache, idx); assert(shader); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index bad78a28c71..68cb362e18d 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -31,10 +31,9 @@ #include uint64_t -panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_shader *shader, - const struct pan_compute_dim *dim, - bool indirect) +panvk_per_arch(cmd_dispatch_prepare_tls)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + const struct pan_compute_dim *dim, bool indirect) { struct panvk_batch *batch = cmdbuf->cur_batch; @@ -64,7 +63,8 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf, static void cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) { - const struct panvk_shader *shader = cmdbuf->state.compute.shader; + const struct panvk_shader_variant *shader = + panvk_shader_only_variant(cmdbuf->state.compute.shader); VkResult result; /* If there's no compute shader, we can skip the dispatch. 
*/ @@ -104,7 +104,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info); result = panvk_per_arch(cmd_prepare_push_uniforms)( - cmdbuf, cmdbuf->state.compute.shader, 1); + cmdbuf, shader, 1); if (result != VK_SUCCESS) return; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index c1df1c0549c..35c5c081389 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -224,7 +224,8 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, const struct vk_rasterization_state *rs = &dyns->rs; const struct vk_depth_stencil_state *ds = &dyns->ds; const struct vk_input_assembly_state *ia = &dyns->ia; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); const struct pan_shader_info *fs_info = fs ? &fs->info : NULL; uint32_t bd_count = MAX2(cmdbuf->state.gfx.render.fb.info.rt_count, 1); bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable; @@ -255,7 +256,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE); struct panvk_blend_info *binfo = &cmdbuf->state.gfx.cb.info; - uint64_t fs_code = panvk_shader_get_dev_addr(fs); + uint64_t fs_code = panvk_shader_variant_get_dev_addr(fs); if (fs_info != NULL) { panvk_per_arch(blend_emit_descs)(cmdbuf, bds); @@ -430,7 +431,8 @@ static VkResult panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct panvk_shader_link *link = &cmdbuf->state.gfx.link; struct pan_ptr bufs = panvk_cmd_alloc_desc_array( cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER); @@ -586,7 +588,8 @@ 
static VkResult panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_vertex_input_state *vi = dyns->vi; @@ -744,7 +747,8 @@ panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_data *draw, struct mali_draw_packed *dcd) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; @@ -826,8 +830,10 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_data *draw, struct mali_primitive_packed *prim) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_input_assembly_state *ia = &dyns->ia; @@ -888,7 +894,8 @@ panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_data *draw, struct mali_primitive_size_packed *primsz) { - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia; bool writes_point_size = @@ -1001,13 +1008,13 @@ set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf, state->render.first_provoking_vertex = U_TRISTATE_YES; } - static VkResult 
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader); struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct pan_ptr ptr; VkResult result = panvk_per_arch(meta_get_copy_desc_job)( @@ -1092,7 +1099,8 @@ panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); const struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; const struct vk_vertex_input_state *vi = @@ -1116,7 +1124,8 @@ static VkResult panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader); struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_batch *batch = cmdbuf->cur_batch; struct pan_ptr ptr; @@ -1165,8 +1174,13 @@ panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd) if (!gfx_state_dirty(cmd, VS) && !gfx_state_dirty(cmd, FS)) return VK_SUCCESS; - VkResult result = panvk_per_arch(link_shaders)( - &cmd->desc_pool, gfx->vs.shader, get_fs(cmd), &gfx->link); + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmd->state.gfx.vs.shader); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmd)); + + VkResult result = + panvk_per_arch(link_shaders)(&cmd->desc_pool, vs, fs, &gfx->link); if (result != VK_SUCCESS) { vk_command_buffer_set_error(&cmd->vk, result); return result; @@ 
-1179,7 +1193,8 @@ static void panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; @@ -1198,7 +1213,8 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) cmdbuf->state.gfx.fs.required = fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state); - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); /* There are only 16 bits in the descriptor for the job ID. Each job has a * pilot shader dealing with descriptor copies, and we need one @@ -1374,13 +1390,13 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) } result = panvk_per_arch(cmd_prepare_push_uniforms)( - cmdbuf, cmdbuf->state.gfx.vs.shader, 1); + cmdbuf, vs, 1); if (result != VK_SUCCESS) return; if (fs) { result = panvk_per_arch(cmd_prepare_push_uniforms)( - cmdbuf, cmdbuf->state.gfx.fs.shader, 1); + cmdbuf, fs, 1); if (result != VK_SUCCESS) return; } @@ -1423,7 +1439,9 @@ padded_vertex_count(struct panvk_cmd_buffer *cmdbuf, uint32_t vertex_count, if (instance_count == 1) return vertex_count; - bool idvs = cmdbuf->state.gfx.vs.shader->info.vs.idvs; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); + bool idvs = vs->info.vs.idvs; /* Index-Driven Vertex Shading requires different instances to * have different cache lines for position results. 
Each vertex diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c index d33e4e8b118..5e7103dac03 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c @@ -24,7 +24,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, struct panvk_cmd_buffer *cmdbuf = ctx->cmdbuf; struct panvk_batch *batch = cmdbuf->cur_batch; struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - const struct panvk_shader *shader = + const struct panvk_shader_variant *shader = panvk_per_arch(precomp_cache_get)(dev->precomp_cache, idx); assert(shader); diff --git a/src/panfrost/vulkan/panvk_cmd_desc_state.h b/src/panfrost/vulkan/panvk_cmd_desc_state.h index 5bc57f0f51e..f53a008b98c 100644 --- a/src/panfrost/vulkan/panvk_cmd_desc_state.h +++ b/src/panfrost/vulkan/panvk_cmd_desc_state.h @@ -69,23 +69,24 @@ struct panvk_descriptor_state { VkResult panvk_per_arch(cmd_prepare_dyn_ssbos)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state *shader_desc_state); VkResult panvk_per_arch(cmd_prepare_shader_desc_tables)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state *shader_desc_state); #else void panvk_per_arch(cmd_fill_dyn_bufs)( const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, struct mali_buffer_packed *buffers); + const struct panvk_shader_variant *shader, + struct mali_buffer_packed *buffers); VkResult panvk_per_arch(cmd_prepare_shader_res_table)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state 
*shader_desc_state, uint32_t repeat_count); #endif diff --git a/src/panfrost/vulkan/panvk_cmd_dispatch.h b/src/panfrost/vulkan/panvk_cmd_dispatch.h index 9aafa4043f7..9a446b4f3a5 100644 --- a/src/panfrost/vulkan/panvk_cmd_dispatch.h +++ b/src/panfrost/vulkan/panvk_cmd_dispatch.h @@ -77,7 +77,7 @@ void panvk_per_arch(cmd_prepare_dispatch_sysvals)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info); uint64_t panvk_per_arch(cmd_dispatch_prepare_tls)( - struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, const struct pan_compute_dim *dim, bool indirect); #endif diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 2437766c2ac..1a0a50d0328 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -260,8 +260,9 @@ static inline bool fs_required(const struct panvk_cmd_graphics_state *state, const struct vk_dynamic_graphics_state *dyn_state) { - const struct pan_shader_info *fs_info = - state->fs.shader ? &state->fs.shader->info : NULL; + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(state->fs.shader); + const struct pan_shader_info *fs_info = fs ? 
&fs->info : NULL; const struct vk_color_blend_state *cb = &dyn_state->cb; const struct vk_rasterization_state *rs = &dyn_state->rs; @@ -407,7 +408,7 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, static inline uint32_t color_attachment_written_mask( - const struct panvk_shader *fs, + const struct panvk_shader_variant *fs, const struct vk_color_attachment_location_state *cal) { uint32_t written_by_shader = @@ -428,7 +429,7 @@ color_attachment_written_mask( } static inline uint32_t -color_attachment_read_mask(const struct panvk_shader *fs, +color_attachment_read_mask(const struct panvk_shader_variant *fs, const struct vk_input_attachment_location_state *ial, uint8_t color_attachment_mask) { @@ -453,7 +454,7 @@ color_attachment_read_mask(const struct panvk_shader *fs, } static inline bool -z_attachment_read(const struct panvk_shader *fs, +z_attachment_read(const struct panvk_shader_variant *fs, const struct vk_input_attachment_location_state *ial) { uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX @@ -465,7 +466,7 @@ z_attachment_read(const struct panvk_shader *fs, } static inline bool -s_attachment_read(const struct panvk_shader *fs, +s_attachment_read(const struct panvk_shader_variant *fs, const struct vk_input_attachment_location_state *ial) { uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX diff --git a/src/panfrost/vulkan/panvk_cmd_push_constant.h b/src/panfrost/vulkan/panvk_cmd_push_constant.h index faa03299365..9c127bd920a 100644 --- a/src/panfrost/vulkan/panvk_cmd_push_constant.h +++ b/src/panfrost/vulkan/panvk_cmd_push_constant.h @@ -11,7 +11,7 @@ #include "genxml/gen_macros.h" struct panvk_cmd_buffer; -struct panvk_shader; +struct panvk_shader_variant; #define MAX_PUSH_CONSTANTS_SIZE 256 @@ -19,9 +19,8 @@ struct panvk_push_constant_state { uint64_t data[MAX_PUSH_CONSTANTS_SIZE / sizeof(uint64_t)]; }; -VkResult -panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, - 
const struct panvk_shader *shader, - uint32_t repeat_count); +VkResult panvk_per_arch(cmd_prepare_push_uniforms)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + uint32_t repeat_count); #endif diff --git a/src/panfrost/vulkan/panvk_meta.h b/src/panfrost/vulkan/panvk_meta.h index 57acd247c57..78fdeed3c73 100644 --- a/src/panfrost/vulkan/panvk_meta.h +++ b/src/panfrost/vulkan/panvk_meta.h @@ -148,11 +148,11 @@ panvk_meta_copy_get_image_properties(struct panvk_image *img) struct panvk_cmd_buffer; struct panvk_descriptor_state; struct panvk_device; -struct panvk_shader; +struct panvk_shader_variant; struct panvk_shader_desc_state; VkResult panvk_per_arch(meta_get_copy_desc_job)( - struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, const struct panvk_descriptor_state *desc_state, const struct panvk_shader_desc_state *shader_desc_state, uint32_t attrib_buf_idx_offset, struct pan_ptr *job_desc); diff --git a/src/panfrost/vulkan/panvk_precomp_cache.h b/src/panfrost/vulkan/panvk_precomp_cache.h index e16885457fd..23919b65d1f 100644 --- a/src/panfrost/vulkan/panvk_precomp_cache.h +++ b/src/panfrost/vulkan/panvk_precomp_cache.h @@ -36,7 +36,7 @@ struct panvk_precomp_cache * panvk_per_arch(precomp_cache_init)(struct panvk_device *dev); void panvk_per_arch(precomp_cache_cleanup)(struct panvk_precomp_cache *cache); -struct panvk_shader * +const struct panvk_shader_variant * panvk_per_arch(precomp_cache_get)(struct panvk_precomp_cache *cache, unsigned program); diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 9a82db9b94c..02979ce5070 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -297,8 +297,7 @@ struct panvk_shader_fau_info { uint32_t total_count; }; -struct panvk_shader { - struct vk_shader vk; +struct panvk_shader_variant { struct pan_shader_info info; union { @@ 
-367,12 +366,80 @@ struct panvk_shader { const char *asm_str; }; +enum panvk_vs_variant { + /* Hardware vertex shader, when next stage is fragment */ + PANVK_VS_VARIANT_HW, + + PANVK_VS_VARIANTS, +}; + +struct panvk_shader { + struct vk_shader vk; + + struct panvk_shader_variant variants[]; +}; + +static inline unsigned +panvk_shader_num_variants(gl_shader_stage stage) +{ + if (stage == MESA_SHADER_VERTEX) + return PANVK_VS_VARIANTS; + + return 1; +} + +static const char *panvk_vs_shader_variant_name[] = { + [PANVK_VS_VARIANT_HW] = NULL, +}; + +static const char * +panvk_shader_variant_name(const struct panvk_shader *shader, + struct panvk_shader_variant *variant) +{ + unsigned i = variant - shader->variants; + assert(i < panvk_shader_num_variants(shader->vk.stage)); + + if (shader->vk.stage == MESA_SHADER_VERTEX) { + assert(i < ARRAY_SIZE(panvk_vs_shader_variant_name)); + return panvk_vs_shader_variant_name[i]; + } + + assert(panvk_shader_num_variants(shader->vk.stage) == 1); + + return NULL; +} + +static const struct panvk_shader_variant * +panvk_shader_only_variant(const struct panvk_shader *shader) +{ + if (!shader) + return NULL; + + assert(panvk_shader_num_variants(shader->vk.stage) == 1); + return &shader->variants[0]; +} + +static const struct panvk_shader_variant * +panvk_shader_hw_variant(const struct panvk_shader *shader) +{ + if (!shader) + return NULL; + + return &shader->variants[0]; +} + static inline uint64_t -panvk_shader_get_dev_addr(const struct panvk_shader *shader) +panvk_shader_variant_get_dev_addr(const struct panvk_shader_variant *shader) { return shader != NULL ? 
panvk_priv_mem_dev_addr(shader->code_mem) : 0; } +#define panvk_shader_foreach_variant(__shader, __var) \ + for (struct panvk_shader_variant *__var = (__shader)->variants; \ + __var < (__shader)->variants + \ + panvk_shader_num_variants((__shader)->vk.stage); \ + ++__var) + #if PAN_ARCH < 9 struct panvk_shader_link { struct { @@ -382,8 +449,8 @@ struct panvk_shader_link { }; VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, - const struct panvk_shader *vs, - const struct panvk_shader *fs, + const struct panvk_shader_variant *vs, + const struct panvk_shader_variant *fs, struct panvk_shader_link *link); static inline void @@ -398,7 +465,8 @@ void panvk_per_arch(nir_lower_descriptors)( nir_shader *nir, struct panvk_device *dev, const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count, struct vk_descriptor_set_layout *const *set_layouts, - const struct vk_graphics_pipeline_state *state, struct panvk_shader *shader); + const struct vk_graphics_pipeline_state *state, + struct panvk_shader_variant *shader); /* This a stripped-down version of panvk_shader for internal shaders that * are managed by vk_meta (blend and preload shaders). Those don't need the @@ -417,7 +485,7 @@ struct panvk_internal_shader { #if PAN_ARCH >= 9 static inline bool -panvk_use_ld_var_buf(const struct panvk_shader *shader) +panvk_use_ld_var_buf(const struct panvk_shader_variant *shader) { /* LD_VAR_BUF[_IMM] takes an 8-bit offset, limiting its use to 16 or less * varyings, assuming highp vec4. 
*/ diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index 92a3467fc8d..0c514a57520 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -300,9 +300,10 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, &cmdbuf->vk.dynamic_graphics_state; const struct vk_color_blend_state *cb = &dyns->cb; const struct vk_color_attachment_location_state *cal = &dyns->cal; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader); const struct pan_shader_info *fs_info = fs ? &fs->info : NULL; - uint64_t fs_code = panvk_shader_get_dev_addr(fs); + uint64_t fs_code = panvk_shader_variant_get_dev_addr(fs); const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render; const VkFormat *color_attachment_formats = render->color_attachments.fmts; const uint8_t *color_attachment_samples = render->color_attachments.samples; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c index 463daeb9f4f..b16800c718f 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c @@ -113,7 +113,7 @@ VkResult panvk_per_arch(cmd_prepare_dyn_ssbos)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state *shader_desc_state) { shader_desc_state->dyn_ssbos = 0; @@ -151,7 +151,7 @@ panvk_per_arch(cmd_prepare_dyn_ssbos)( static void panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct mali_uniform_buffer_packed *ubos, uint32_t ubo_count) { @@ -180,7 +180,7 @@ VkResult panvk_per_arch(cmd_prepare_shader_desc_tables)( struct panvk_cmd_buffer *cmdbuf, const struct 
panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state *shader_desc_state) { memset(shader_desc_state->tables, 0, sizeof(shader_desc_state->tables)); @@ -246,7 +246,8 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( void panvk_per_arch(cmd_fill_dyn_bufs)( const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, struct mali_buffer_packed *buffers) + const struct panvk_shader_variant *shader, + struct mali_buffer_packed *buffers) { if (!shader) return; @@ -273,7 +274,7 @@ VkResult panvk_per_arch(cmd_prepare_shader_res_table)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_descriptor_state *desc_state, - const struct panvk_shader *shader, + const struct panvk_shader_variant *shader, struct panvk_shader_desc_state *shader_desc_state, uint32_t repeat_count) { if (!shader) { diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c index 87ffd2565ff..ec9d2b02bc1 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ -12,7 +12,8 @@ void panvk_per_arch(cmd_prepare_dispatch_sysvals)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info) { - const struct panvk_shader *shader = cmdbuf->state.compute.shader; + const struct panvk_shader_variant *shader = + panvk_shader_only_variant(cmdbuf->state.compute.shader); const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 05cdac3eec5..86bceb607c1 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -702,7 +702,8 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, { const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct 
vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; - const struct panvk_shader *fs = get_fs(cmdbuf); + const struct panvk_shader_variant *fs = + panvk_shader_only_variant(get_fs(cmdbuf)); uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0; BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; @@ -801,7 +802,8 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP)) prepare_iam_sysvals(cmdbuf, dirty_sysvals); - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader_variant *vs = + panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader); #if PAN_ARCH < 9 struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c index 91237a09169..1a1196c62d8 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c @@ -8,13 +8,13 @@ #include "panvk_entrypoints.h" VkResult -panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_shader *shader, - uint32_t repeat_count) +panvk_per_arch(cmd_prepare_push_uniforms)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + uint32_t repeat_count) { uint64_t *push_ptr; - switch (shader->vk.stage) { + switch (shader->info.stage) { case MESA_SHADER_COMPUTE: if (!compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) return VK_SUCCESS; @@ -47,7 +47,7 @@ panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, if (!push_uniforms.gpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - uint64_t *sysvals = shader->vk.stage == MESA_SHADER_COMPUTE + uint64_t *sysvals = shader->info.stage == MESA_SHADER_COMPUTE ? 
(uint64_t *)&cmdbuf->state.compute.sysvals : (uint64_t *)&cmdbuf->state.gfx.sysvals; uint64_t *push_consts = cmdbuf->state.push_constants.data; @@ -57,7 +57,7 @@ panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, for (uint32_t i = 0; i < repeat_count; i++) { uint64_t addr = push_uniforms.gpu + i * shader->fau.total_count * sizeof(uint64_t); - if (shader->vk.stage == MESA_SHADER_COMPUTE) + if (shader->info.stage == MESA_SHADER_COMPUTE) cmdbuf->state.compute.sysvals.push_uniforms = addr; else cmdbuf->state.gfx.sysvals.push_uniforms = addr; diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c index a3eaedab23e..f689b565a37 100644 --- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c @@ -74,7 +74,7 @@ struct lower_desc_ctx { bool null_descriptor_support; nir_address_format ubo_addr_format; nir_address_format ssbo_addr_format; - struct panvk_shader *shader; + struct panvk_shader_variant *shader; }; static nir_address_format @@ -872,7 +872,7 @@ get_img_index(nir_builder *b, nir_deref_instr *deref, struct panvk_lower_input_attachment_load_ctx { uint32_t ro_color_mask; - struct panvk_shader *shader; + struct panvk_shader_variant *shader; }; static bool @@ -890,7 +890,7 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr, return false; const struct panvk_lower_input_attachment_load_ctx *ctx = data; - struct panvk_shader *shader = ctx->shader; + struct panvk_shader_variant *shader = ctx->shader; nir_variable *var = nir_deref_instr_get_variable(deref); assert(var); @@ -1066,7 +1066,7 @@ readonly_color_mask(nir_shader *nir, static bool lower_input_attachment_loads(nir_shader *nir, const struct vk_graphics_pipeline_state *state, - struct panvk_shader *shader) + struct panvk_shader_variant *shader) { bool progress = false; struct panvk_lower_input_attachment_load_ctx ia_load_ctx = { @@ -1453,7 +1453,7 @@ 
collect_instr_desc_access(nir_builder *b, nir_instr *instr, void *data) } static void -upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader, +upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader_variant *shader, const struct panvk_shader_desc_info *desc_info) { #if PAN_ARCH < 9 @@ -1496,7 +1496,7 @@ panvk_per_arch(nir_lower_descriptors)( nir_shader *nir, struct panvk_device *dev, const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count, struct vk_descriptor_set_layout *const *set_layouts, - const struct vk_graphics_pipeline_state *state, struct panvk_shader *shader) + const struct vk_graphics_pipeline_state *state, struct panvk_shader_variant *shader) { struct lower_desc_ctx ctx = { .shader = shader, diff --git a/src/panfrost/vulkan/panvk_vX_precomp_cache.c b/src/panfrost/vulkan/panvk_vX_precomp_cache.c index 0e7a2d547d0..e46a24e2e4a 100644 --- a/src/panfrost/vulkan/panvk_vX_precomp_cache.c +++ b/src/panfrost/vulkan/panvk_vX_precomp_cache.c @@ -80,7 +80,7 @@ panvk_get_precompiled_locked(struct panvk_precomp_cache *cache, return shader; } -struct panvk_shader * +const struct panvk_shader_variant * panvk_per_arch(precomp_cache_get)(struct panvk_precomp_cache *cache, unsigned program) { @@ -90,12 +90,12 @@ panvk_per_arch(precomp_cache_get)(struct panvk_precomp_cache *cache, struct panvk_shader *ret = p_atomic_read(cache->precomp + program); if (ret != NULL) - return ret; + return panvk_shader_only_variant(ret); /* Otherwise, take the lock and upload. 
*/ simple_mtx_lock(&cache->lock); ret = panvk_get_precompiled_locked(cache, program); simple_mtx_unlock(&cache->lock); - return ret; + return panvk_shader_only_variant(ret); } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 55dd33cb341..d32a422a58f 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -63,7 +63,7 @@ #include "vk_util.h" struct panvk_lower_sysvals_context { - struct panvk_shader *shader; + struct panvk_shader_variant *shader; const struct vk_graphics_pipeline_state *state; }; @@ -566,7 +566,7 @@ collect_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, if (intr->intrinsic != nir_intrinsic_load_push_constant) return false; - struct panvk_shader *shader = data; + struct panvk_shader_variant *shader = data; uint32_t base = nir_intrinsic_base(intr); bool is_sysval = base >= SYSVALS_PUSH_CONST_BASE; uint32_t offset, size; @@ -605,7 +605,7 @@ move_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) if (intr->intrinsic != nir_intrinsic_load_push_constant) return false; - struct panvk_shader *shader = data; + struct panvk_shader_variant *shader = data; unsigned base = nir_intrinsic_base(intr); bool is_sysval = base >= SYSVALS_PUSH_CONST_BASE; @@ -661,7 +661,7 @@ move_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) } static void -lower_load_push_consts(nir_shader *nir, struct panvk_shader *shader) +lower_load_push_consts(nir_shader *nir, struct panvk_shader_variant *shader) { /* Before we lower load_push_constant()s with a dynamic offset to global * loads, we want to run a few optimization passes to get rid of offset @@ -687,7 +687,7 @@ lower_load_push_consts(nir_shader *nir, struct panvk_shader *shader) /* We always reserve the 4 blend constant words for fragment shaders, * because we don't know the blend configuration at this point, and * we might end up with a blend shader reading those blend 
constants. */ - if (shader->vk.stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_FRAGMENT) { /* We rely on blend constants being placed first and covering 4 words. */ STATIC_ASSERT( offsetof(struct panvk_graphics_sysvals, blend.constants) == 0 && @@ -764,7 +764,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, uint32_t *noperspective_varyings, const struct vk_graphics_pipeline_state *state, const struct pan_compile_inputs *compile_input, - struct panvk_shader *shader) + struct panvk_shader_variant *shader) { struct panvk_instance *instance = to_panvk_instance(dev->vk.physical->instance); @@ -940,7 +940,7 @@ static VkResult panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, VkShaderCreateFlagsEXT shader_flags, struct pan_compile_inputs *compile_input, - struct panvk_shader *shader) + struct panvk_shader_variant *shader) { const bool dump_asm = shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA; @@ -1030,7 +1030,7 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, (nir->info.stage == MESA_SHADER_VERTEX ? 
MAX_VS_ATTRIBS : 0); #endif - switch (shader->info.stage) { + switch (nir->info.stage) { case MESA_SHADER_COMPUTE: case MESA_SHADER_KERNEL: shader->cs.local_size.x = nir->info.workgroup_size[0]; @@ -1051,7 +1051,7 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, #if PAN_ARCH >= 9 static enum mali_flush_to_zero_mode -shader_ftz_mode(struct panvk_shader *shader) +shader_ftz_mode(struct panvk_shader_variant *shader) { if (shader->info.ftz_fp32) { if (shader->info.ftz_fp16) @@ -1069,7 +1069,8 @@ shader_ftz_mode(struct panvk_shader *shader) #endif static VkResult -panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, +panvk_shader_upload(struct panvk_device *dev, + struct panvk_shader_variant *shader, const VkAllocationCallbacks *pAllocator) { shader->code_mem = (struct panvk_priv_mem){0}; @@ -1098,8 +1099,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&shader->info, panvk_shader_get_dev_addr(shader), - &cfg); + pan_shader_prepare_rsd(&shader->info, + panvk_shader_variant_get_dev_addr(shader), &cfg); } #else if (shader->info.stage != MESA_SHADER_VERTEX) { @@ -1120,7 +1121,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); - cfg.binary = panvk_shader_get_dev_addr(shader); + cfg.binary = panvk_shader_variant_get_dev_addr(shader); cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); @@ -1139,7 +1140,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.stage = pan_shader_stage(&shader->info); cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); - cfg.binary = panvk_shader_get_dev_addr(shader); + cfg.binary = panvk_shader_variant_get_dev_addr(shader); cfg.preload.r48_r63 = 
(shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } @@ -1154,8 +1155,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.stage = pan_shader_stage(&shader->info); cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); - cfg.binary = - panvk_shader_get_dev_addr(shader) + shader->info.vs.no_psiz_offset; + cfg.binary = panvk_shader_variant_get_dev_addr(shader) + + shader->info.vs.no_psiz_offset; cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } @@ -1171,7 +1172,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); - cfg.binary = panvk_shader_get_dev_addr(shader); + cfg.binary = panvk_shader_variant_get_dev_addr(shader); cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } @@ -1187,8 +1188,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); - cfg.binary = - panvk_shader_get_dev_addr(shader) + shader->info.vs.no_psiz_offset; + cfg.binary = panvk_shader_variant_get_dev_addr(shader) + + shader->info.vs.no_psiz_offset; cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } @@ -1206,7 +1207,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, cfg.stage = pan_shader_stage(&shader->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_FULL; cfg.register_allocation = pan_register_allocation(work_count); - cfg.binary = panvk_shader_get_dev_addr(shader) + + cfg.binary = panvk_shader_variant_get_dev_addr(shader) + shader->info.vs.secondary_offset; cfg.preload.r48_r63 = (shader->info.vs.secondary_preload >> 48); 
cfg.flush_to_zero_mode = shader_ftz_mode(shader); @@ -1220,13 +1221,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, } static void -panvk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader, - const VkAllocationCallbacks *pAllocator) +panvk_shader_variant_destroy(struct panvk_shader_variant *shader) { - struct panvk_device *dev = to_panvk_device(vk_dev); - struct panvk_shader *shader = - container_of(vk_shader, struct panvk_shader, vk); - free((void *)shader->asm_str); ralloc_free((void *)shader->nir_str); @@ -1252,8 +1248,20 @@ panvk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader, if (shader->own_bin) free((void *)shader->bin_ptr); +} - vk_shader_free(&dev->vk, pAllocator, &shader->vk); +static void +panvk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader, + const VkAllocationCallbacks *pAllocator) +{ + struct panvk_shader *shader = + container_of(vk_shader, struct panvk_shader, vk); + + panvk_shader_foreach_variant(shader, variant) { + panvk_shader_variant_destroy(variant); + } + + vk_shader_free(vk_dev, pAllocator, &shader->vk); } static const struct vk_shader_ops panvk_shader_ops; @@ -1275,13 +1283,14 @@ panvk_compile_shader(struct panvk_device *dev, /* We consume the NIR, regardless of success or failure */ nir_shader *nir = info->nir; + size_t size = + sizeof(struct panvk_shader) + sizeof(struct panvk_shader_variant) * + panvk_shader_num_variants(info->stage); shader = vk_shader_zalloc(&dev->vk, &panvk_shader_ops, info->stage, - pAllocator, sizeof(*shader)); + pAllocator, size); if (shader == NULL) return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); - shader->own_bin = true; - nir_variable_mode robust2_modes = 0; if (info->robustness->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT) robust2_modes |= nir_var_mem_ubo; @@ -1298,32 +1307,79 @@ panvk_compile_shader(struct panvk_device *dev, state->ms != NULL && 
state->ms->sample_shading_enable) nir->info.fs.uses_sample_shading = true; - panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts, - info->robustness, noperspective_varyings, state, &inputs, - shader); + if (info->stage == MESA_SHADER_VERTEX) { + struct pan_compile_inputs input_variants[PANVK_VS_VARIANTS] = {0}; + nir_shader *nir_variants[PANVK_VS_VARIANTS] = {0}; + + /* First we apply lowering for variants */ + for (enum panvk_vs_variant v = 0; v < PANVK_VS_VARIANTS; ++v) { + struct panvk_shader_variant *variant = &shader->variants[v]; + bool last = (v + 1) == PANVK_VS_VARIANTS; + + input_variants[v] = inputs; + + /* Each variant gets its own NIR. To save an extra clone, we use the + * original NIR for the last stage. + */ + nir_variants[v] = last ? nir : nir_shader_clone(NULL, nir); + + panvk_lower_nir(dev, nir_variants[v], info->set_layout_count, + info->set_layouts, info->robustness, + noperspective_varyings, state, &input_variants[v], + variant); + + variant->own_bin = true; + + result = panvk_compile_nir(dev, nir_variants[v], info->flags, + &input_variants[v], variant); + + /* We need to update info.push.count because it's used to initialize the + * RSD in pan_shader_prepare_rsd(). */ + variant->info.push.count = variant->fau.total_count * 2; + + if (result != VK_SUCCESS) { + panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return result; + } + + result = panvk_shader_upload(dev, variant, pAllocator); + + if (result != VK_SUCCESS) { + panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return result; + } + } + } else { + struct panvk_shader_variant *variant = + (struct panvk_shader_variant *)panvk_shader_only_variant(shader); + variant->own_bin = true; + + panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts, + info->robustness, noperspective_varyings, state, &inputs, + variant); #if PAN_ARCH >= 9 - if (info->stage == MESA_SHADER_FRAGMENT) - /* Use LD_VAR_BUF[_IMM] for varyings if possible. 
*/ - inputs.valhall.use_ld_var_buf = panvk_use_ld_var_buf(shader); + if (info->stage == MESA_SHADER_FRAGMENT) + /* Use LD_VAR_BUF[_IMM] for varyings if possible. */ + inputs.valhall.use_ld_var_buf = panvk_use_ld_var_buf(variant); #endif - result = panvk_compile_nir(dev, nir, info->flags, &inputs, shader); + result = panvk_compile_nir(dev, nir, info->flags, &inputs, variant); - /* We need to update info.push.count because it's used to initialize the - * RSD in pan_shader_prepare_rsd(). */ - shader->info.push.count = shader->fau.total_count * 2; + /* We need to update info.push.count because it's used to initialize the + * RSD in pan_shader_prepare_rsd(). */ + variant->info.push.count = variant->fau.total_count * 2; - if (result != VK_SUCCESS) { - panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); - return result; - } + if (result != VK_SUCCESS) { + panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return result; + } - result = panvk_shader_upload(dev, shader, pAllocator); + result = panvk_shader_upload(dev, variant, pAllocator); - if (result != VK_SUCCESS) { - panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); - return result; + if (result != VK_SUCCESS) { + panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return result; + } } *shader_out = &shader->vk; @@ -1341,20 +1397,26 @@ panvk_per_arch(create_shader_from_binary)(struct panvk_device *dev, struct panvk_shader *shader; VkResult result; + size_t size = + sizeof(struct panvk_shader) + sizeof(struct panvk_shader_variant) * + panvk_shader_num_variants(info->stage); shader = vk_shader_zalloc(&dev->vk, &panvk_shader_ops, info->stage, - &dev->vk.alloc, sizeof(*shader)); + &dev->vk.alloc, size); if (shader == NULL) return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); - shader->info = *info; - shader->cs.local_size = local_size; - shader->bin_ptr = bin_ptr; - shader->bin_size = bin_size; - shader->own_bin = false; - shader->nir_str = NULL; - shader->asm_str = NULL; + 
assert(panvk_shader_num_variants(info->stage) == 1); - result = panvk_shader_upload(dev, shader, &dev->vk.alloc); + struct panvk_shader_variant *variant = &shader->variants[0]; + variant->info = *info; + variant->cs.local_size = local_size; + variant->bin_ptr = bin_ptr; + variant->bin_size = bin_size; + variant->own_bin = false; + variant->nir_str = NULL; + variant->asm_str = NULL; + + result = panvk_shader_upload(dev, variant, &dev->vk.alloc); if (result != VK_SUCCESS) { panvk_shader_destroy(&dev->vk, &shader->vk, &dev->vk.alloc); @@ -1386,10 +1448,9 @@ panvk_compile_shaders(struct vk_device *vk_dev, uint32_t shader_count, for (i = shader_count - 1; i >= 0; i--) { uint32_t *noperspective_varyings_ptr = use_static_noperspective ? &noperspective_varyings : NULL; - result = panvk_compile_shader(dev, &infos[i], state, - noperspective_varyings_ptr, - pAllocator, - &shaders_out[i]); + result = + panvk_compile_shader(dev, &infos[i], state, noperspective_varyings_ptr, + pAllocator, &shaders_out[i]); if (result != VK_SUCCESS) goto err_cleanup; @@ -1399,9 +1460,11 @@ panvk_compile_shaders(struct vk_device *vk_dev, uint32_t shader_count, if (infos[i].nir->info.stage == MESA_SHADER_FRAGMENT) { struct panvk_shader *shader = container_of(shaders_out[i], struct panvk_shader, vk); + const struct panvk_shader_variant *variant = + panvk_shader_only_variant(shader); use_static_noperspective = true; - noperspective_varyings = shader->info.varyings.noperspective; + noperspective_varyings = variant->info.varyings.noperspective; } /* Clean up NIR for the current shader */ @@ -1429,8 +1492,9 @@ err_cleanup: } static VkResult -shader_desc_info_deserialize(struct blob_reader *blob, - struct panvk_shader *shader) +shader_desc_info_deserialize(struct panvk_device *dev, + struct blob_reader *blob, + struct panvk_shader_variant *shader) { shader->desc_info.used_set_mask = blob_read_uint32(blob); @@ -1449,7 +1513,6 @@ shader_desc_info_deserialize(struct blob_reader *blob, } if (others_count) { 
- struct panvk_device *dev = to_panvk_device(shader->vk.base.device); struct panvk_pool_alloc_info alloc_info = { .size = others_count * sizeof(uint32_t), .alignment = sizeof(uint32_t), @@ -1468,20 +1531,19 @@ shader_desc_info_deserialize(struct blob_reader *blob, shader->desc_info.dyn_bufs.count = blob_read_uint32(blob); blob_copy_bytes(blob, shader->desc_info.dyn_bufs.map, sizeof(*shader->desc_info.dyn_bufs.map) * - shader->desc_info.dyn_bufs.count); + shader->desc_info.dyn_bufs.count); #endif return VK_SUCCESS; } static VkResult -panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, - uint32_t binary_version, - const VkAllocationCallbacks *pAllocator, - struct vk_shader **shader_out) +panvk_deserialize_shader_variant(struct vk_device *vk_dev, + struct blob_reader *blob, + const VkAllocationCallbacks *pAllocator, + struct panvk_shader_variant *shader) { struct panvk_device *device = to_panvk_device(vk_dev); - struct panvk_shader *shader; struct pan_shader_info info; VkResult result; @@ -1489,11 +1551,6 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, if (blob->overrun) return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); - shader = vk_shader_zalloc(vk_dev, &panvk_shader_ops, info.stage, pAllocator, - sizeof(*shader)); - if (shader == NULL) - return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - shader->info = info; blob_copy_bytes(blob, &shader->fau, sizeof(shader->fau)); @@ -1516,45 +1573,70 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, shader->bin_size = blob_read_uint32(blob); - if (blob->overrun) { - panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + if (blob->overrun) return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); - } shader->bin_ptr = malloc(shader->bin_size); - if (shader->bin_ptr == NULL) { - panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + if (shader->bin_ptr == NULL) return panvk_error(device, 
VK_ERROR_OUT_OF_HOST_MEMORY); - } blob_copy_bytes(blob, (void *)shader->bin_ptr, shader->bin_size); - result = shader_desc_info_deserialize(blob, shader); + result = shader_desc_info_deserialize(device, blob, shader); - if (result != VK_SUCCESS) { - panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + if (result != VK_SUCCESS) return panvk_error(device, result); - } - if (blob->overrun) { - panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + if (blob->overrun) return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); - } result = panvk_shader_upload(device, shader, pAllocator); - if (result != VK_SUCCESS) { - panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + if (result != VK_SUCCESS) return result; - } - - *shader_out = &shader->vk; return result; } +static VkResult +panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, + uint32_t binary_version, + const VkAllocationCallbacks *pAllocator, + struct vk_shader **shader_out) +{ + struct panvk_device *device = to_panvk_device(vk_dev); + struct panvk_shader *shader; + + gl_shader_stage stage = blob_read_uint8(blob); + if (blob->overrun) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + size_t size = + sizeof(struct panvk_shader) + + sizeof(struct panvk_shader_variant) * panvk_shader_num_variants(stage); + shader = + vk_shader_zalloc(vk_dev, &panvk_shader_ops, stage, pAllocator, size); + if (shader == NULL) + return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + panvk_shader_foreach_variant(shader, variant) { + VkResult result = + panvk_deserialize_shader_variant(vk_dev, blob, pAllocator, variant); + + if (result != VK_SUCCESS) { + panvk_shader_destroy(vk_dev, &shader->vk, pAllocator); + return result; + } + } + + *shader_out = &shader->vk; + + return VK_SUCCESS; +} + static void -shader_desc_info_serialize(struct blob *blob, const struct panvk_shader *shader) +shader_desc_info_serialize(struct blob *blob, + const struct panvk_shader_variant 
*shader) { blob_write_uint32(blob, shader->desc_info.used_set_mask); @@ -1581,17 +1663,15 @@ shader_desc_info_serialize(struct blob *blob, const struct panvk_shader *shader) blob_write_uint32(blob, shader->desc_info.dyn_bufs.count); blob_write_bytes(blob, shader->desc_info.dyn_bufs.map, sizeof(*shader->desc_info.dyn_bufs.map) * - shader->desc_info.dyn_bufs.count); + shader->desc_info.dyn_bufs.count); #endif } static bool -panvk_shader_serialize(struct vk_device *vk_dev, - const struct vk_shader *vk_shader, struct blob *blob) +panvk_shader_serialize_variant(struct vk_device *vk_dev, + const struct panvk_shader_variant *shader, + struct blob *blob) { - struct panvk_shader *shader = - container_of(vk_shader, struct panvk_shader, vk); - /** * We can't currently cache assembly * TODO: Implement seriaization with assembly @@ -1625,55 +1705,124 @@ panvk_shader_serialize(struct vk_device *vk_dev, return !blob->out_of_memory; } +static bool +panvk_shader_serialize(struct vk_device *vk_dev, + const struct vk_shader *vk_shader, struct blob *blob) +{ + struct panvk_shader *shader = + container_of(vk_shader, struct panvk_shader, vk); + + blob_write_uint8(blob, vk_shader->stage); + + panvk_shader_foreach_variant(shader, variant) { + panvk_shader_serialize_variant(vk_dev, variant, blob); + } + + return !blob->out_of_memory; +} + static VkResult panvk_shader_get_executable_properties( UNUSED struct vk_device *device, const struct vk_shader *vk_shader, uint32_t *executable_count, VkPipelineExecutablePropertiesKHR *properties) { - UNUSED struct panvk_shader *shader = + struct panvk_shader *shader = container_of(vk_shader, struct panvk_shader, vk); VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, properties, executable_count); - vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) - { - props->stages = mesa_to_vk_shader_stage(shader->info.stage); - props->subgroupSize = pan_subgroup_size(PAN_ARCH); - VK_COPY_STR(props->name, - 
_mesa_shader_stage_to_string(shader->info.stage)); - VK_PRINT_STR(props->description, "%s shader", - _mesa_shader_stage_to_string(shader->info.stage)); - } + panvk_shader_foreach_variant(shader, variant) { + /* Ignore absent variants but always add vertex on IDVS */ + if (variant->bin_size == 0 && + (variant->info.stage != MESA_SHADER_VERTEX || !variant->info.vs.idvs)) + continue; + + const char *variant_name = panvk_shader_variant_name(shader, variant); + const char *stage_name = _mesa_shader_stage_to_string(shader->vk.stage); - if (shader->info.stage == MESA_SHADER_VERTEX && shader->info.vs.idvs) { vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { - props->stages = mesa_to_vk_shader_stage(shader->info.stage); + props->stages = mesa_to_vk_shader_stage(shader->vk.stage); props->subgroupSize = pan_subgroup_size(PAN_ARCH); - VK_COPY_STR(props->name, "varying"); - VK_COPY_STR(props->description, "Varying shader"); + + if (variant_name != NULL) { + VK_PRINT_STR(props->name, "%s %s", variant_name, stage_name); + VK_PRINT_STR(props->description, "%s %s shader", variant_name, + stage_name); + } else { + VK_COPY_STR(props->name, stage_name); + VK_PRINT_STR(props->description, "%s shader", stage_name); + } + } + + if (variant->info.stage == MESA_SHADER_VERTEX && variant->info.vs.idvs) { + vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, + props) + { + props->stages = mesa_to_vk_shader_stage(shader->vk.stage); + props->subgroupSize = pan_subgroup_size(PAN_ARCH); + VK_COPY_STR(props->name, "varying"); + VK_COPY_STR(props->description, "varying shader"); + } } } return vk_outarray_status(&out); } +static const struct panvk_shader_variant * +get_variant_from_executable_index(struct panvk_shader *shader, + uint32_t executable_index) +{ + uint32_t i = 0; + + panvk_shader_foreach_variant(shader, variant) { + /* Ignore absent variants but always add vertex on IDVS */ + if (variant->bin_size == 0 && + (variant->info.stage != 
MESA_SHADER_VERTEX || !variant->info.vs.idvs)) + continue; + + if (i == executable_index) + return variant; + + i++; + } + + return NULL; +} + static VkResult panvk_shader_get_executable_statistics( UNUSED struct vk_device *device, const struct vk_shader *vk_shader, uint32_t executable_index, uint32_t *statistic_count, VkPipelineExecutableStatisticKHR *statistics) { - UNUSED struct panvk_shader *shader = + struct panvk_shader *shader = container_of(vk_shader, struct panvk_shader, vk); + bool needs_vary = false; + if (shader->vk.stage == MESA_SHADER_VERTEX) { + assert(executable_index == 0 || executable_index == 1); + + needs_vary = executable_index == 1; + + /* Readjust index to skip embedded varying variant */ + if (executable_index >= 1) + executable_index--; + } + + assert(executable_index < panvk_shader_num_variants(shader->vk.stage)); + const struct panvk_shader_variant *variant = + get_variant_from_executable_index(shader, executable_index); + assert(variant != NULL); + VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, statistics, statistic_count); assert(executable_index == 0 || executable_index == 1); - struct pan_stats *stats = - executable_index ? &shader->info.stats_idvs_varying : &shader->info.stats; + const struct pan_stats *stats = + needs_vary ? 
&variant->info.stats_idvs_varying : &variant->info.stats; vk_add_pan_stats(out, stats); return vk_outarray_status(&out); @@ -1706,32 +1855,54 @@ panvk_shader_get_executable_internal_representations( uint32_t executable_index, uint32_t *internal_representation_count, VkPipelineExecutableInternalRepresentationKHR *internal_representations) { - UNUSED struct panvk_shader *shader = + struct panvk_shader *shader = container_of(vk_shader, struct panvk_shader, vk); + VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out, internal_representations, internal_representation_count); + + bool needs_vary = false; + if (shader->vk.stage == MESA_SHADER_VERTEX) { + assert(executable_index == 0 || executable_index == 1); + + needs_vary = executable_index == 1; + + /* Readjust index to skip embedded varying variant */ + if (executable_index >= 1) + executable_index--; + } + + /* XXX: Varying shader assembly */ + if (needs_vary) + return vk_outarray_status(&out); + + assert(executable_index < panvk_shader_num_variants(shader->vk.stage)); + const struct panvk_shader_variant *variant = + get_variant_from_executable_index(shader, executable_index); + assert(variant != NULL); + bool incomplete_text = false; - if (shader->nir_str != NULL) { + if (variant->nir_str != NULL) { vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { VK_COPY_STR(ir->name, "NIR shader"); VK_COPY_STR(ir->description, - "NIR shader before sending to the back-end compiler"); - if (!write_ir_text(ir, shader->nir_str)) + "NIR shader before sending to the back-end compiler"); + if (!write_ir_text(ir, variant->nir_str)) incomplete_text = true; } } - if (shader->asm_str != NULL) { + if (variant->asm_str != NULL) { vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { VK_COPY_STR(ir->name, "Assembly"); VK_COPY_STR(ir->description, "Final Assembly"); - if (!write_ir_text(ir, shader->asm_str)) + if (!write_ir_text(ir, variant->asm_str)) 
incomplete_text = true; } } @@ -1862,8 +2033,8 @@ emit_varying_attrs(struct panvk_pool *desc_pool, VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, - const struct panvk_shader *vs, - const struct panvk_shader *fs, + const struct panvk_shader_variant *vs, + const struct panvk_shader_variant *fs, struct panvk_shader_link *link) { BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0};