diff --git a/src/panfrost/ci/panfrost-g610-fails.txt b/src/panfrost/ci/panfrost-g610-fails.txt index 72b4015dae2..5d2ec67c3b3 100644 --- a/src/panfrost/ci/panfrost-g610-fails.txt +++ b/src/panfrost/ci/panfrost-g610-fails.txt @@ -1837,25 +1837,6 @@ dEQP-VK.fragment_operations.occlusion_query.conservative_test_test_all,Crash dEQP-VK.pipeline.fast_linked_library.max_varyings.test_vertex_io_between_vertex_fragment,Fail dEQP-VK.pipeline.fast_linked_library.multisample.alpha_to_coverage_no_color_attachment.samples_4.alpha_opaque,Fail dEQP-VK.pipeline.fast_linked_library.multisample.alpha_to_coverage_unused_attachment.samples_4.alpha_invisible,Fail -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_decc.pass_decw.dfail_incc.comp_never,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_incc.pass_decw.dfail_incc.comp_always,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_incc.pass_decw.dfail_zero.comp_greater,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_inv.pass_incc.dfail_wrap.comp_not_equal,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_keep.pass_keep.dfail_incc.comp_always,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_keep.pass_repl.dfail_decc.comp_greater,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_repl.pass_decw.dfail_decc.comp_less_or_equal,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_zero.pass_incc.dfail_decc.comp_less,Crash -dEQP-VK.pipeline.fast_linked_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_zero.pass_repl.dfail_zero.comp_always,Crash - -dEQP-VK.pipeline.monolithic.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_incc.pass_incc.dfail_zero.comp_never,Crash 
-dEQP-VK.pipeline.monolithic.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_incc.pass_zero.dfail_repl.comp_greater_or_equal,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_decc.pass_zero.dfail_decw.comp_greater_or_equal,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_decw.pass_incc.dfail_zero.comp_greater_or_equal,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_incc.pass_repl.dfail_repl.comp_less,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_keep.pass_keep.dfail_repl.comp_greater_or_equal,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_wrap.pass_wrap.dfail_incc.comp_less_or_equal,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_zero.pass_decc.dfail_wrap.comp_always,Crash -dEQP-VK.pipeline.pipeline_library.stencil.nocolor.format.d24_unorm_s8_uint.states.fail_zero.pass_inv.dfail_inv.comp_greater_or_equal,Crash dEQP-VK.glsl.loops.special.do_while_dynamic_iterations.dowhile_trap_vertex,Crash diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 5779d8fbc7d..38cb471e18c 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -810,15 +810,8 @@ panvk_cmd_invalidate_state(struct panvk_cmd_buffer *cmdbuf) memset(&cmdbuf->state.gfx, 0, sizeof(cmdbuf->state.gfx)); cmdbuf->state.gfx.render = render_save; - cmdbuf->state.gfx.fs.desc.res_table = 0; - cmdbuf->state.gfx.fs.spd = 0; - cmdbuf->state.gfx.vs.desc.res_table = 0; - cmdbuf->state.gfx.vs.spds.pos = 0; - cmdbuf->state.gfx.vs.spds.var = 0; - cmdbuf->state.gfx.vb.dirty = true; - cmdbuf->state.gfx.ib.dirty = true; - vk_dynamic_graphics_state_dirty_all(&cmdbuf->vk.dynamic_graphics_state); + gfx_state_set_all_dirty(cmdbuf); } 
VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 076b26466cc..7fbf091dc09 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -35,7 +35,8 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) struct panvk_shader_desc_state *cs_desc_state = &cmdbuf->state.compute.cs.desc; - if (cs_desc_state->driver_set.dev_addr) + if (!compute_state_dirty(cmdbuf, CS) && + !compute_state_dirty(cmdbuf, DESC_STATE)) return VK_SUCCESS; const struct panvk_descriptor_state *desc_state = @@ -58,6 +59,7 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) cs_desc_state->driver_set.dev_addr = driver_set.gpu; cs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE; + compute_state_set_dirty(cmdbuf, DESC_STATE); return VK_SUCCESS; } @@ -234,10 +236,13 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) GENX(pan_emit_tls)(&tlsinfo, tsd.cpu); - result = panvk_per_arch(cmd_prepare_push_descs)( - cmdbuf, desc_state, shader->desc_info.used_set_mask); - if (result != VK_SUCCESS) - return; + if (compute_state_dirty(cmdbuf, DESC_STATE) || + compute_state_dirty(cmdbuf, CS)) { + result = panvk_per_arch(cmd_prepare_push_descs)( + cmdbuf, desc_state, shader->desc_info.used_set_mask); + if (result != VK_SUCCESS) + return; + } struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; /* If indirect, sysvals->num_work_groups will be written by the CS */ @@ -249,20 +254,23 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) sysvals->local_group_size.x = shader->local_size.x; sysvals->local_group_size.y = shader->local_size.y; sysvals->local_group_size.z = shader->local_size.z; + compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); result = prepare_driver_set(cmdbuf); if (result != VK_SUCCESS) return; - cmdbuf->state.compute.push_uniforms = 0; result = 
prepare_push_uniforms(cmdbuf); if (result != VK_SUCCESS) return; - result = panvk_per_arch(cmd_prepare_shader_res_table)(cmdbuf, desc_state, - shader, cs_desc_state); - if (result != VK_SUCCESS) - return; + if (compute_state_dirty(cmdbuf, CS) || + compute_state_dirty(cmdbuf, DESC_STATE)) { + result = panvk_per_arch(cmd_prepare_shader_res_table)( + cmdbuf, desc_state, shader, cs_desc_state); + if (result != VK_SUCCESS) + return; + } struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE); @@ -277,13 +285,22 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) } cs_update_compute_ctx(b) { - cs_move64_to(b, cs_sr_reg64(b, 0), cs_desc_state->res_table); - uint32_t push_size = 256 + sizeof(struct panvk_compute_sysvals); - uint64_t fau_count = DIV_ROUND_UP(push_size, 8); - mali_ptr fau_ptr = - cmdbuf->state.compute.push_uniforms | (fau_count << 56); - cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr); - cs_move64_to(b, cs_sr_reg64(b, 16), panvk_priv_mem_dev_addr(shader->spd)); + if (compute_state_dirty(cmdbuf, CS) || + compute_state_dirty(cmdbuf, DESC_STATE)) + cs_move64_to(b, cs_sr_reg64(b, 0), cs_desc_state->res_table); + + if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) { + uint32_t push_size = 256 + sizeof(struct panvk_compute_sysvals); + uint64_t fau_count = DIV_ROUND_UP(push_size, 8); + mali_ptr fau_ptr = + cmdbuf->state.compute.push_uniforms | (fau_count << 56); + cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr); + } + + if (compute_state_dirty(cmdbuf, CS)) + cs_move64_to(b, cs_sr_reg64(b, 16), + panvk_priv_mem_dev_addr(shader->spd)); + cs_move64_to(b, cs_sr_reg64(b, 24), tsd.gpu); /* Global attribute offset */ @@ -376,6 +393,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_wait_slot(b, SB_ID(LS), false); ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point; + clear_dirty_after_dispatch(cmdbuf); } VKAPI_ATTR void VKAPI_CALL diff --git 
a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index a9d5471c69c..7c2ac47b2b2 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -104,19 +104,23 @@ emit_vs_attrib(const struct vk_vertex_attribute_state *attrib_info, } } +static bool +vs_driver_set_is_dirty(struct panvk_cmd_buffer *cmdbuf) +{ + return dyn_gfx_state_dirty(cmdbuf, VI) || + dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) || + dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) || + gfx_state_dirty(cmdbuf, VB) || gfx_state_dirty(cmdbuf, VS) || + gfx_state_dirty(cmdbuf, DESC_STATE); +} + static VkResult prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf) { - struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; - bool dirty = dyn_gfx_state_dirty(cmdbuf, VI) || - dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) || - dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) || - cmdbuf->state.gfx.vb.dirty || - !vs_desc_state->driver_set.dev_addr; - - if (!dirty) + if (!vs_driver_set_is_dirty(cmdbuf)) return VK_SUCCESS; + struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct vk_vertex_input_state *vi = cmdbuf->vk.dynamic_graphics_state.vi; @@ -175,6 +179,7 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf) vs_desc_state->driver_set.dev_addr = driver_set.gpu; vs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE; + gfx_state_set_dirty(cmdbuf, DESC_STATE); return VK_SUCCESS; } @@ -182,13 +187,9 @@ static VkResult prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) { struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; - - if (fs_desc_state->driver_set.dev_addr) - return VK_SUCCESS; - + const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; const struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; - const struct panvk_shader *fs = 
cmdbuf->state.gfx.fs.shader; uint32_t desc_count = fs->desc_info.dyn_bufs.count + 1; struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem( cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); @@ -206,6 +207,7 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) fs_desc_state->driver_set.dev_addr = driver_set.gpu; fs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE; + gfx_state_set_dirty(cmdbuf, DESC_STATE); return VK_SUCCESS; } @@ -221,9 +223,9 @@ prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; const struct vk_rasterization_state *rs = &cmdbuf->vk.dynamic_graphics_state.rs; - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - if (sysvals->fs.multisampled != (fbinfo->nr_samples > 1)) { + + if (gfx_state_dirty(cmdbuf, RENDER_STATE)) { sysvals->fs.multisampled = fbinfo->nr_samples > 1; cmdbuf->state.gfx.push_uniforms = 0; } @@ -232,7 +234,7 @@ prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) sysvals->blend.constants[i] = CLAMP(cb->blend_constants[i], 0.0f, 1.0f); - cmdbuf->state.gfx.push_uniforms = 0; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || @@ -294,8 +296,7 @@ prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) /* Bump offset off-center if necessary, to not go out of range */ sysvals->viewport.offset.z = CLAMP(z_offset, 0.0f, 1.0f); } - - cmdbuf->state.gfx.push_uniforms = 0; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } } @@ -401,40 +402,6 @@ translate_stencil_op(VkStencilOp in) } } -static bool -fs_required(struct panvk_cmd_buffer *cmdbuf) -{ - const struct pan_shader_info *fs_info = - cmdbuf->state.gfx.fs.shader ? 
&cmdbuf->state.gfx.fs.shader->info : NULL; - const struct vk_dynamic_graphics_state *dyns = - &cmdbuf->vk.dynamic_graphics_state; - const struct vk_color_blend_state *cb = &dyns->cb; - - if (!fs_info) - return false; - - /* If we generally have side effects */ - if (fs_info->fs.sidefx) - return true; - - /* If colour is written we need to execute */ - for (unsigned i = 0; i < cb->attachment_count; ++i) { - if ((cb->color_write_enables & BITFIELD_BIT(i)) && - cb->attachments[i].write_mask) - return true; - } - - /* If alpha-to-coverage is enabled, we need to run the fragment shader even - * if we don't have a color attachment, so depth/stencil updates can be - * discarded if alpha, and thus coverage, is 0. */ - if (dyns->ms.alpha_to_coverage_enable) - return true; - - /* If depth is written and not implied we need to execute. - * TODO: Predicate on Z/S writes being enabled */ - return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil); -} - static enum mali_draw_mode translate_prim_topology(VkPrimitiveTopology in) { @@ -741,12 +708,11 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia; - mali_ptr pos_spd = get_pos_spd(cmdbuf); float primitive_size; if (!dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) && !dyn_gfx_state_dirty(cmdbuf, RS_LINE_WIDTH) && - cmdbuf->state.gfx.vs.spds.pos == pos_spd) + !gfx_state_dirty(cmdbuf, VS)) return; switch (ia->primitive_topology) { @@ -1073,15 +1039,14 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf) const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - mali_ptr pos_spd = get_pos_spd(cmdbuf); - mali_ptr var_spd = panvk_priv_mem_dev_addr(vs->spds.var); bool upd_res_table = false; - if (!vs_desc_state->res_table) { - VkResult result = prepare_vs_driver_set(cmdbuf); - if (result != 
VK_SUCCESS) - return result; + VkResult result = prepare_vs_driver_set(cmdbuf); + if (result != VK_SUCCESS) + return result; + if (gfx_state_dirty(cmdbuf, VS) || gfx_state_dirty(cmdbuf, DESC_STATE) || + vs_driver_set_is_dirty(cmdbuf)) { result = panvk_per_arch(cmd_prepare_shader_res_table)(cmdbuf, desc_state, vs, vs_desc_state); if (result != VK_SUCCESS) @@ -1094,36 +1059,29 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf) if (upd_res_table) cs_move64_to(b, cs_sr_reg64(b, 0), vs_desc_state->res_table); - if (pos_spd != cmdbuf->state.gfx.vs.spds.pos) - cs_move64_to(b, cs_sr_reg64(b, 16), pos_spd); + if (gfx_state_dirty(cmdbuf, VS) || + dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY)) + cs_move64_to(b, cs_sr_reg64(b, 16), get_pos_spd(cmdbuf)); - if (var_spd != cmdbuf->state.gfx.vs.spds.var) - cs_move64_to(b, cs_sr_reg64(b, 18), var_spd); + if (gfx_state_dirty(cmdbuf, VS)) + cs_move64_to(b, cs_sr_reg64(b, 18), + panvk_priv_mem_dev_addr(vs->spds.var)); } return VK_SUCCESS; } -static inline uint64_t -get_fs_spd(const struct panvk_shader *fs) -{ - return fs ? panvk_priv_mem_dev_addr(fs->spd) : 0; -} - static VkResult prepare_fs(struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *fs = - fs_required(cmdbuf) ? cmdbuf->state.gfx.fs.shader : NULL; + const struct panvk_shader *fs = get_fs(cmdbuf); struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - mali_ptr frag_spd = get_fs_spd(fs); - bool upd_res_table = false; - /* No need to setup the FS desc tables if the FS is not executed. 
*/ - if (fs && !fs_desc_state->res_table) { + if (fs && + (gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, DESC_STATE))) { VkResult result = prepare_fs_driver_set(cmdbuf); if (result != VK_SUCCESS) return result; @@ -1132,22 +1090,14 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf) fs, fs_desc_state); if (result != VK_SUCCESS) return result; - - upd_res_table = true; } - /* If this is the first time we execute a RUN_IDVS, and no fragment - * shader is required, we still force an update of the make sure we don't - * inherit the value set by a previous command buffer. */ - if (!fs_desc_state->res_table && !fs) - upd_res_table = true; - cs_update_vt_ctx(b) { - if (upd_res_table) - cs_move64_to(b, cs_sr_reg64(b, 4), fs_desc_state->res_table); - - if (cmdbuf->state.gfx.fs.spd != frag_spd) - cs_move64_to(b, cs_sr_reg64(b, 20), frag_spd); + if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE)) + cs_move64_to(b, cs_sr_reg64(b, 4), fs ? fs_desc_state->res_table : 0); + if (fs_user_dirty(cmdbuf)) + cs_move64_to(b, cs_sr_reg64(b, 20), + fs ? 
panvk_priv_mem_dev_addr(fs->spd) : 0); } return VK_SUCCESS; @@ -1159,7 +1109,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - if (!cmdbuf->state.gfx.push_uniforms) { + if (gfx_state_dirty(cmdbuf, PUSH_UNIFORMS)) { cmdbuf->state.gfx.push_uniforms = panvk_per_arch( cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals)); @@ -1182,8 +1132,6 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) static VkResult prepare_ds(struct panvk_cmd_buffer *cmdbuf) { - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; - mali_ptr frag_spd = get_fs_spd(fs); bool dirty = dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) || dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) || @@ -1196,14 +1144,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) || - /* fs_required() uses ms.alpha_to_coverage_enable - * and vk_color_blend_state - */ - dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) || - dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) || - dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || - cmdbuf->state.gfx.fs.spd != frag_spd; + fs_user_dirty(cmdbuf); if (!dirty) return VK_SUCCESS; @@ -1216,7 +1157,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) const struct vk_rasterization_state *rs = &dyns->rs; bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable; bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable; - bool needs_fs = fs_required(cmdbuf); + const struct panvk_shader *fs = get_fs(cmdbuf); struct panfrost_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL); if (!zsd.gpu) @@ -1240,7 +1181,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) 
cfg.back_depth_pass = translate_stencil_op(ds->stencil.back.op.pass); } - cfg.stencil_from_shader = needs_fs ? fs->info.fs.writes_stencil : 0; + cfg.stencil_from_shader = fs ? fs->info.fs.writes_stencil : 0; cfg.front_write_mask = ds->stencil.front.write_mask; cfg.back_write_mask = ds->stencil.back.write_mask; cfg.front_value_mask = ds->stencil.front.compare_mask; @@ -1274,17 +1215,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) { struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - - const struct panvk_shader *fs = NULL; - bool fs_is_dirty = false; - bool needs_fs = fs_required(cmdbuf); - if (needs_fs) { - fs = cmdbuf->state.gfx.fs.shader; - fs_is_dirty = cmdbuf->state.gfx.fs.spd != get_fs_spd(fs); - } else { - fs_is_dirty = cmdbuf->state.gfx.fs.spd != 0; - } - + const struct panvk_shader *fs = get_fs(cmdbuf); bool dcd0_dirty = dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_CULL_MODE) || @@ -1301,21 +1232,11 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) || dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_OP) || dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) || - /* fs_required() uses vk_color_blend_state */ - dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) || - dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) || - dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || fs_is_dirty || - cmdbuf->state.gfx.render.dirty; + fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE); bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) || dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || - /* fs_required() uses ms.alpha_to_coverage_enable - * and vk_color_blend_state - */ - dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) || - dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) || - dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || - fs_is_dirty || 
cmdbuf->state.gfx.render.dirty; + fs_user_dirty(cmdbuf) || + gfx_state_dirty(cmdbuf, RENDER_STATE); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; @@ -1328,7 +1249,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) if (dcd0_dirty) { struct mali_dcd_flags_0_packed dcd0; pan_pack(&dcd0, DCD_FLAGS_0, cfg) { - if (needs_fs) { + if (fs) { uint8_t rt_written = fs->info.outputs_written >> FRAG_RESULT_DATA0; uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS; @@ -1374,7 +1295,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) ? dyns->ms.sample_mask : UINT16_MAX; - if (needs_fs) { + if (fs) { cfg.render_target_mask = (fs->info.outputs_written >> FRAG_RESULT_DATA0) & cmdbuf->state.gfx.render.bound_attachments; @@ -1393,7 +1314,7 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf, struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - if (draw->index.size && cmdbuf->state.gfx.ib.dirty) { + if (draw->index.size && gfx_state_dirty(cmdbuf, IB)) { uint64_t ib_size = panvk_buffer_range(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE); @@ -1406,59 +1327,26 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf, } } -static void -clear_dirty(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) -{ - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = - fs_required(cmdbuf) ? 
cmdbuf->state.gfx.fs.shader : NULL; - - if (vs) { - cmdbuf->state.gfx.vs.spds.pos = get_pos_spd(cmdbuf); - cmdbuf->state.gfx.vs.spds.var = panvk_priv_mem_dev_addr(vs->spds.var); - } - - cmdbuf->state.gfx.fs.spd = get_fs_spd(fs); - - cmdbuf->state.gfx.vb.dirty = false; - if (draw->index.size) - cmdbuf->state.gfx.ib.dirty = false; - - cmdbuf->state.gfx.render.dirty = false; - vk_dynamic_graphics_state_clear_dirty(&cmdbuf->vk.dynamic_graphics_state); -} - static void set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader *fs = get_fs(cmdbuf); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_input_assembly_state *ia = &dyns->ia; const struct vk_rasterization_state *rs = &dyns->rs; - struct mali_primitive_flags_packed tiler_idvs_flags; bool writes_point_size = vs->info.vs.writes_point_size && ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - bool dirty = - get_pos_spd(cmdbuf) != cmdbuf->state.gfx.vs.spds.pos || - panvk_priv_mem_dev_addr(vs->spds.var) != cmdbuf->state.gfx.vs.spds.var || - /* fs_required() uses ms.alpha_to_coverage_enable - * and vk_color_blend_state - */ - dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) || - dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) || - dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || - dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_RESTART_ENABLE) || - dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) || - cmdbuf->state.gfx.fs.spd != get_fs_spd(fs); + bool dirty = gfx_state_dirty(cmdbuf, VS) || fs_user_dirty(cmdbuf) || + dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_RESTART_ENABLE) || + 
dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) || + dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE); if (dirty) { pan_pack(&tiler_idvs_flags, PRIMITIVE_FLAGS, cfg) { @@ -1480,8 +1368,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, cfg.low_depth_cull = cfg.high_depth_cull = vk_rasterization_state_depth_clip_enable(rs); - cfg.secondary_shader = - vs->info.vs.secondary_enable && fs_required(cmdbuf); + cfg.secondary_shader = vs->info.vs.secondary_enable && fs != NULL; cfg.primitive_restart = ia->primitive_restart_enable; } @@ -1506,7 +1393,7 @@ static VkResult prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader *fs = get_fs(cmdbuf); struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; bool idvs = vs->info.vs.idvs; VkResult result; @@ -1537,10 +1424,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) uint32_t used_set_mask = vs->desc_info.used_set_mask | (fs ? 
fs->desc_info.used_set_mask : 0); - result = - panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, used_set_mask); - if (result != VK_SUCCESS) - return result; + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS) || + gfx_state_dirty(cmdbuf, FS)) { + result = panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, + used_set_mask); + if (result != VK_SUCCESS) + return result; + } prepare_sysvals(cmdbuf); @@ -1590,8 +1480,7 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) prepare_tiler_primitive_size(cmdbuf); } - clear_dirty(cmdbuf, draw); - + clear_dirty_after_draw(cmdbuf); return VK_SUCCESS; } @@ -1827,7 +1716,7 @@ panvk_cmd_init_render_state(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.gfx.render.flags = pRenderingInfo->flags; - cmdbuf->state.gfx.render.dirty = true; + gfx_state_set_dirty(cmdbuf, RENDER_STATE); memset(cmdbuf->state.gfx.render.fb.crc_valid, 0, sizeof(cmdbuf->state.gfx.render.fb.crc_valid)); memset(&cmdbuf->state.gfx.render.color_attachments, 0, @@ -2082,7 +1971,7 @@ panvk_per_arch(cmd_inherit_render_state)( cmdbuf->state.gfx.render.flags = inheritance_info->flags; - cmdbuf->state.gfx.render.dirty = true; + gfx_state_set_dirty(cmdbuf, RENDER_STATE); memset(cmdbuf->state.gfx.render.fb.crc_valid, 0, sizeof(cmdbuf->state.gfx.render.fb.crc_valid)); memset(&cmdbuf->state.gfx.render.color_attachments, 0, @@ -2632,9 +2521,7 @@ panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer, cmdbuf->state.gfx.vb.count = MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount); - memset(&cmdbuf->state.gfx.vs.desc.driver_set, 0, - sizeof(cmdbuf->state.gfx.vs.desc.driver_set)); - cmdbuf->state.gfx.vb.dirty = true; + gfx_state_set_dirty(cmdbuf, VB); } VKAPI_ATTR void VKAPI_CALL @@ -2648,5 +2535,5 @@ panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer, cmdbuf->state.gfx.ib.buffer = buf; cmdbuf->state.gfx.ib.offset = offset; cmdbuf->state.gfx.ib.index_size = 
vk_index_type_to_bytes(indexType); - cmdbuf->state.gfx.ib.dirty = true; + gfx_state_set_dirty(cmdbuf, IB); } diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index 1cc7669545a..dbb41731bf1 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -83,21 +83,25 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, sysvals->local_group_size.y = shader->local_size.y; sysvals->local_group_size.z = shader->local_size.z; - result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader, - cs_desc_state); - if (result != VK_SUCCESS) - return; + if (compute_state_dirty(cmdbuf, CS) || + compute_state_dirty(cmdbuf, DESC_STATE)) { + result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader, + cs_desc_state); + if (result != VK_SUCCESS) + return; - sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos; + sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos; + } for (uint32_t i = 0; i < MAX_SETS; i++) { if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; } - cmdbuf->state.compute.push_uniforms = 0; + /* We unconditionally update the sysvals, so push_uniforms is always dirty. 
*/ + compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - if (!cmdbuf->state.compute.push_uniforms) { + if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) { cmdbuf->state.compute.push_uniforms = panvk_per_arch( cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.compute.sysvals, sizeof(cmdbuf->state.compute.sysvals)); @@ -107,18 +111,22 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms; - result = panvk_per_arch(cmd_prepare_shader_desc_tables)( - cmdbuf, desc_state, shader, cs_desc_state); + struct panfrost_ptr copy_desc_job = {0}; - struct panfrost_ptr copy_desc_job; - result = panvk_per_arch(meta_get_copy_desc_job)( - cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0, - &copy_desc_job); - if (result != VK_SUCCESS) - return; + if (compute_state_dirty(cmdbuf, CS) || + compute_state_dirty(cmdbuf, DESC_STATE)) { + result = panvk_per_arch(cmd_prepare_shader_desc_tables)( + cmdbuf, desc_state, shader, cs_desc_state); - if (copy_desc_job.cpu) - util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu); + result = panvk_per_arch(meta_get_copy_desc_job)( + cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0, + &copy_desc_job); + if (result != VK_SUCCESS) + return; + + if (copy_desc_job.cpu) + util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu); + } struct panfrost_ptr job = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB); if (!job.gpu) @@ -170,6 +178,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, } panvk_per_arch(cmd_close_batch)(cmdbuf); + clear_dirty_after_dispatch(cmdbuf); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 96aa4de3b2e..17eefedf174 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -109,14 +109,15 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, sysvals->vs.base_instance = 
draw->first_instance; sysvals->layer_id = draw->layer_id; sysvals->fs.multisampled = fbinfo->nr_samples > 1; - cmdbuf->state.gfx.push_uniforms = 0; + + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) { for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) sysvals->blend.constants[i] = CLAMP(cb->blend_constants[i], 0.0f, 1.0f); - cmdbuf->state.gfx.push_uniforms = 0; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS)) { @@ -145,21 +146,28 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x; sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y; sysvals->viewport.offset.z = viewport->minDepth; - cmdbuf->state.gfx.push_uniforms = 0; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } - VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, - vs, vs_desc_state); - if (result != VK_SUCCESS) - return result; + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) { + VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( + cmdbuf, desc_state, vs, vs_desc_state); + if (result != VK_SUCCESS) + return result; - sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos; - result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, fs, - fs_desc_state); - if (result != VK_SUCCESS) - return result; + sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + } - sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos; + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) { + VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( + cmdbuf, desc_state, fs, fs_desc_state); + if (result != VK_SUCCESS) + return result; + + sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + } for (uint32_t i = 0; i < MAX_SETS; i++) { uint32_t 
used_set_mask = @@ -167,6 +175,7 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, if (used_set_mask & BITFIELD_BIT(i)) sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } return VK_SUCCESS; @@ -274,40 +283,6 @@ translate_stencil_op(VkStencilOp in) } } -static bool -fs_required(struct panvk_cmd_buffer *cmdbuf) -{ - const struct pan_shader_info *fs_info = - cmdbuf->state.gfx.fs.shader ? &cmdbuf->state.gfx.fs.shader->info : NULL; - const struct vk_dynamic_graphics_state *dyns = - &cmdbuf->vk.dynamic_graphics_state; - const struct vk_color_blend_state *cb = &dyns->cb; - - if (!fs_info) - return false; - - /* If we generally have side effects */ - if (fs_info->fs.sidefx) - return true; - - /* If colour is written we need to execute */ - for (unsigned i = 0; i < cb->attachment_count; ++i) { - if ((cb->color_write_enables & BITFIELD_BIT(i)) && - cb->attachments[i].write_mask) - return true; - } - - /* If alpha-to-coverage is enabled, we need to run the fragment shader even - * if we don't have a color attachment, so depth/stencil updates can be - * discarded if alpha, and thus coverage, is 0. */ - if (dyns->ms.alpha_to_coverage_enable) - return true; - - /* If depth is written and not implied we need to execute. 
- * TODO: Predicate on Z/S writes being enabled */ - return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil); -} - static VkResult panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) @@ -338,7 +313,8 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || - !cmdbuf->state.gfx.fs.rsd; + gfx_state_dirty(cmdbuf, FS) || + gfx_state_dirty(cmdbuf, RENDER_STATE); if (!dirty) { draw->fs.rsd = cmdbuf->state.gfx.fs.rsd; @@ -351,14 +327,13 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, const struct vk_rasterization_state *rs = &dyns->rs; const struct vk_color_blend_state *cb = &dyns->cb; const struct vk_depth_stencil_state *ds = &dyns->ds; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader *fs = get_fs(cmdbuf); const struct pan_shader_info *fs_info = fs ? 
&fs->info : NULL; unsigned bd_count = MAX2(cb->attachment_count, 1); bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable; bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable; bool writes_z = writes_depth(cmdbuf); bool writes_s = writes_stencil(cmdbuf); - bool needs_fs = fs_required(cmdbuf); struct panfrost_ptr ptr = panvk_cmd_alloc_desc_aggregate( cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND)); @@ -388,7 +363,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, pan_pack(rsd, RENDERER_STATE, cfg) { bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable; - if (needs_fs) { + if (fs) { pan_shader_prepare_rsd(fs_info, fs_code, &cfg); if (binfo.shader_loads_blend_const) { @@ -697,9 +672,8 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, VI) || dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) || dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) || - (num_imgs && !cmdbuf->state.gfx.vs.desc.img_attrib_table) || - (cmdbuf->state.gfx.vb.count && !cmdbuf->state.gfx.vs.attrib_bufs) || - (attrib_count && !cmdbuf->state.gfx.vs.attribs); + gfx_state_dirty(cmdbuf, VB) || + gfx_state_dirty(cmdbuf, DESC_STATE); if (!dirty) return VK_SUCCESS; @@ -920,6 +894,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *draw, void *prim) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; + const struct panvk_shader *fs = get_fs(cmdbuf); const struct vk_dynamic_graphics_state *dyns = &cmdbuf->vk.dynamic_graphics_state; const struct vk_input_assembly_state *ia = &dyns->ia; @@ -927,7 +902,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, bool writes_point_size = vs->info.vs.writes_point_size && ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - bool secondary_shader = vs->info.vs.secondary_enable && fs_required(cmdbuf); + bool secondary_shader = vs->info.vs.secondary_enable && fs != NULL; pan_pack(prim, PRIMITIVE, cfg) { 
cfg.draw_mode = translate_prim_topology(ia->primitive_topology); @@ -1194,17 +1169,16 @@ panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd) { struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx; - if (gfx->linked) + if (!gfx_state_dirty(cmd, VS) && !gfx_state_dirty(cmd, FS)) return VK_SUCCESS; VkResult result = panvk_per_arch(link_shaders)( - &cmd->desc_pool, gfx->vs.shader, gfx->fs.shader, &gfx->link); + &cmd->desc_pool, gfx->vs.shader, get_fs(cmd), &gfx->link); if (result != VK_SUCCESS) { vk_command_buffer_set_error(&cmd->vk, result); return result; } - gfx->linked = true; return VK_SUCCESS; } @@ -1213,7 +1187,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; + const struct panvk_shader *fs = get_fs(cmdbuf); struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; @@ -1237,9 +1211,13 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) batch = panvk_per_arch(cmd_open_batch)(cmdbuf); } - result = panvk_cmd_prepare_draw_link_shaders(cmdbuf); - if (result != VK_SUCCESS) - return; + if (fs_user_dirty(cmdbuf)) { + result = panvk_cmd_prepare_draw_link_shaders(cmdbuf); + if (result != VK_SUCCESS) + return; + } + + bool needs_tiling = !rs->rasterizer_discard_enable; if (!rs->rasterizer_discard_enable) { result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); @@ -1256,17 +1234,22 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) uint32_t used_set_mask = vs->desc_info.used_set_mask | (fs ? 
fs->desc_info.used_set_mask : 0); - result = - panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, used_set_mask); - if (result != VK_SUCCESS) - return; + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS) || + gfx_state_dirty(cmdbuf, FS)) { + result = panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, + used_set_mask); + if (result != VK_SUCCESS) + return; + } - result = panvk_per_arch(cmd_prepare_shader_desc_tables)( - cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state); - if (result != VK_SUCCESS) - return; + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) { + result = panvk_per_arch(cmd_prepare_shader_desc_tables)( + cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state); + if (result != VK_SUCCESS) + return; - panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw); + panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw); + } unsigned copy_desc_job_id = draw->jobs.vertex_copy_desc.gpu @@ -1274,12 +1257,9 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) 0, 0, &draw->jobs.vertex_copy_desc, false) : 0; - bool vs_writes_pos = - cmdbuf->state.gfx.link.buf_strides[PANVK_VARY_BUF_POSITION] > 0; - bool needs_tiling = !rs->rasterizer_discard_enable && vs_writes_pos; - /* No need to setup the FS desc tables if the FS is not executed. 
*/ - if (needs_tiling && fs_required(cmdbuf)) { + if (fs && + (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS))) { result = panvk_per_arch(cmd_prepare_shader_desc_tables)( cmdbuf, &cmdbuf->state.gfx.desc_state, fs, fs_desc_state); if (result != VK_SUCCESS) @@ -1363,8 +1343,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) } } - /* Clear the dirty flags all at once */ - vk_dynamic_graphics_state_clear_dirty(&cmdbuf->vk.dynamic_graphics_state); + clear_dirty_after_draw(cmdbuf); } static unsigned @@ -1530,6 +1509,7 @@ panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf, if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT) return; + gfx_state_set_dirty(cmdbuf, RENDER_STATE); cmdbuf->state.gfx.render.fb.bo_count = 0; memset(cmdbuf->state.gfx.render.fb.bos, 0, sizeof(cmdbuf->state.gfx.render.fb.bos)); @@ -1744,9 +1724,6 @@ panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf, } assert(fbinfo->width && fbinfo->height); - - /* We need to re-emit the FS RSD when the color attachments change. 
*/ - cmdbuf->state.gfx.fs.rsd = 0; } static void @@ -1991,7 +1968,7 @@ panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer, cmdbuf->state.gfx.vb.count = MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount); - cmdbuf->state.gfx.vs.attrib_bufs = 0; + gfx_state_set_dirty(cmdbuf, VB); } VKAPI_ATTR void VKAPI_CALL @@ -2005,4 +1982,5 @@ panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer, cmdbuf->state.gfx.ib.buffer = buf; cmdbuf->state.gfx.ib.offset = offset; cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType); + gfx_state_set_dirty(cmdbuf, IB); } diff --git a/src/panfrost/vulkan/panvk_cmd_desc_state.h b/src/panfrost/vulkan/panvk_cmd_desc_state.h index ddf5c9e1036..a38cbfada96 100644 --- a/src/panfrost/vulkan/panvk_cmd_desc_state.h +++ b/src/panfrost/vulkan/panvk_cmd_desc_state.h @@ -50,10 +50,17 @@ struct panvk_push_set { struct panvk_descriptor_state { const struct panvk_descriptor_set *sets[MAX_SETS]; struct panvk_descriptor_set *push_sets[MAX_SETS]; + BITSET_DECLARE(dirty_push_sets, MAX_SETS); uint32_t dyn_buf_offsets[MAX_SETS][MAX_DYNAMIC_BUFFERS]; }; +static inline void +desc_state_clear_all_dirty(struct panvk_descriptor_state *desc_state) +{ + BITSET_ZERO(desc_state->dirty_push_sets); +} + #if PAN_ARCH <= 7 VkResult panvk_per_arch(cmd_prepare_dyn_ssbos)( struct panvk_cmd_buffer *cmdbuf, diff --git a/src/panfrost/vulkan/panvk_cmd_dispatch.h b/src/panfrost/vulkan/panvk_cmd_dispatch.h index 9812cd18d97..3ea6397b90e 100644 --- a/src/panfrost/vulkan/panvk_cmd_dispatch.h +++ b/src/panfrost/vulkan/panvk_cmd_dispatch.h @@ -10,6 +10,13 @@ #error "PAN_ARCH must be defined" #endif +enum panvk_cmd_compute_dirty_state { + PANVK_CMD_COMPUTE_DIRTY_CS, + PANVK_CMD_COMPUTE_DIRTY_DESC_STATE, + PANVK_CMD_COMPUTE_DIRTY_PUSH_UNIFORMS, + PANVK_CMD_COMPUTE_DIRTY_STATE_COUNT, +}; + struct panvk_cmd_compute_state { struct panvk_descriptor_state desc_state; const struct panvk_shader *shader; @@ -18,6 +25,23 @@ struct 
panvk_cmd_compute_state { struct { struct panvk_shader_desc_state desc; } cs; + BITSET_DECLARE(dirty, PANVK_CMD_COMPUTE_DIRTY_STATE_COUNT); }; +#define compute_state_dirty(__cmdbuf, __name) \ + BITSET_TEST((__cmdbuf)->state.compute.dirty, \ + PANVK_CMD_COMPUTE_DIRTY_##__name) + +#define compute_state_set_dirty(__cmdbuf, __name) \ + BITSET_SET((__cmdbuf)->state.compute.dirty, PANVK_CMD_COMPUTE_DIRTY_##__name) + +#define compute_state_clear_all_dirty(__cmdbuf) \ + BITSET_ZERO((__cmdbuf)->state.compute.dirty) + +#define clear_dirty_after_dispatch(__cmdbuf) \ + do { \ + compute_state_clear_all_dirty(__cmdbuf); \ + desc_state_clear_all_dirty(&(__cmdbuf)->state.compute.desc_state); \ + } while (0) + #endif diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 6dac2b742aa..873496ea1bd 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -61,10 +61,20 @@ struct panvk_rendering_state { #if PAN_ARCH >= 10 struct panfrost_ptr fbds; mali_ptr tiler; - bool dirty; #endif }; +enum panvk_cmd_graphics_dirty_state { + PANVK_CMD_GRAPHICS_DIRTY_VS, + PANVK_CMD_GRAPHICS_DIRTY_FS, + PANVK_CMD_GRAPHICS_DIRTY_VB, + PANVK_CMD_GRAPHICS_DIRTY_IB, + PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE, + PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE, + PANVK_CMD_GRAPHICS_DIRTY_PUSH_UNIFORMS, + PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT, +}; + struct panvk_cmd_graphics_state { struct panvk_descriptor_state desc_state; @@ -77,7 +87,6 @@ struct panvk_cmd_graphics_state { #if PAN_ARCH <= 7 struct panvk_shader_link link; - bool linked; #endif struct { @@ -85,8 +94,6 @@ struct panvk_cmd_graphics_state { struct panvk_shader_desc_state desc; #if PAN_ARCH <= 7 mali_ptr rsd; -#else - mali_ptr spd; #endif } fs; @@ -96,17 +103,12 @@ struct panvk_cmd_graphics_state { #if PAN_ARCH <= 7 mali_ptr attribs; mali_ptr attrib_bufs; -#else - struct { - mali_ptr pos, var; - } spds; #endif } vs; struct { struct panvk_attrib_buf bufs[MAX_VBS]; unsigned count; - 
bool dirty; } vb; /* Index buffer */ @@ -114,7 +116,6 @@ struct panvk_cmd_graphics_state { struct panvk_buffer *buffer; uint64_t offset; uint8_t index_size; - bool dirty; } ib; struct { @@ -132,12 +133,26 @@ struct panvk_cmd_graphics_state { #if PAN_ARCH >= 10 mali_ptr tsd; #endif + + BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT); }; #define dyn_gfx_state_dirty(__cmdbuf, __name) \ BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty, \ MESA_VK_DYNAMIC_##__name) +#define gfx_state_dirty(__cmdbuf, __name) \ + BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name) + +#define gfx_state_set_dirty(__cmdbuf, __name) \ + BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name) + +#define gfx_state_clear_all_dirty(__cmdbuf) \ + BITSET_ZERO((__cmdbuf)->state.gfx.dirty) + +#define gfx_state_set_all_dirty(__cmdbuf) \ + BITSET_ONES((__cmdbuf)->state.gfx.dirty) + static inline uint32_t panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev, const struct panvk_cmd_graphics_state *state) @@ -162,4 +177,74 @@ panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev, return hierarchy_mask; } +static inline bool +fs_required(const struct panvk_cmd_graphics_state *state, + const struct vk_dynamic_graphics_state *dyn_state) +{ + const struct pan_shader_info *fs_info = + state->fs.shader ? 
&state->fs.shader->info : NULL; + const struct vk_color_blend_state *cb = &dyn_state->cb; + const struct vk_rasterization_state *rs = &dyn_state->rs; + + if (rs->rasterizer_discard_enable || !fs_info) + return false; + + /* If we generally have side effects */ + if (fs_info->fs.sidefx) + return true; + + /* If colour is written we need to execute */ + for (unsigned i = 0; i < cb->attachment_count; ++i) { + if ((cb->color_write_enables & BITFIELD_BIT(i)) && + cb->attachments[i].write_mask) + return true; + } + + /* If alpha-to-coverage is enabled, we need to run the fragment shader even + * if we don't have a color attachment, so depth/stencil updates can be + * discarded if alpha, and thus coverage, is 0. */ + if (dyn_state->ms.alpha_to_coverage_enable) + return true; + + /* If depth is written and not implied we need to execute. + * TODO: Predicate on Z/S writes being enabled */ + return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil); +} + +#define get_fs(__cmdbuf) \ + (fs_required(&(__cmdbuf)->state.gfx, \ + &(__cmdbuf)->vk.dynamic_graphics_state) \ + ? (__cmdbuf)->state.gfx.fs.shader \ + : NULL) + +/* Anything that might change the value returned by get_fs() makes users of the + * fragment shader dirty, because not using the fragment shader (when + * fs_required() returns false) impacts various other things, like VS -> FS + * linking in the JM backend, or the update of the fragment shader pointer in + * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about + * fragment shader updates. 
*/ + +#define fs_user_dirty(__cmdbuf) \ + (gfx_state_dirty(__cmdbuf, FS) || \ + dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) || \ + dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) || \ + dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) || \ + dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) || \ + dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE)) + +/* After a draw, all dirty flags are cleared except the FS dirty flag which + * needs to be set again if the draw didn't use the fragment shader. */ + +#define clear_dirty_after_draw(__cmdbuf) \ + do { \ + bool __set_fs_dirty = \ + (__cmdbuf)->state.gfx.fs.shader != get_fs(__cmdbuf); \ + vk_dynamic_graphics_state_clear_dirty( \ + &(__cmdbuf)->vk.dynamic_graphics_state); \ + gfx_state_clear_all_dirty(__cmdbuf); \ + desc_state_clear_all_dirty(&(__cmdbuf)->state.gfx.desc_state); \ + if (__set_fs_dirty) \ + gfx_state_set_dirty(__cmdbuf, FS); \ + } while (0) + #endif diff --git a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c index 6fc4c0ba13e..cfaa19833f0 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c @@ -104,6 +104,8 @@ cmd_get_push_desc_set(struct vk_command_buffer *vk_cmdbuf, /* Pushing descriptors replaces whatever sets are bound */ desc_state->sets[set_idx] = set; + + BITSET_SET(desc_state->dirty_push_sets, set_idx); return set; } @@ -115,8 +117,9 @@ panvk_per_arch(cmd_prepare_dyn_ssbos)( const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { - if (!shader || !shader->desc_info.dyn_ssbos.count || - shader_desc_state->dyn_ssbos) + shader_desc_state->dyn_ssbos = 0; + + if (!shader || !shader->desc_info.dyn_ssbos.count) return VK_SUCCESS; struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( @@ -181,6 +184,9 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { + 
memset(shader_desc_state->tables, 0, sizeof(shader_desc_state->tables)); + shader_desc_state->img_attrib_table = 0; + if (!shader) return VK_SUCCESS; @@ -192,7 +198,7 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( uint32_t desc_size = i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE; - if (!desc_count || shader_desc_state->tables[i]) + if (!desc_count) continue; struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( @@ -209,8 +215,6 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( * separately for vertex shaders. */ if (i == PANVK_BIFROST_DESC_TABLE_IMG && shader->info.stage != MESA_SHADER_VERTEX) { - assert(!shader_desc_state->img_attrib_table); - ptr = panvk_cmd_alloc_desc_array(cmdbuf, desc_count, ATTRIBUTE); if (!ptr.gpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -272,8 +276,10 @@ panvk_per_arch(cmd_prepare_shader_res_table)( const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { - if (!shader || shader_desc_state->res_table) + if (!shader) { + shader_desc_state->res_table = 0; return VK_SUCCESS; + } uint32_t first_unused_set = util_last_bit(shader->desc_info.used_set_mask); uint32_t res_count = 1 + first_unused_set; @@ -322,7 +328,8 @@ panvk_per_arch(cmd_prepare_push_descs)(struct panvk_cmd_buffer *cmdbuf, struct panvk_descriptor_set *push_set = desc_state->push_sets[i]; if (!(used_set_mask & BITFIELD_BIT(i)) || !push_set || - desc_state->sets[i] != push_set || push_set->descs.dev) + desc_state->sets[i] != push_set || push_set->descs.dev || + !BITSET_TEST(desc_state->dirty_push_sets, i)) continue; struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( @@ -351,16 +358,14 @@ panvk_per_arch(CmdBindDescriptorSets2KHR)( cmd_desc_state_bind_sets(&cmdbuf->state.gfx.desc_state, pBindDescriptorSetsInfo); - memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); - memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + gfx_state_set_dirty(cmdbuf, DESC_STATE); } if 
(pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) { cmd_desc_state_bind_sets(&cmdbuf->state.compute.desc_state, pBindDescriptorSetsInfo); - memset(&cmdbuf->state.compute.cs.desc, 0, - sizeof(cmdbuf->state.compute.cs.desc)); + compute_state_set_dirty(cmdbuf, DESC_STATE); } } @@ -401,16 +406,14 @@ panvk_per_arch(CmdPushDescriptorSet2KHR)( push_desc_set_write(cmdbuf, &cmdbuf->state.gfx.desc_state, pPushDescriptorSetInfo); - memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); - memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + gfx_state_set_dirty(cmdbuf, DESC_STATE); } if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) { push_desc_set_write(cmdbuf, &cmdbuf->state.compute.desc_state, pPushDescriptorSetInfo); - memset(&cmdbuf->state.compute.cs.desc, 0, - sizeof(cmdbuf->state.compute.cs.desc)); + compute_state_set_dirty(cmdbuf, DESC_STATE); } } @@ -443,11 +446,8 @@ panvk_per_arch(CmdPushDescriptorSetWithTemplate2KHR)( push_set->descs.dev = 0; push_set->layout = NULL; - if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { - memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); - memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); - } else { - memset(&cmdbuf->state.compute.cs.desc, 0, - sizeof(cmdbuf->state.compute.cs.desc)); - } + if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + gfx_state_set_dirty(cmdbuf, DESC_STATE); + else + compute_state_set_dirty(cmdbuf, DESC_STATE); } diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index c4d2e4faf25..07e060a9f09 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -65,16 +65,14 @@ panvk_per_arch(cmd_meta_compute_end)( if (memcmp(cmdbuf->state.push_constants.data, save_ctx->push_constants.data, sizeof(cmdbuf->state.push_constants.data))) { cmdbuf->state.push_constants = 
save_ctx->push_constants; - cmdbuf->state.compute.push_uniforms = 0; - cmdbuf->state.gfx.push_uniforms = 0; + compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } cmdbuf->state.compute.shader = save_ctx->cs.shader; cmdbuf->state.compute.cs.desc = save_ctx->cs.desc; - -#if PAN_ARCH >= 9 - cmdbuf->state.compute.cs.desc.res_table = 0; -#endif + compute_state_set_dirty(cmdbuf, CS); + compute_state_set_dirty(cmdbuf, DESC_STATE); } void @@ -126,8 +124,8 @@ panvk_per_arch(cmd_meta_gfx_end)( if (memcmp(cmdbuf->state.push_constants.data, save_ctx->push_constants.data, sizeof(cmdbuf->state.push_constants.data))) { cmdbuf->state.push_constants = save_ctx->push_constants; - cmdbuf->state.compute.push_uniforms = 0; - cmdbuf->state.gfx.push_uniforms = 0; + compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } cmdbuf->state.gfx.fs.shader = save_ctx->fs.shader; @@ -151,6 +149,11 @@ panvk_per_arch(cmd_meta_gfx_end)( memcpy(cmdbuf->vk.dynamic_graphics_state.dirty, cmdbuf->vk.dynamic_graphics_state.set, sizeof(cmdbuf->vk.dynamic_graphics_state.set)); + gfx_state_set_dirty(cmdbuf, VS); + gfx_state_set_dirty(cmdbuf, FS); + gfx_state_set_dirty(cmdbuf, VB); + gfx_state_set_dirty(cmdbuf, DESC_STATE); + gfx_state_set_dirty(cmdbuf, RENDER_STATE); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c index a67a66fe4c2..7f9e1334867 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c @@ -34,10 +34,10 @@ panvk_per_arch(CmdPushConstants2KHR)( VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) - cmdbuf->state.gfx.push_uniforms = 0; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) - cmdbuf->state.compute.push_uniforms = 0; + 
compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); memcpy(cmdbuf->state.push_constants.data + pPushConstantsInfo->offset, pPushConstantsInfo->pValues, pPushConstantsInfo->size); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 346cf8cf3f2..51976fd41fd 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1389,24 +1389,22 @@ panvk_cmd_bind_shader(struct panvk_cmd_buffer *cmd, const gl_shader_stage stage, { switch (stage) { case MESA_SHADER_COMPUTE: - cmd->state.compute.shader = shader; - memset(&cmd->state.compute.cs.desc, 0, - sizeof(cmd->state.compute.cs.desc)); + if (cmd->state.compute.shader != shader) { + cmd->state.compute.shader = shader; + compute_state_set_dirty(cmd, CS); + } break; case MESA_SHADER_VERTEX: - cmd->state.gfx.vs.shader = shader; -#if PAN_ARCH <= 7 - cmd->state.gfx.linked = false; -#endif - memset(&cmd->state.gfx.vs.desc, 0, sizeof(cmd->state.gfx.vs.desc)); + if (cmd->state.gfx.vs.shader != shader) { + cmd->state.gfx.vs.shader = shader; + gfx_state_set_dirty(cmd, VS); + } break; case MESA_SHADER_FRAGMENT: - cmd->state.gfx.fs.shader = shader; -#if PAN_ARCH <= 7 - cmd->state.gfx.linked = false; - cmd->state.gfx.fs.rsd = 0; -#endif - memset(&cmd->state.gfx.fs.desc, 0, sizeof(cmd->state.gfx.fs.desc)); + if (cmd->state.gfx.fs.shader != shader) { + cmd->state.gfx.fs.shader = shader; + gfx_state_set_dirty(cmd, FS); + } break; default: assert(!"Unsupported stage");