From 1570f0172e02aa660bb0b2619d67c1cff52b3914 Mon Sep 17 00:00:00 2001 From: Caterina Shablia Date: Sun, 15 Dec 2024 18:58:26 +0000 Subject: [PATCH] panvk: Fix base_{instance,vertex} handling At the moment, we're not initializing the base_{instance,vertex} sysvals which we'll need to do if we want to support shader draw parameters. It turns out even without shader draw parameters enabled, some shaders need a valid base_instance value, so this alone should fix a few tests. On CSF hardware, we have a way to pass a non-zero base instance that's propagated to the instance ID, but this messes with instance divisors, so instead of using the native base instance feature, we force it to zero, pass the base instance through an FAU sysval, and let panvk_lower_load_vs_input() do the lowering for vertex attribute loads. Reviewed-by: Boris Brezillon Reviewed-by: Mary Guillemard Part-of: --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 48 ++++++++++++++++++--- src/panfrost/vulkan/panvk_vX_shader.c | 29 +++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 383fbcd5c7f..feb928fc6f5 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -223,7 +223,8 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) #define MIN_DEPTH_CLIP_RANGE 37.7E-06f static void -prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) +prepare_sysvals(struct panvk_cmd_buffer *cmdbuf, + const struct panvk_draw_info *draw) { struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals; struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; @@ -304,6 +305,15 @@ prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) } gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); } + + if (draw->vertex.base != sysvals->vs.first_vertex || + draw->vertex.base != sysvals->vs.base_vertex || + draw->instance.base != sysvals->vs.base_instance) { + sysvals->vs.first_vertex = draw->vertex.base; + sysvals->vs.base_vertex = draw->vertex.base; + sysvals->vs.base_instance = draw->instance.base; + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + } } static bool @@ -1654,7 +1664,7 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) return result; } - prepare_sysvals(cmdbuf); + prepare_sysvals(cmdbuf, draw); result = prepare_push_uniforms(cmdbuf); if (result != VK_SUCCESS) @@ -1755,7 +1765,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_move32_to(b, cs_sr_reg32(b, 34), draw->instance.count); cs_move32_to(b, cs_sr_reg32(b, 35), draw->index.offset); cs_move32_to(b, cs_sr_reg32(b, 36), draw->vertex.base); - cs_move32_to(b, cs_sr_reg32(b, 37), draw->instance.base); + /* NIR expects zero-based instance ID, but even if it did have an intrinsic to + * load the absolute instance ID, we'd want to keep it zero-based to work around + * Mali's limitation on non-zero firstInstance when a instance divisor is used. + */ + cs_move32_to(b, cs_sr_reg32(b, 37), 0); } struct mali_primitive_flags_packed flags_override = @@ -1891,6 +1905,9 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, /* MultiDrawIndirect (.maxDrawIndirectCount) needs additional changes. */ assert(draw->indirect.draw_count == 1); + /* Force a new push uniform block to be allocated */ + gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + result = prepare_draw(cmdbuf, draw); if (result != VK_SUCCESS) return; @@ -1907,12 +1924,31 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_load_to(b, cs_sr_reg_tuple(b, 33, 5), draw_params_addr, reg_mask, 0); } - struct mali_primitive_flags_packed flags_override = - get_tiler_flags_override(draw); - /* Wait for the SR33-37 indirect buffer load. */ cs_wait_slot(b, SB_ID(LS), false); + struct cs_index fau_block_addr = cs_scratch_reg64(b, 2); + cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.push_uniforms); + cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr, + 256 + offsetof(struct panvk_graphics_sysvals, vs.first_vertex)); + cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr, + 256 + offsetof(struct panvk_graphics_sysvals, vs.base_vertex)); + cs_store32(b, cs_sr_reg32(b, 37), fau_block_addr, + 256 + offsetof(struct panvk_graphics_sysvals, vs.base_instance)); + + /* Wait for the store using SR-37 as src to finish, so we can overwrite it. */ + cs_wait_slot(b, SB_ID(LS), false); + + /* NIR expects zero-based instance ID, but even if it did have an intrinsic to + * load the absolute instance ID, we'd want to keep it zero-based to work around + * Mali's limitation on non-zero firstInstance when a instance divisor is used. + */ + cs_update_vt_ctx(b) + cs_move32_to(b, cs_sr_reg32(b, 37), 0); + + struct mali_primitive_flags_packed flags_override = + get_tiler_flags_override(draw); + cs_req_res(b, CS_IDVS_RES); cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), flags_override.opaque[0], false, true, diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index fc2b43e6891..ea2ceb9f3c0 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -139,6 +139,31 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data) return true; } +static bool +panvk_lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, + UNUSED void *data) +{ + if (intrin->intrinsic != nir_intrinsic_load_input) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *ld_attr = nir_load_attribute_pan( + b, intrin->def.num_components, intrin->def.bit_size, + b->shader->options->vertex_id_zero_based ? + nir_load_vertex_id_zero_base(b) : + nir_load_vertex_id(b), + PAN_ARCH >= 9 ? + nir_iadd(b, nir_load_instance_id(b), nir_load_base_instance(b)) : + nir_load_instance_id(b), + nir_get_io_offset_src(intrin)->ssa, + .base = nir_intrinsic_base(intrin), + .component = nir_intrinsic_component(intrin), + .dest_type = nir_intrinsic_dest_type(intrin)); + nir_def_replace(&intrin->def, ld_attr); + + return true; +} + #if PAN_ARCH <= 7 static bool lower_gl_pos_layer_writes(nir_builder *b, nir_instr *instr, void *data) @@ -555,6 +580,10 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, pan_shader_preprocess(nir, compile_input->gpu_id); + if (stage == MESA_SHADER_VERTEX) + NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input, + nir_metadata_control_flow, NULL); + /* since valhall, panvk_per_arch(nir_lower_descriptors) separates the * driver set and the user sets, and does not need pan_lower_image_index */