diff --git a/.pick_status.json b/.pick_status.json index be3bd2cad97..ab68329e4c4 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4404,7 +4404,7 @@ "description": "panvk/csf: fix case where vk_meta is used before PROVOKING_VERTEX_MODE_LAST", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "7a9f14d3c2b1470703fb68216c40fc0572f94815", "notes": null diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 8ea5de33b27..0c006fb970d 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -13,6 +13,8 @@ #include #include "genxml/gen_macros.h" +#include "drm-uapi/panthor_drm.h" + #include "panvk_buffer.h" #include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" @@ -43,6 +45,153 @@ #include "vk_pipeline_layout.h" #include "vk_render_pass.h" +static enum cs_reg_perm +provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg) +{ + return CS_REG_RW; +} + +#define PROVOKING_VERTEX_FN_MAX_SIZE 512 + +static size_t +generate_fn_set_fbds_provoking_vertex(struct panvk_device *dev, + struct cs_buffer fn_mem, bool has_zs_ext, + uint32_t rt_count, + uint32_t *dump_region_size) +{ + const struct drm_panthor_csif_info *csif_info = + panthor_kmod_get_csif_props(dev->kmod.dev); + + struct cs_builder b; + struct cs_builder_conf conf = { + .nr_registers = csif_info->cs_reg_count, + .nr_kernel_registers = MAX2(csif_info->unpreserved_cs_reg_count, 4), + .reg_perm = provoking_vertex_fn_reg_perm_cb, + }; + cs_builder_init(&b, &conf, fn_mem); + + struct cs_function function; + struct cs_function_ctx function_ctx = { + .ctx_reg = cs_subqueue_ctx_reg(&b), + .dump_addr_offset = + offsetof(struct panvk_cs_subqueue_context, reg_dump_addr), + }; + + cs_function_def(&b, &function, function_ctx) { + uint32_t fbd_sz = get_fbd_size(has_zs_ext, rt_count); + + /* argument passed in by the caller */ + struct cs_index fbd_count = cs_scratch_reg32(&b, 0); + + /* normal scratch regs */ + struct cs_index scratch_reg = cs_scratch_reg32(&b, 1); + struct cs_index fbd_addr = cs_scratch_reg64(&b, 2); + + cs_add64(&b, fbd_addr, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), 0); + + cs_while(&b, MALI_CS_CONDITION_GREATER, fbd_count) { + /* provoking_vertex flag is bit 14 of word 11 */ + unsigned offset = 11 * 4; + cs_load32_to(&b, scratch_reg, fbd_addr, offset); + cs_wait_slot(&b, SB_ID(LS), false); + cs_add32(&b, scratch_reg, scratch_reg, -(1 << 14)); + cs_store32(&b, scratch_reg, fbd_addr, offset); + cs_wait_slot(&b, SB_ID(LS), false); + + cs_add32(&b, fbd_count, fbd_count, -1); + cs_add64(&b, fbd_addr, fbd_addr, fbd_sz); + } + } + + assert(cs_is_valid(&b)); + cs_finish(&b); + + *dump_region_size = function.dump_size; + + return function.length * sizeof(uint64_t); +} + +static uint32_t +get_fn_set_fbds_provoking_vertex_idx(bool has_zs_ext, uint32_t rt_count) +{ + assert(rt_count >= 1 && rt_count <= MAX_RTS); + uint32_t idx = has_zs_ext * MAX_RTS + (rt_count - 1); + assert(idx < 2 * MAX_RTS); + return idx; +} + +static uint32_t +calc_fn_set_fbds_provoking_vertex_idx(struct panvk_cmd_buffer *cmdbuf) +{ + const struct pan_fb_info *fb = &cmdbuf->state.gfx.render.fb.info; + bool has_zs_ext = fb->zs.view.zs || fb->zs.view.s; + uint32_t rt_count = MAX2(fb->rt_count, 1); + + return get_fn_set_fbds_provoking_vertex_idx(has_zs_ext, rt_count); +} + +VkResult +panvk_per_arch(device_draw_context_init)(struct panvk_device *dev) +{ + dev->draw_ctx = vk_alloc(&dev->vk.alloc, + sizeof(struct panvk_device_draw_context), + _Alignof(struct panvk_device_draw_context), + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (dev->draw_ctx == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = panvk_priv_bo_create( + dev, PROVOKING_VERTEX_FN_MAX_SIZE * 2 * MAX_RTS, 0, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &dev->draw_ctx->fns_bo); + if (result != VK_SUCCESS) + goto free_draw_ctx; + + for (uint32_t has_zs_ext = 0; has_zs_ext <= 1; has_zs_ext++) { + for (uint32_t rt_count = 1; rt_count <= MAX_RTS; rt_count++) { + uint32_t idx = + get_fn_set_fbds_provoking_vertex_idx(has_zs_ext, rt_count); + /* Check that we have calculated a fn_stride if we need it to offset + * addresses. */ + assert(idx == 0 || + dev->draw_ctx->fn_set_fbds_provoking_vertex_stride != 0); + size_t offset = + idx * dev->draw_ctx->fn_set_fbds_provoking_vertex_stride; + + struct cs_buffer fn_mem = { + .cpu = dev->draw_ctx->fns_bo->addr.host + offset, + .gpu = dev->draw_ctx->fns_bo->addr.dev + offset, + .capacity = PROVOKING_VERTEX_FN_MAX_SIZE / sizeof(uint64_t), + }; + + uint32_t dump_region_size; + size_t fn_length = + generate_fn_set_fbds_provoking_vertex(dev, fn_mem, has_zs_ext, + rt_count, &dump_region_size); + + /* All functions must have the same length */ + assert(idx == 0 || + fn_length == dev->draw_ctx->fn_set_fbds_provoking_vertex_stride); + dev->draw_ctx->fn_set_fbds_provoking_vertex_stride = fn_length; + dev->dump_region_size[PANVK_SUBQUEUE_VERTEX_TILER] = + MAX2(dev->dump_region_size[PANVK_SUBQUEUE_VERTEX_TILER], + dump_region_size); + } + } + + return VK_SUCCESS; + +free_draw_ctx: + vk_free(&dev->vk.alloc, dev->draw_ctx); + return result; +} + +void +panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev) +{ + panvk_priv_bo_unref(dev->draw_ctx->fns_bo); + vk_free(&dev->vk.alloc, dev->draw_ctx); +} + static void emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf, uint32_t attrib_idx, uint32_t vb_desc_offset, @@ -802,6 +951,8 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size, cs_wait_slot(b, SB_ID(LS), false); } +static bool get_first_provoking_vertex(struct panvk_cmd_buffer *cmdbuf); + static VkResult get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) { @@ -851,8 +1002,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples); - cfg.first_provoking_vertex = - cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO; + cfg.first_provoking_vertex = get_first_provoking_vertex(cmdbuf); /* This will be overloaded. */ cfg.layer_count = 1; @@ -902,6 +1052,15 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) BITFIELD_MASK(4), offsetof(struct panvk_cs_subqueue_context, render.tiler_heap)); + /* If we don't know what provoking vertex mode the application wants yet, + * leave space to patch it later */ + if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) { + cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_tds_provoking_vertex) + /* provoking_vertex flag is bit 18 of word 2 */ + cs_add32(b, cs_scratch_reg32(b, 2), cs_scratch_reg32(b, 2), + -(1 << 18)); + } + /* Fill extra fields with zeroes so we can reset the completed * top/bottom and private states. */ cs_move64_to(b, cs_scratch_reg64(b, 10), 0); @@ -1150,8 +1309,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) fbinfo->sample_positions = dev->sample_positions->addr.dev + panfrost_sample_positions_offset(pan_sample_pattern(fbinfo->nr_samples)); - fbinfo->first_provoking_vertex = - cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO; + fbinfo->first_provoking_vertex = get_first_provoking_vertex(cmdbuf); VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf, fbinfo); if (result != VK_SUCCESS) @@ -1192,6 +1350,9 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT); + bool unset_provoking_vertex = + cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET; + if (copy_fbds) { struct cs_index cur_tiler = cs_reg64(b, 38); struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER); @@ -1232,6 +1393,15 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), pass_src_fbd_ptr, BITFIELD_MASK(14), fbd_off); cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0); + + /* If we don't know what provoking vertex mode the + * application wants yet, leave space to patch it later. */ + if (unset_provoking_vertex) { + /* provoking_vertex flag is bit 14 of word 11 */ + struct cs_index word = cs_scratch_reg32(b, 11); + cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex) + cs_add32(b, word, word, -(1 << 14)); + } } else { cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), pass_src_fbd_ptr, BITFIELD_MASK(16), fbd_off); @@ -1281,11 +1451,52 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) fbds.gpu | fbd_flags); cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler); } + + /* If we don't know what provoking vertex mode the application wants yet, + * leave space to patch it later */ + if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) { + uint32_t fbd_count = calc_enabled_layer_count(cmdbuf) * + (1 + PANVK_IR_PASS_COUNT); + /* passed to fn_set_fbds_provoking_vertex */ + struct cs_index fbd_count_reg = cs_scratch_reg32(b, 0); + cs_move32_to(b, fbd_count_reg, fbd_count); + + struct cs_index length_reg = cs_scratch_reg32(b, 1); + struct cs_index addr_reg = cs_scratch_reg64(b, 2); + uint32_t fn_idx = calc_fn_set_fbds_provoking_vertex_idx(cmdbuf); + uint32_t fn_stride = + dev->draw_ctx->fn_set_fbds_provoking_vertex_stride; + uint32_t fn_addr = + dev->draw_ctx->fns_bo->addr.dev + fn_idx * fn_stride; + cs_move64_to(b, addr_reg, fn_addr); + cs_move32_to(b, length_reg, fn_stride); + + cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex) + cs_call(b, addr_reg, length_reg); + } } return VK_SUCCESS; } +static bool +get_first_provoking_vertex(struct panvk_cmd_buffer *cmdbuf) +{ + switch (cmdbuf->state.gfx.render.first_provoking_vertex) { + case U_TRISTATE_NO: + return false; + case U_TRISTATE_YES: + return true; + /* If we don't know the provoking vertex mode yet, guess that it will + * be PROVOKING_VERTEX_MODE_FIRST. This is the vulkan default, and so + * likely to be right more often. */ + case U_TRISTATE_UNSET: + return true; + default: + unreachable("Invalid u_tristate"); + } +} + static void set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf, enum u_tristate first_provoking_vertex) @@ -1301,14 +1512,24 @@ set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf, state->render.first_provoking_vertex = first_provoking_vertex; } - /* Once we emit the first FBDs/TDs, we need to commit to a state. If we - * choose the wrong one, we will fail the assert when the next application - * draw happens (with a different state). Use PROVOKING_VERTEX_MODE_FIRST - * because it's the vulkan default, and so likely to be right more often. - * - * TODO: handle this case better */ - if (state->render.first_provoking_vertex == U_TRISTATE_UNSET) - state->render.first_provoking_vertex = U_TRISTATE_YES; + /* If the application uses PROVOKING_VERTEX_MODE_LAST after we previously + * emitted FBDs/TDs with the wrong mode set, patch the CS to flip the + * provoking vertex mode bits. */ + if (state->render.first_provoking_vertex == U_TRISTATE_NO && + state->render.maybe_set_tds_provoking_vertex) + { + struct cs_builder *b = + panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); + cs_patch_maybe(b, state->render.maybe_set_tds_provoking_vertex); + state->render.maybe_set_tds_provoking_vertex = NULL; + } + if (state->render.first_provoking_vertex == U_TRISTATE_NO && + state->render.maybe_set_fbds_provoking_vertex) { + struct cs_builder *b = + panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT); + cs_patch_maybe(b, state->render.maybe_set_fbds_provoking_vertex); + state->render.maybe_set_fbds_provoking_vertex = NULL; + } } static VkResult @@ -2414,6 +2635,8 @@ panvk_per_arch(cmd_inherit_render_state)( struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; cmdbuf->state.gfx.render.first_provoking_vertex = U_TRISTATE_UNSET; + cmdbuf->state.gfx.render.maybe_set_tds_provoking_vertex = NULL; + cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex = NULL; cmdbuf->state.gfx.render.suspended = false; cmdbuf->state.gfx.render.flags = inheritance_info->flags; diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 138be5494d8..8f0062156b8 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -83,6 +83,11 @@ struct panvk_rendering_state { /* True if the last render pass was suspended. */ bool suspended; + /* Blocks that can patch to flip the provoking vertex mode if we need to + * emit FBDs/TDs before we know which mode the application is using */ + struct cs_maybe *maybe_set_tds_provoking_vertex; + struct cs_maybe *maybe_set_fbds_provoking_vertex; + struct { /* != 0 if the render pass contains one or more occlusion queries to * signal. */ @@ -208,6 +213,13 @@ struct panvk_cmd_graphics_state { } \ } while (0) +#if PAN_ARCH >= 10 +struct panvk_device_draw_context { + struct panvk_priv_bo *fns_bo; + uint64_t fn_set_fbds_provoking_vertex_stride; +}; +#endif + static inline uint32_t panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev, const struct panvk_cmd_graphics_state *state, @@ -314,6 +326,15 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state, gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \ } while (0) + +#if PAN_ARCH >= 10 +VkResult +panvk_per_arch(device_draw_context_init)(struct panvk_device *dev); + +void +panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev); +#endif + void panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, const VkRenderingInfo *pRenderingInfo); diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index cda025c2a91..267309eaa30 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -30,6 +30,7 @@ #define PANVK_MAX_QUEUE_FAMILIES 1 struct panvk_precomp_cache; +struct panvk_device_draw_context; struct panvk_device { struct vk_device vk; @@ -79,6 +80,8 @@ struct panvk_device { #endif } utrace; + struct panvk_device_draw_context* draw_ctx; + struct { struct pandecode_context *decode_ctx; } debug; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 397a48ab321..378a081c65e 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -221,6 +221,10 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, #endif state->render.first_provoking_vertex = U_TRISTATE_UNSET; +#if PAN_ARCH >= 10 + state->render.maybe_set_tds_provoking_vertex = NULL; + state->render.maybe_set_fbds_provoking_vertex = NULL; +#endif memset(state->render.fb.crc_valid, 0, sizeof(state->render.fb.crc_valid)); memset(&state->render.color_attachments, 0, sizeof(state->render.color_attachments)); diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index a7268794639..9158e152b31 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -17,6 +17,7 @@ #include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_device.h" +#include "panvk_cmd_draw.h" #include "panvk_entrypoints.h" #include "panvk_instance.h" #include "panvk_macros.h" @@ -373,9 +374,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, if (result != VK_SUCCESS) goto err_free_priv_bos; - result = panvk_meta_init(device); +#if PAN_ARCH >= 10 + result = panvk_per_arch(device_draw_context_init)(device); if (result != VK_SUCCESS) goto err_free_precomp; +#endif + + result = panvk_meta_init(device); + if (result != VK_SUCCESS) + goto err_free_draw_ctx; for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = @@ -425,7 +432,11 @@ err_finish_queues: panvk_meta_cleanup(device); +err_free_draw_ctx: +#if PAN_ARCH >= 10 + panvk_per_arch(device_draw_context_cleanup)(device); err_free_precomp: +#endif panvk_precomp_cleanup(device); err_free_priv_bos: if (device->printf.bo) @@ -468,6 +479,9 @@ panvk_per_arch(destroy_device)(struct panvk_device *device, } panvk_precomp_cleanup(device); +#if PAN_ARCH >= 10 + panvk_per_arch(device_draw_context_cleanup)(device); +#endif panvk_meta_cleanup(device); u_printf_destroy(&device->printf.ctx); panvk_priv_bo_unref(device->printf.bo);