mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 07:10:09 +01:00
panvk: Add a CSF backend for panvk_queue/cmd_buffer
With those two components implemented, we can now compile all common per-arch source files. Co-developed-by: Rebecca Mckeever <rebecca.mckeever@collabora.com> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Rebecca Mckeever <rebecca.mckeever@collabora.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30969>
This commit is contained in:
parent
7049d31676
commit
5544d39f44
11 changed files with 4406 additions and 28 deletions
|
|
@ -10,5 +10,10 @@ ForEachMacros: [
|
|||
'cs_case',
|
||||
'cs_default',
|
||||
'cs_match',
|
||||
'cs_update_compute_ctx',
|
||||
'cs_update_frag_ctx',
|
||||
'cs_update_progress_seqno',
|
||||
'cs_update_vt_ctx',
|
||||
'cs_while',
|
||||
'panvk_cs_reg_upd_ctx',
|
||||
]
|
||||
|
|
|
|||
460
src/panfrost/vulkan/csf/panvk_cmd_buffer.h
Normal file
460
src/panfrost/vulkan/csf/panvk_cmd_buffer.h
Normal file
|
|
@ -0,0 +1,460 @@
|
|||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_CMD_BUFFER_H
|
||||
#define PANVK_CMD_BUFFER_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "genxml/cs_builder.h"
|
||||
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_queue.h"
|
||||
|
||||
#include "vk_command_buffer.h"
|
||||
|
||||
#include "util/list.h"
|
||||
|
||||
#define MAX_VBS 16
|
||||
#define MAX_RTS 8
|
||||
|
||||
struct panvk_cs_sync32 {
|
||||
uint32_t seqno;
|
||||
uint32_t error;
|
||||
};
|
||||
|
||||
struct panvk_cs_sync64 {
|
||||
uint64_t seqno;
|
||||
uint32_t error;
|
||||
uint32_t pad;
|
||||
};
|
||||
|
||||
struct panvk_cs_desc_ringbuf {
|
||||
uint64_t syncobj;
|
||||
uint64_t ptr;
|
||||
uint32_t pos;
|
||||
uint32_t pad;
|
||||
};
|
||||
|
||||
/* 512k of render descriptors that can be used when
|
||||
* VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT is set on the command buffer. */
|
||||
#define RENDER_DESC_RINGBUF_SIZE (512 * 1024)
|
||||
|
||||
struct panvk_cs_subqueue_context {
|
||||
uint64_t syncobjs;
|
||||
uint32_t iter_sb;
|
||||
uint32_t pad;
|
||||
struct {
|
||||
struct panvk_cs_desc_ringbuf desc_ringbuf;
|
||||
uint64_t tiler_heap;
|
||||
uint64_t geom_buf;
|
||||
} render;
|
||||
uint64_t debug_syncobjs;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
struct panvk_cache_flush_info {
|
||||
enum mali_cs_flush_mode l2;
|
||||
enum mali_cs_flush_mode lsc;
|
||||
bool others;
|
||||
};
|
||||
|
||||
struct panvk_cs_deps {
|
||||
bool needs_draw_flush;
|
||||
struct {
|
||||
uint32_t wait_sb_mask;
|
||||
struct panvk_cache_flush_info cache_flush;
|
||||
} src[PANVK_SUBQUEUE_COUNT];
|
||||
|
||||
struct {
|
||||
uint32_t wait_subqueue_mask;
|
||||
} dst[PANVK_SUBQUEUE_COUNT];
|
||||
};
|
||||
|
||||
enum panvk_sb_ids {
|
||||
PANVK_SB_LS = 0,
|
||||
PANVK_SB_IMM_FLUSH = 0,
|
||||
PANVK_SB_DEFERRED_SYNC = 1,
|
||||
PANVK_SB_DEFERRED_FLUSH = 2,
|
||||
PANVK_SB_ITER_START = 3,
|
||||
PANVK_SB_ITER_COUNT = 5,
|
||||
};
|
||||
|
||||
#define SB_IMM_MASK 0
|
||||
#define SB_MASK(nm) BITFIELD_BIT(PANVK_SB_##nm)
|
||||
#define SB_ID(nm) PANVK_SB_##nm
|
||||
#define SB_ITER(x) (PANVK_SB_ITER_START + (x))
|
||||
#define SB_WAIT_ITER(x) BITFIELD_BIT(PANVK_SB_ITER_START + (x))
|
||||
#define SB_ALL_MASK BITFIELD_MASK(8)
|
||||
|
||||
static inline uint32_t
|
||||
next_iter_sb(uint32_t sb)
|
||||
{
|
||||
return sb + 1 < PANVK_SB_ITER_COUNT ? sb + 1 : 0;
|
||||
}
|
||||
|
||||
enum panvk_cs_regs {
|
||||
/* RUN_IDVS staging regs. */
|
||||
PANVK_CS_REG_RUN_IDVS_SR_START = 0,
|
||||
PANVK_CS_REG_RUN_IDVS_SR_END = 60,
|
||||
|
||||
/* RUN_FRAGMENT staging regs. */
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 40,
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,
|
||||
|
||||
/* RUN_COMPUTE staging regs. */
|
||||
PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
|
||||
PANVK_CS_REG_RUN_COMPUTE_SR_END = 39,
|
||||
|
||||
/* Range of registers that can be used to store temporary data on
|
||||
* all queues. Note that some queues have extra space they can use
|
||||
* as scratch space.*/
|
||||
PANVK_CS_REG_SCRATCH_START = 66,
|
||||
PANVK_CS_REG_SCRATCH_END = 83,
|
||||
|
||||
/* Driver context. */
|
||||
PANVK_CS_REG_PROGRESS_SEQNO_START = 84,
|
||||
PANVK_CS_REG_PROGRESS_SEQNO_END = 89,
|
||||
PANVK_CS_REG_SUBQUEUE_CTX_START = 90,
|
||||
PANVK_CS_REG_SUBQUEUE_CTX_END = 91,
|
||||
};
|
||||
|
||||
static inline struct cs_index
|
||||
cs_scratch_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
|
||||
{
|
||||
assert(PANVK_CS_REG_SCRATCH_START + start + count - 1 <=
|
||||
PANVK_CS_REG_SCRATCH_END);
|
||||
return cs_reg_tuple(b, PANVK_CS_REG_SCRATCH_START + start, count);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_scratch_reg32(struct cs_builder *b, unsigned reg)
|
||||
{
|
||||
return cs_scratch_reg_tuple(b, reg, 1);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_scratch_reg64(struct cs_builder *b, unsigned reg)
|
||||
{
|
||||
assert(reg % 2 == 0);
|
||||
return cs_scratch_reg_tuple(b, reg, 2);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_sr_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
|
||||
{
|
||||
assert(start + count - 1 < PANVK_CS_REG_SCRATCH_START);
|
||||
return cs_reg_tuple(b, start, count);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_sr_reg32(struct cs_builder *b, unsigned reg)
|
||||
{
|
||||
return cs_sr_reg_tuple(b, reg, 1);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_sr_reg64(struct cs_builder *b, unsigned reg)
|
||||
{
|
||||
assert(reg % 2 == 0);
|
||||
return cs_sr_reg_tuple(b, reg, 2);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_subqueue_ctx_reg(struct cs_builder *b)
|
||||
{
|
||||
return cs_reg64(b, PANVK_CS_REG_SUBQUEUE_CTX_START);
|
||||
}
|
||||
|
||||
static inline struct cs_index
|
||||
cs_progress_seqno_reg(struct cs_builder *b, enum panvk_subqueue_id subqueue)
|
||||
{
|
||||
assert(PANVK_CS_REG_PROGRESS_SEQNO_START + (subqueue * 2) <
|
||||
PANVK_CS_REG_PROGRESS_SEQNO_END);
|
||||
return cs_reg64(b, PANVK_CS_REG_PROGRESS_SEQNO_START + (subqueue * 2));
|
||||
}
|
||||
|
||||
struct panvk_cs_reg_upd_context {
|
||||
reg_perm_cb_t reg_perm;
|
||||
struct panvk_cs_reg_upd_context *next;
|
||||
};
|
||||
|
||||
struct panvk_cs_state {
|
||||
struct cs_builder builder;
|
||||
|
||||
struct cs_load_store_tracker ls_tracker;
|
||||
|
||||
/* Used to debug register writes in invalid contexts. */
|
||||
struct {
|
||||
struct panvk_cs_reg_upd_context *upd_ctx_stack;
|
||||
reg_perm_cb_t base_perm;
|
||||
} reg_access;
|
||||
|
||||
/* Sync point relative to the beginning of the command buffer.
|
||||
* Needs to be offset with the subqueue sync point. */
|
||||
int32_t relative_sync_point;
|
||||
};
|
||||
|
||||
static inline struct panvk_cs_reg_upd_context *
|
||||
panvk_cs_reg_ctx_push(struct cs_builder *b,
|
||||
struct panvk_cs_reg_upd_context *ctx,
|
||||
reg_perm_cb_t reg_perm)
|
||||
{
|
||||
struct panvk_cs_state *cs_state =
|
||||
container_of(b, struct panvk_cs_state, builder);
|
||||
|
||||
ctx->reg_perm = reg_perm;
|
||||
ctx->next = cs_state->reg_access.upd_ctx_stack;
|
||||
cs_state->reg_access.upd_ctx_stack = ctx;
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void
|
||||
panvk_cs_reg_ctx_pop(struct cs_builder *b, struct panvk_cs_reg_upd_context *ctx)
|
||||
{
|
||||
struct panvk_cs_state *cs_state =
|
||||
container_of(b, struct panvk_cs_state, builder);
|
||||
|
||||
assert(cs_state->reg_access.upd_ctx_stack == ctx);
|
||||
|
||||
cs_state->reg_access.upd_ctx_stack = ctx->next;
|
||||
}
|
||||
|
||||
struct panvk_cs_reg_range {
|
||||
unsigned start;
|
||||
unsigned end;
|
||||
};
|
||||
|
||||
#define PANVK_CS_REG_RANGE(__name) \
|
||||
{ \
|
||||
.start = PANVK_CS_REG_##__name##_START, \
|
||||
.end = PANVK_CS_REG_##__name##_END, \
|
||||
}
|
||||
|
||||
#define panvk_cs_reg_blacklist(__name, ...) \
|
||||
static inline enum cs_reg_perm panvk_cs_##__name##_reg_perm( \
|
||||
struct cs_builder *b, unsigned reg) \
|
||||
{ \
|
||||
const struct panvk_cs_reg_range ranges[] = { \
|
||||
__VA_ARGS__, \
|
||||
}; \
|
||||
\
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ranges); i++) { \
|
||||
if (reg >= ranges[i].start && reg <= ranges[i].end) \
|
||||
return CS_REG_RD; \
|
||||
} \
|
||||
\
|
||||
return CS_REG_RW; \
|
||||
}
|
||||
|
||||
panvk_cs_reg_blacklist(vt, PANVK_CS_REG_RANGE(RUN_IDVS_SR),
|
||||
PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
|
||||
PANVK_CS_REG_RANGE(SUBQUEUE_CTX));
|
||||
panvk_cs_reg_blacklist(frag, PANVK_CS_REG_RANGE(RUN_FRAGMENT_SR),
|
||||
PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
|
||||
PANVK_CS_REG_RANGE(SUBQUEUE_CTX));
|
||||
panvk_cs_reg_blacklist(compute, PANVK_CS_REG_RANGE(RUN_COMPUTE_SR),
|
||||
PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
|
||||
PANVK_CS_REG_RANGE(SUBQUEUE_CTX));
|
||||
|
||||
#define panvk_cs_reg_whitelist(__name, ...) \
|
||||
static inline enum cs_reg_perm panvk_cs_##__name##_reg_perm( \
|
||||
struct cs_builder *b, unsigned reg) \
|
||||
{ \
|
||||
const struct panvk_cs_reg_range ranges[] = { \
|
||||
__VA_ARGS__, \
|
||||
}; \
|
||||
\
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ranges); i++) { \
|
||||
if (reg >= ranges[i].start && reg <= ranges[i].end) \
|
||||
return CS_REG_RW; \
|
||||
} \
|
||||
\
|
||||
return CS_REG_RD; \
|
||||
}
|
||||
|
||||
#define panvk_cs_reg_upd_ctx(__b, __name) \
|
||||
for (struct panvk_cs_reg_upd_context __reg_upd_ctx, \
|
||||
*reg_upd_ctxp = panvk_cs_reg_ctx_push(__b, &__reg_upd_ctx, \
|
||||
panvk_cs_##__name##_reg_perm); \
|
||||
reg_upd_ctxp; \
|
||||
panvk_cs_reg_ctx_pop(__b, &__reg_upd_ctx), reg_upd_ctxp = NULL)
|
||||
|
||||
panvk_cs_reg_whitelist(progress_seqno, PANVK_CS_REG_RANGE(PROGRESS_SEQNO));
|
||||
#define cs_update_progress_seqno(__b) panvk_cs_reg_upd_ctx(__b, progress_seqno)
|
||||
|
||||
panvk_cs_reg_whitelist(compute_ctx, PANVK_CS_REG_RANGE(RUN_COMPUTE_SR));
|
||||
#define cs_update_compute_ctx(__b) panvk_cs_reg_upd_ctx(__b, compute_ctx)
|
||||
|
||||
panvk_cs_reg_whitelist(frag_ctx, PANVK_CS_REG_RANGE(RUN_FRAGMENT_SR));
|
||||
#define cs_update_frag_ctx(__b) panvk_cs_reg_upd_ctx(__b, frag_ctx)
|
||||
|
||||
panvk_cs_reg_whitelist(vt_ctx, PANVK_CS_REG_RANGE(RUN_IDVS_SR));
|
||||
#define cs_update_vt_ctx(__b) panvk_cs_reg_upd_ctx(__b, vt_ctx)
|
||||
|
||||
struct panvk_tls_state {
|
||||
struct panfrost_ptr desc;
|
||||
struct pan_tls_info info;
|
||||
unsigned max_wg_count;
|
||||
};
|
||||
|
||||
struct panvk_cmd_compute_state {
|
||||
struct panvk_descriptor_state desc_state;
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_compute_sysvals sysvals;
|
||||
mali_ptr push_uniforms;
|
||||
struct {
|
||||
struct panvk_shader_desc_state desc;
|
||||
} cs;
|
||||
};
|
||||
|
||||
struct panvk_attrib_buf {
|
||||
mali_ptr address;
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
struct panvk_resolve_attachment {
|
||||
VkResolveModeFlagBits mode;
|
||||
struct panvk_image_view *src_iview;
|
||||
struct panvk_image_view *dst_iview;
|
||||
};
|
||||
|
||||
struct panvk_cmd_graphics_state {
|
||||
struct panvk_descriptor_state desc_state;
|
||||
|
||||
struct {
|
||||
struct vk_vertex_input_state vi;
|
||||
struct vk_sample_locations_state sl;
|
||||
} dynamic;
|
||||
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
struct panvk_shader_link link;
|
||||
bool linked;
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
mali_ptr spd;
|
||||
} fs;
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
struct {
|
||||
mali_ptr pos, var;
|
||||
} spds;
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
struct panvk_attrib_buf bufs[MAX_VBS];
|
||||
unsigned count;
|
||||
bool dirty;
|
||||
} vb;
|
||||
|
||||
/* Index buffer */
|
||||
struct {
|
||||
struct panvk_buffer *buffer;
|
||||
uint64_t offset;
|
||||
uint8_t index_size;
|
||||
uint32_t first_vertex, base_vertex, base_instance;
|
||||
bool dirty;
|
||||
} ib;
|
||||
|
||||
struct {
|
||||
struct panvk_blend_info info;
|
||||
} cb;
|
||||
|
||||
struct {
|
||||
VkRenderingFlags flags;
|
||||
uint32_t layer_count;
|
||||
|
||||
enum vk_rp_attachment_flags bound_attachments;
|
||||
struct {
|
||||
VkFormat fmts[MAX_RTS];
|
||||
uint8_t samples[MAX_RTS];
|
||||
struct panvk_resolve_attachment resolve[MAX_RTS];
|
||||
} color_attachments;
|
||||
|
||||
struct pan_image_view zs_pview;
|
||||
|
||||
struct {
|
||||
struct panvk_resolve_attachment resolve;
|
||||
} z_attachment, s_attachment;
|
||||
|
||||
struct {
|
||||
struct pan_fb_info info;
|
||||
bool crc_valid[MAX_RTS];
|
||||
} fb;
|
||||
|
||||
struct panfrost_ptr fbds;
|
||||
mali_ptr tiler;
|
||||
bool dirty;
|
||||
} render;
|
||||
|
||||
mali_ptr push_uniforms;
|
||||
};
|
||||
|
||||
struct panvk_cmd_buffer {
|
||||
struct vk_command_buffer vk;
|
||||
VkCommandBufferUsageFlags flags;
|
||||
struct panvk_pool cs_pool;
|
||||
struct panvk_pool desc_pool;
|
||||
struct panvk_pool tls_pool;
|
||||
struct list_head push_sets;
|
||||
|
||||
uint32_t flush_id;
|
||||
|
||||
struct {
|
||||
struct panvk_cmd_graphics_state gfx;
|
||||
struct panvk_cmd_compute_state compute;
|
||||
struct panvk_push_constant_state push_constants;
|
||||
struct panvk_cs_state cs[PANVK_SUBQUEUE_COUNT];
|
||||
struct panvk_tls_state tls;
|
||||
} state;
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, vk.base, VkCommandBuffer,
|
||||
VK_OBJECT_TYPE_COMMAND_BUFFER)
|
||||
|
||||
static inline struct cs_builder *
|
||||
panvk_get_cs_builder(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
|
||||
{
|
||||
return &cmdbuf->state.cs[subqueue].builder;
|
||||
}
|
||||
|
||||
static inline struct panvk_descriptor_state *
|
||||
panvk_cmd_get_desc_state(struct panvk_cmd_buffer *cmdbuf,
|
||||
VkPipelineBindPoint bindpoint)
|
||||
{
|
||||
switch (bindpoint) {
|
||||
case VK_PIPELINE_BIND_POINT_GRAPHICS:
|
||||
return &cmdbuf->state.gfx.desc_state;
|
||||
|
||||
case VK_PIPELINE_BIND_POINT_COMPUTE:
|
||||
return &cmdbuf->state.compute.desc_state;
|
||||
|
||||
default:
|
||||
assert(!"Unsupported bind point");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
extern const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops);
|
||||
|
||||
void panvk_per_arch(cmd_flush_draws)(struct panvk_cmd_buffer *cmdbuf);
|
||||
|
||||
void panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
|
||||
enum panvk_subqueue_id subqueue);
|
||||
|
||||
void panvk_per_arch(get_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkDependencyInfo *in,
|
||||
struct panvk_cs_deps *out);
|
||||
|
||||
#endif /* PANVK_CMD_BUFFER_H */
|
||||
72
src/panfrost/vulkan/csf/panvk_queue.h
Normal file
72
src/panfrost/vulkan/csf/panvk_queue.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_QUEUE_H
|
||||
#define PANVK_QUEUE_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "panvk_device.h"
|
||||
|
||||
#include "vk_queue.h"
|
||||
|
||||
enum panvk_subqueue_id {
|
||||
PANVK_SUBQUEUE_VERTEX_TILER = 0,
|
||||
PANVK_SUBQUEUE_FRAGMENT,
|
||||
PANVK_SUBQUEUE_COMPUTE,
|
||||
PANVK_SUBQUEUE_COUNT,
|
||||
};
|
||||
|
||||
struct panvk_tiler_heap {
|
||||
uint32_t chunk_size;
|
||||
struct panvk_priv_mem desc;
|
||||
struct {
|
||||
uint32_t handle;
|
||||
mali_ptr dev_addr;
|
||||
} context;
|
||||
};
|
||||
|
||||
struct panvk_subqueue {
|
||||
struct panvk_priv_mem context;
|
||||
};
|
||||
|
||||
struct panvk_desc_ringbuf {
|
||||
struct panvk_priv_mem syncobj;
|
||||
struct pan_kmod_bo *bo;
|
||||
struct {
|
||||
uint64_t dev;
|
||||
void *host;
|
||||
} addr;
|
||||
};
|
||||
|
||||
struct panvk_queue {
|
||||
struct vk_queue vk;
|
||||
|
||||
uint32_t group_handle;
|
||||
uint32_t syncobj_handle;
|
||||
|
||||
struct panvk_tiler_heap tiler_heap;
|
||||
struct panvk_desc_ringbuf render_desc_ringbuf;
|
||||
struct panvk_priv_mem syncobjs;
|
||||
struct panvk_priv_mem debug_syncobjs;
|
||||
|
||||
struct panvk_subqueue subqueues[PANVK_SUBQUEUE_COUNT];
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(panvk_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
|
||||
|
||||
void panvk_per_arch(queue_finish)(struct panvk_queue *queue);
|
||||
|
||||
VkResult panvk_per_arch(queue_init)(struct panvk_device *device,
|
||||
struct panvk_queue *queue, int idx,
|
||||
const VkDeviceQueueCreateInfo *create_info);
|
||||
|
||||
#endif
|
||||
703
src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
Normal file
703
src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
Normal file
|
|
@ -0,0 +1,703 @@
|
|||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
*
|
||||
* Derived from tu_cmd_buffer.c which is:
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_alloc.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_pool.h"
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_instance.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
|
||||
#include "pan_blitter.h"
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
#include "pan_props.h"
|
||||
#include "pan_samples.h"
|
||||
|
||||
#include "vk_descriptor_update_template.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_synchronization.h"
|
||||
|
||||
static void
|
||||
emit_tls(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
unsigned core_id_range;
|
||||
panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
|
||||
|
||||
if (cmdbuf->state.tls.info.tls.size) {
|
||||
unsigned thread_tls_alloc =
|
||||
panfrost_query_thread_tls_alloc(&phys_dev->kmod.props);
|
||||
unsigned size = panfrost_get_total_stack_size(
|
||||
cmdbuf->state.tls.info.tls.size, thread_tls_alloc, core_id_range);
|
||||
|
||||
cmdbuf->state.tls.info.tls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
|
||||
}
|
||||
|
||||
assert(!cmdbuf->state.tls.info.wls.size);
|
||||
|
||||
if (cmdbuf->state.tls.desc.cpu) {
|
||||
GENX(pan_emit_tls)(&cmdbuf->state.tls.info, cmdbuf->state.tls.desc.cpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_instance *instance =
|
||||
to_panvk_instance(dev->vk.physical->instance);
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, subqueue);
|
||||
|
||||
cs_update_progress_seqno(b) {
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
|
||||
uint32_t rel_sync_point = cmdbuf->state.cs[i].relative_sync_point;
|
||||
|
||||
if (!rel_sync_point)
|
||||
continue;
|
||||
|
||||
cs_add64(b, cs_progress_seqno_reg(b, i), cs_progress_seqno_reg(b, i),
|
||||
rel_sync_point);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we're decoding the CS or dumping memory mappings, we need a flush
|
||||
* to make sure all data have been pushed to memory. */
|
||||
if (instance->debug_flags & (PANVK_DEBUG_DUMP | PANVK_DEBUG_TRACE)) {
|
||||
struct cs_index flush_id = cs_scratch_reg32(b, 0);
|
||||
|
||||
cs_move32_to(b, flush_id, 0);
|
||||
cs_wait_slots(b, SB_ALL_MASK, false);
|
||||
cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
|
||||
false, flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
|
||||
cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
|
||||
}
|
||||
|
||||
/* If we're in sync/trace more, we signal the debug object. */
|
||||
if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
|
||||
struct cs_index debug_sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index one = cs_scratch_reg32(b, 2);
|
||||
struct cs_index error = cs_scratch_reg32(b, 3);
|
||||
struct cs_index cmp_scratch = cs_scratch_reg32(b, 2);
|
||||
|
||||
cs_move32_to(b, one, 1);
|
||||
cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, debug_syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
cs_add64(b, debug_sync_addr, debug_sync_addr,
|
||||
sizeof(struct panvk_cs_sync32) * subqueue);
|
||||
cs_load32_to(b, error, debug_sync_addr,
|
||||
offsetof(struct panvk_cs_sync32, error));
|
||||
cs_wait_slots(b, SB_ALL_MASK, false);
|
||||
cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_SYSTEM, one, debug_sync_addr,
|
||||
cs_now());
|
||||
|
||||
cs_match(b, error, cmp_scratch) {
|
||||
cs_case(b, 0) {
|
||||
/* Do nothing. */
|
||||
}
|
||||
|
||||
cs_default(b) {
|
||||
/* Overwrite the sync error with the first error we encountered. */
|
||||
cs_store32(b, error, debug_sync_addr,
|
||||
offsetof(struct panvk_cs_sync32, error));
|
||||
cs_wait_slots(b, SB_ID(LS), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cs_finish(&cmdbuf->state.cs[subqueue].builder);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
emit_tls(cmdbuf);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->state.cs); i++) {
|
||||
struct cs_builder *b = &cmdbuf->state.cs[i].builder;
|
||||
|
||||
if (!cs_is_valid(b)) {
|
||||
vk_command_buffer_set_error(&cmdbuf->vk,
|
||||
VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
} else {
|
||||
finish_cs(cmdbuf, i);
|
||||
}
|
||||
}
|
||||
|
||||
return vk_command_buffer_end(&cmdbuf->vk);
|
||||
}
|
||||
|
||||
static bool
|
||||
src_stages_need_draw_flush(VkPipelineStageFlags2 stages)
|
||||
{
|
||||
static const VkPipelineStageFlags2 draw_flush_stage_mask =
|
||||
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
|
||||
VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT |
|
||||
VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT;
|
||||
|
||||
return (stages & draw_flush_stage_mask) != 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
stages_cover_subqueue(enum panvk_subqueue_id subqueue,
|
||||
VkPipelineStageFlags2 stages)
|
||||
{
|
||||
static const VkPipelineStageFlags2 queue_coverage[PANVK_SUBQUEUE_COUNT] = {
|
||||
[PANVK_SUBQUEUE_VERTEX_TILER] = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
|
||||
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
|
||||
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT,
|
||||
[PANVK_SUBQUEUE_FRAGMENT] =
|
||||
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
|
||||
VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT |
|
||||
VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT,
|
||||
[PANVK_SUBQUEUE_COMPUTE] =
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_COPY_BIT,
|
||||
};
|
||||
|
||||
return (stages & queue_coverage[subqueue]) != 0;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
src_stages_to_subqueue_sb_mask(enum panvk_subqueue_id subqueue,
|
||||
VkPipelineStageFlags2 stages)
|
||||
{
|
||||
if (!stages_cover_subqueue(subqueue, stages))
|
||||
return 0;
|
||||
|
||||
/* Indirect draw buffers are read from the command stream, and load/store
|
||||
* operations are synchronized with the LS scoreboad immediately after the
|
||||
* read, so no need to wait in that case.
|
||||
*/
|
||||
if (subqueue == PANVK_SUBQUEUE_VERTEX_TILER &&
|
||||
stages == VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT)
|
||||
return 0;
|
||||
|
||||
/* We need to wait for all previously submitted jobs, and given the
|
||||
* iterator scoreboard is a moving target, we just wait for the
|
||||
* whole dynamic scoreboard range. */
|
||||
return BITFIELD_RANGE(PANVK_SB_ITER_START, PANVK_SB_ITER_COUNT);
|
||||
}
|
||||
|
||||
static void
|
||||
collect_cache_flush_info(enum panvk_subqueue_id subqueue,
|
||||
struct panvk_cache_flush_info *cache_flush,
|
||||
VkPipelineStageFlags2 src_stages,
|
||||
VkPipelineStageFlags2 dst_stages,
|
||||
VkAccessFlags2 src_access, VkAccessFlags2 dst_access)
|
||||
{
|
||||
static const VkAccessFlags2 dev_writes[PANVK_SUBQUEUE_COUNT] = {
|
||||
[PANVK_SUBQUEUE_VERTEX_TILER] = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
|
||||
VK_ACCESS_2_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_2_TRANSFER_WRITE_BIT,
|
||||
[PANVK_SUBQUEUE_FRAGMENT] =
|
||||
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_2_TRANSFER_WRITE_BIT,
|
||||
[PANVK_SUBQUEUE_COMPUTE] = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
|
||||
VK_ACCESS_2_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_2_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static const VkAccessFlags2 dev_reads[PANVK_SUBQUEUE_COUNT] = {
|
||||
[PANVK_SUBQUEUE_VERTEX_TILER] =
|
||||
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_2_INDEX_READ_BIT |
|
||||
VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_2_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_TRANSFER_READ_BIT |
|
||||
VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||
VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
|
||||
[PANVK_SUBQUEUE_FRAGMENT] =
|
||||
VK_ACCESS_2_UNIFORM_READ_BIT | VK_ACCESS_2_SHADER_READ_BIT |
|
||||
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_2_TRANSFER_READ_BIT | VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||
VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
|
||||
[PANVK_SUBQUEUE_COMPUTE] =
|
||||
VK_ACCESS_2_UNIFORM_READ_BIT | VK_ACCESS_2_SHADER_READ_BIT |
|
||||
VK_ACCESS_2_TRANSFER_READ_BIT | VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||
VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
|
||||
};
|
||||
|
||||
/* Note on the cache organization:
|
||||
* - L2 cache is unified, so all changes to this cache are automatically
|
||||
* visible to all GPU sub-components (shader cores, tiler, ...). This
|
||||
* means we only need to flush when the host (AKA CPU) is involved.
|
||||
* - LS caches (which are basically just read-write L1 caches) are coherent
|
||||
* with each other and with the L2 cache, so again, we only need to flush
|
||||
* when the host is involved.
|
||||
* - Other read-only L1 caches (like the ones in front of the texture unit)
|
||||
* are not coherent with the LS or L2 caches, and thus need to be
|
||||
* invalidated any time a write happens.
|
||||
*/
|
||||
|
||||
#define ACCESS_HITS_RO_L1_CACHE \
|
||||
(VK_ACCESS_2_SHADER_SAMPLED_READ_BIT | \
|
||||
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | \
|
||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | \
|
||||
VK_ACCESS_2_TRANSFER_READ_BIT)
|
||||
|
||||
if ((dev_writes[subqueue] & src_access) &&
|
||||
(dev_reads[subqueue] & ACCESS_HITS_RO_L1_CACHE & dst_access))
|
||||
cache_flush->others |= true;
|
||||
|
||||
/* If the host wrote something, we need to clean/invalidate everything. */
|
||||
if ((src_stages & VK_PIPELINE_STAGE_2_HOST_BIT) &&
|
||||
(src_access & VK_ACCESS_2_HOST_WRITE_BIT) &&
|
||||
((dev_reads[subqueue] | dev_writes[subqueue]) & dst_access)) {
|
||||
cache_flush->l2 |= MALI_CS_FLUSH_MODE_CLEAN_AND_INVALIDATE;
|
||||
cache_flush->lsc |= MALI_CS_FLUSH_MODE_CLEAN_AND_INVALIDATE;
|
||||
cache_flush->others |= true;
|
||||
}
|
||||
|
||||
/* If the host needs to read something we wrote, we need to clean
|
||||
* everything. */
|
||||
if ((dst_stages & VK_PIPELINE_STAGE_2_HOST_BIT) &&
|
||||
(dst_access & VK_ACCESS_2_HOST_READ_BIT) &&
|
||||
(dev_writes[subqueue] & src_access)) {
|
||||
cache_flush->l2 |= MALI_CS_FLUSH_MODE_CLEAN;
|
||||
cache_flush->lsc |= MALI_CS_FLUSH_MODE_CLEAN;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
collect_cs_deps(struct panvk_cmd_buffer *cmdbuf,
|
||||
VkPipelineStageFlags2 src_stages,
|
||||
VkPipelineStageFlags2 dst_stages, VkAccessFlags src_access,
|
||||
VkAccessFlags dst_access, struct panvk_cs_deps *deps)
|
||||
{
|
||||
if (src_stages_need_draw_flush(src_stages) && cmdbuf->state.gfx.render.tiler)
|
||||
deps->needs_draw_flush = true;
|
||||
|
||||
uint32_t wait_subqueue_mask = 0;
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
|
||||
uint32_t sb_mask = src_stages_to_subqueue_sb_mask(i, src_stages);
|
||||
assert((sb_mask != 0) == stages_cover_subqueue(i, src_stages));
|
||||
if (!sb_mask)
|
||||
continue;
|
||||
|
||||
deps->src[i].wait_sb_mask |= sb_mask;
|
||||
collect_cache_flush_info(i, &deps->src[i].cache_flush, src_stages,
|
||||
dst_stages, src_access, dst_access);
|
||||
wait_subqueue_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
|
||||
if (!stages_cover_subqueue(i, dst_stages))
|
||||
continue;
|
||||
|
||||
deps->dst[i].wait_subqueue_mask |= wait_subqueue_mask & ~BITFIELD_BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(get_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkDependencyInfo *in,
|
||||
struct panvk_cs_deps *out)
|
||||
{
|
||||
memset(out, 0, sizeof(*out));
|
||||
|
||||
for (uint32_t i = 0; i < in->memoryBarrierCount; i++) {
|
||||
const VkMemoryBarrier2 *barrier = &in->pMemoryBarriers[i];
|
||||
VkPipelineStageFlags2 src_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->srcStageMask);
|
||||
VkPipelineStageFlags2 dst_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->dstStageMask);
|
||||
VkAccessFlags2 src_access =
|
||||
vk_filter_src_access_flags2(src_stages, barrier->srcAccessMask);
|
||||
VkAccessFlags2 dst_access =
|
||||
vk_filter_dst_access_flags2(dst_stages, barrier->dstAccessMask);
|
||||
|
||||
collect_cs_deps(cmdbuf, src_stages, dst_stages, src_access, dst_access,
|
||||
out);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < in->bufferMemoryBarrierCount; i++) {
|
||||
const VkBufferMemoryBarrier2 *barrier = &in->pBufferMemoryBarriers[i];
|
||||
VkPipelineStageFlags2 src_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->srcStageMask);
|
||||
VkPipelineStageFlags2 dst_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->dstStageMask);
|
||||
VkAccessFlags2 src_access =
|
||||
vk_filter_src_access_flags2(src_stages, barrier->srcAccessMask);
|
||||
VkAccessFlags2 dst_access =
|
||||
vk_filter_dst_access_flags2(dst_stages, barrier->dstAccessMask);
|
||||
|
||||
collect_cs_deps(cmdbuf, src_stages, dst_stages, src_access, dst_access,
|
||||
out);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < in->imageMemoryBarrierCount; i++) {
|
||||
const VkImageMemoryBarrier2 *barrier = &in->pImageMemoryBarriers[i];
|
||||
VkPipelineStageFlags2 src_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->srcStageMask);
|
||||
VkPipelineStageFlags2 dst_stages =
|
||||
vk_expand_pipeline_stage_flags2(barrier->dstStageMask);
|
||||
VkAccessFlags2 src_access =
|
||||
vk_filter_src_access_flags2(src_stages, barrier->srcAccessMask);
|
||||
VkAccessFlags2 dst_access =
|
||||
vk_filter_dst_access_flags2(dst_stages, barrier->dstAccessMask);
|
||||
|
||||
collect_cs_deps(cmdbuf, src_stages, dst_stages, src_access, dst_access,
|
||||
out);
|
||||
}
|
||||
|
||||
/* The draw flush will add a vertex -> fragment dependency, so we can skip
|
||||
* the one described in the deps. */
|
||||
if (out->needs_draw_flush)
|
||||
out->dst[PANVK_SUBQUEUE_FRAGMENT].wait_subqueue_mask &=
|
||||
~BITFIELD_BIT(PANVK_SUBQUEUE_VERTEX_TILER);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
||||
const VkDependencyInfo *pDependencyInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cs_deps deps;
|
||||
|
||||
panvk_per_arch(get_cs_deps)(cmdbuf, pDependencyInfo, &deps);
|
||||
|
||||
if (deps.needs_draw_flush)
|
||||
panvk_per_arch(cmd_flush_draws)(cmdbuf);
|
||||
|
||||
uint32_t wait_subqueue_mask = 0;
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++)
|
||||
wait_subqueue_mask |= deps.dst[i].wait_subqueue_mask;
|
||||
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
|
||||
if (!deps.src[i].wait_sb_mask)
|
||||
continue;
|
||||
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
|
||||
struct panvk_cs_state *cs_state = &cmdbuf->state.cs[i];
|
||||
|
||||
cs_wait_slots(b, deps.src[i].wait_sb_mask, false);
|
||||
|
||||
struct panvk_cache_flush_info cache_flush = deps.src[i].cache_flush;
|
||||
if (cache_flush.l2 != MALI_CS_FLUSH_MODE_NONE ||
|
||||
cache_flush.lsc != MALI_CS_FLUSH_MODE_NONE || cache_flush.others) {
|
||||
struct cs_index flush_id = cs_scratch_reg32(b, 0);
|
||||
|
||||
cs_move32_to(b, flush_id, 0);
|
||||
cs_flush_caches(b, cache_flush.l2, cache_flush.lsc, cache_flush.others,
|
||||
flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
|
||||
cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
|
||||
}
|
||||
|
||||
/* If no one waits on us, there's no point signaling the sync object. */
|
||||
if (wait_subqueue_mask & BITFIELD_BIT(i)) {
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index add_val = cs_scratch_reg64(b, 2);
|
||||
|
||||
cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * i);
|
||||
cs_move64_to(b, add_val, 1);
|
||||
cs_sync64_add(b, false, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
|
||||
cs_now());
|
||||
++cs_state->relative_sync_point;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
|
||||
if (!deps.dst[i].wait_subqueue_mask)
|
||||
continue;
|
||||
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
|
||||
for (uint32_t j = 0; j < PANVK_SUBQUEUE_COUNT; j++) {
|
||||
if (!(deps.dst[i].wait_subqueue_mask & BITFIELD_BIT(j)))
|
||||
continue;
|
||||
|
||||
struct panvk_cs_state *cs_state = &cmdbuf->state.cs[j];
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index wait_val = cs_scratch_reg64(b, 2);
|
||||
|
||||
cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * j);
|
||||
|
||||
cs_add64(b, wait_val, cs_progress_seqno_reg(b, j),
|
||||
cs_state->relative_sync_point);
|
||||
cs_sync64_wait(b, false, MALI_CS_CONDITION_GREATER, wait_val,
|
||||
sync_addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
|
||||
enum panvk_subqueue_id subqueue)
|
||||
{
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, subqueue);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 0);
|
||||
struct cs_index cmp_scratch = cs_scratch_reg32(b, 1);
|
||||
|
||||
cs_load32_to(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
cs_match(b, iter_sb, cmp_scratch) {
|
||||
#define CASE(x) \
|
||||
cs_case(b, x) { \
|
||||
cs_wait_slot(b, SB_ITER(x), false); \
|
||||
cs_set_scoreboard_entry(b, SB_ITER(x), SB_ID(LS)); \
|
||||
}
|
||||
|
||||
CASE(0)
|
||||
CASE(1)
|
||||
CASE(2)
|
||||
CASE(3)
|
||||
CASE(4)
|
||||
#undef CASE
|
||||
}
|
||||
}
|
||||
|
||||
static struct cs_buffer
|
||||
alloc_cs_buffer(void *cookie)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf = cookie;
|
||||
const unsigned capacity = 64 * 1024 / sizeof(uint64_t);
|
||||
|
||||
struct panfrost_ptr ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, cs, capacity * 8, 64);
|
||||
|
||||
return (struct cs_buffer){
|
||||
.cpu = ptr.cpu,
|
||||
.gpu = ptr.gpu,
|
||||
.capacity = capacity,
|
||||
};
|
||||
}
|
||||
|
||||
static enum cs_reg_perm
|
||||
cs_reg_perm(struct cs_builder *b, unsigned reg)
|
||||
{
|
||||
struct panvk_cs_state *cs_state =
|
||||
container_of(b, struct panvk_cs_state, builder);
|
||||
struct panvk_cs_reg_upd_context *upd_ctx;
|
||||
|
||||
for (upd_ctx = cs_state->reg_access.upd_ctx_stack; upd_ctx;
|
||||
upd_ctx = upd_ctx->next) {
|
||||
if (upd_ctx->reg_perm(b, reg) == CS_REG_RW)
|
||||
return CS_REG_RW;
|
||||
}
|
||||
|
||||
return cs_state->reg_access.base_perm(b, reg);
|
||||
}
|
||||
|
||||
static void
|
||||
init_cs_builders(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_instance *instance =
|
||||
to_panvk_instance(dev->vk.physical->instance);
|
||||
const reg_perm_cb_t base_reg_perms[PANVK_SUBQUEUE_COUNT] = {
|
||||
[PANVK_SUBQUEUE_VERTEX_TILER] = panvk_cs_vt_reg_perm,
|
||||
[PANVK_SUBQUEUE_FRAGMENT] = panvk_cs_frag_reg_perm,
|
||||
[PANVK_SUBQUEUE_COMPUTE] = panvk_cs_compute_reg_perm,
|
||||
};
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->state.cs); i++) {
|
||||
/* Lazy allocation of the root CS. */
|
||||
struct cs_buffer root_cs = {0};
|
||||
|
||||
struct cs_builder_conf conf = {
|
||||
.nr_registers = 96,
|
||||
.nr_kernel_registers = 4,
|
||||
.alloc_buffer = alloc_cs_buffer,
|
||||
.cookie = cmdbuf,
|
||||
};
|
||||
|
||||
if (instance->debug_flags & PANVK_DEBUG_CS) {
|
||||
cmdbuf->state.cs[i].ls_tracker = (struct cs_load_store_tracker){
|
||||
.sb_slot = SB_ID(LS),
|
||||
};
|
||||
|
||||
conf.ls_tracker = &cmdbuf->state.cs[i].ls_tracker;
|
||||
|
||||
cmdbuf->state.cs[i].reg_access.upd_ctx_stack = NULL;
|
||||
cmdbuf->state.cs[i].reg_access.base_perm = base_reg_perms[i];
|
||||
conf.reg_perm = cs_reg_perm;
|
||||
}
|
||||
|
||||
cs_builder_init(&cmdbuf->state.cs[i].builder, &conf, root_cs);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
|
||||
VkCommandBufferResetFlags flags)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_cmdbuf->pool, struct panvk_cmd_pool, vk);
|
||||
|
||||
vk_command_buffer_reset(&cmdbuf->vk);
|
||||
|
||||
panvk_pool_reset(&cmdbuf->cs_pool);
|
||||
panvk_pool_reset(&cmdbuf->desc_pool);
|
||||
panvk_pool_reset(&cmdbuf->tls_pool);
|
||||
list_splicetail(&cmdbuf->push_sets, &pool->push_sets);
|
||||
list_inithead(&cmdbuf->push_sets);
|
||||
|
||||
memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
|
||||
init_cs_builders(cmdbuf);
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_cmdbuf->pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
|
||||
panvk_pool_cleanup(&cmdbuf->cs_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->desc_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->tls_pool);
|
||||
list_splicetail(&cmdbuf->push_sets, &pool->push_sets);
|
||||
vk_command_buffer_finish(&cmdbuf->vk);
|
||||
vk_free(&dev->vk.alloc, cmdbuf);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
|
||||
struct vk_command_buffer **cmdbuf_out)
|
||||
{
|
||||
struct panvk_device *device =
|
||||
container_of(vk_pool->base.device, struct panvk_device, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_cmd_buffer *cmdbuf;
|
||||
|
||||
cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!cmdbuf)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
VkResult result = vk_command_buffer_init(
|
||||
&pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&device->vk.alloc, cmdbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
list_inithead(&cmdbuf->push_sets);
|
||||
cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
|
||||
cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
|
||||
&cmdbuf->state.gfx.dynamic.sl;
|
||||
|
||||
struct panvk_pool_properties cs_pool_props = {
|
||||
.create_flags = 0,
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Command buffer CS pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->cs_pool, device, &pool->cs_bo_pool, &cs_pool_props);
|
||||
|
||||
struct panvk_pool_properties desc_pool_props = {
|
||||
.create_flags = 0,
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Command buffer descriptor pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool,
|
||||
&desc_pool_props);
|
||||
|
||||
struct panvk_pool_properties tls_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "TLS pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
|
||||
&tls_pool_props);
|
||||
|
||||
init_cs_builders(cmdbuf);
|
||||
*cmdbuf_out = &cmdbuf->vk;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
|
||||
.create = panvk_create_cmdbuf,
|
||||
.reset = panvk_reset_cmdbuf,
|
||||
.destroy = panvk_destroy_cmdbuf,
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_instance *instance =
|
||||
to_panvk_instance(dev->vk.physical->instance);
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
cmdbuf->flags = pBeginInfo->flags;
|
||||
|
||||
/* The descriptor ringbuf trips out pandecode because we always point to the
|
||||
* next tiler/framebuffer descriptor after CS execution, which means we're
|
||||
* decoding an uninitialized or stale descriptor.
|
||||
* FIXME: find a way to trace the simultaneous path that doesn't crash. One
|
||||
* option would be to disable CS intepretation and dump the RUN_xxx context
|
||||
* on the side at execution time.
|
||||
*/
|
||||
if (instance->debug_flags & PANVK_DEBUG_TRACE)
|
||||
cmdbuf->flags &= ~VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
285
src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
Normal file
285
src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
*
|
||||
* Derived from tu_cmd_buffer.c which is:
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "panvk_cmd_alloc.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_meta.h"
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_meta.h"
|
||||
#include "panvk_physical_device.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
#include "pan_props.h"
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
static VkResult
|
||||
prepare_driver_set(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_shader_desc_state *cs_desc_state =
|
||||
&cmdbuf->state.compute.cs.desc;
|
||||
|
||||
if (cs_desc_state->driver_set.dev_addr)
|
||||
return VK_SUCCESS;
|
||||
|
||||
const struct panvk_descriptor_state *desc_state =
|
||||
&cmdbuf->state.compute.desc_state;
|
||||
const struct panvk_shader *cs = cmdbuf->state.compute.shader;
|
||||
uint32_t desc_count = cs->desc_info.dyn_bufs.count + 1;
|
||||
struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem(
|
||||
cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
|
||||
struct panvk_opaque_desc *descs = driver_set.cpu;
|
||||
|
||||
if (!driver_set.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
/* Dummy sampler always comes first. */
|
||||
pan_pack(&descs[0], SAMPLER, _) {
|
||||
}
|
||||
|
||||
panvk_per_arch(cmd_fill_dyn_bufs)(desc_state, cs,
|
||||
(struct mali_buffer_packed *)(&descs[1]));
|
||||
|
||||
cs_desc_state->driver_set.dev_addr = driver_set.gpu;
|
||||
cs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
cmdbuf->state.compute.push_uniforms = panvk_per_arch(
|
||||
cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.compute.sysvals,
|
||||
sizeof(cmdbuf->state.compute.sysvals));
|
||||
return cmdbuf->state.compute.push_uniforms ? VK_SUCCESS
|
||||
: VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
|
||||
uint32_t baseGroupX, uint32_t baseGroupY,
|
||||
uint32_t baseGroupZ, uint32_t groupCountX,
|
||||
uint32_t groupCountY, uint32_t groupCountZ)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
const struct panvk_shader *shader = cmdbuf->state.compute.shader;
|
||||
VkResult result;
|
||||
|
||||
/* If there's no compute shader, we can skip the dispatch. */
|
||||
if (!panvk_priv_mem_dev_addr(shader->spd))
|
||||
return;
|
||||
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_descriptor_state *desc_state =
|
||||
&cmdbuf->state.compute.desc_state;
|
||||
struct panvk_shader_desc_state *cs_desc_state =
|
||||
&cmdbuf->state.compute.cs.desc;
|
||||
|
||||
struct panfrost_ptr tsd = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
|
||||
if (!tsd.gpu)
|
||||
return;
|
||||
|
||||
struct pan_tls_info tlsinfo = {
|
||||
.tls.size = shader->info.tls_size,
|
||||
.wls.size = shader->info.wls_size,
|
||||
};
|
||||
unsigned core_id_range;
|
||||
|
||||
panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
|
||||
|
||||
if (tlsinfo.wls.size) {
|
||||
/* TODO: Clamp WLS instance to some maximum WLS budget. */
|
||||
struct pan_compute_dim dim = {groupCountX, groupCountY, groupCountZ};
|
||||
|
||||
tlsinfo.wls.instances = pan_wls_instances(&dim);
|
||||
|
||||
unsigned wls_total_size = pan_wls_adjust_size(tlsinfo.wls.size) *
|
||||
tlsinfo.wls.instances * core_id_range;
|
||||
|
||||
tlsinfo.wls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, wls_total_size, 4096).gpu;
|
||||
if (!tlsinfo.wls.ptr)
|
||||
return;
|
||||
}
|
||||
|
||||
cmdbuf->state.tls.info.tls.size =
|
||||
MAX2(shader->info.tls_size, cmdbuf->state.tls.info.tls.size);
|
||||
|
||||
if (!cmdbuf->state.tls.desc.gpu) {
|
||||
cmdbuf->state.tls.desc = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
|
||||
if (!cmdbuf->state.tls.desc.gpu)
|
||||
return;
|
||||
}
|
||||
|
||||
GENX(pan_emit_tls)(&tlsinfo, tsd.cpu);
|
||||
|
||||
result = panvk_per_arch(cmd_prepare_push_descs)(
|
||||
cmdbuf, desc_state, shader->desc_info.used_set_mask);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals;
|
||||
sysvals->num_work_groups.x = groupCountX;
|
||||
sysvals->num_work_groups.y = groupCountY;
|
||||
sysvals->num_work_groups.z = groupCountZ;
|
||||
sysvals->local_group_size.x = shader->local_size.x;
|
||||
sysvals->local_group_size.y = shader->local_size.y;
|
||||
sysvals->local_group_size.z = shader->local_size.z;
|
||||
|
||||
result = prepare_driver_set(cmdbuf);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
cmdbuf->state.compute.push_uniforms = 0;
|
||||
result = prepare_push_uniforms(cmdbuf);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
result = panvk_per_arch(cmd_prepare_shader_res_table)(cmdbuf, desc_state,
|
||||
shader, cs_desc_state);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
|
||||
unsigned task_axis = MALI_TASK_AXIS_X;
|
||||
unsigned task_increment = 0;
|
||||
|
||||
/* Copy the global TLS pointer to the per-job TSD. */
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), tsd.gpu);
|
||||
cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
|
||||
cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
cs_update_compute_ctx(b) {
|
||||
cs_move64_to(b, cs_sr_reg64(b, 0), cs_desc_state->res_table);
|
||||
uint32_t push_size = 256 + sizeof(struct panvk_compute_sysvals);
|
||||
uint64_t fau_count = DIV_ROUND_UP(push_size, 8);
|
||||
mali_ptr fau_ptr =
|
||||
cmdbuf->state.compute.push_uniforms | (fau_count << 56);
|
||||
cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr);
|
||||
cs_move64_to(b, cs_sr_reg64(b, 16), panvk_priv_mem_dev_addr(shader->spd));
|
||||
cs_move64_to(b, cs_sr_reg64(b, 24), tsd.gpu);
|
||||
|
||||
/* Global attribute offset */
|
||||
cs_move32_to(b, cs_sr_reg32(b, 32), 0);
|
||||
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
|
||||
cfg.workgroup_size_x = shader->local_size.x;
|
||||
cfg.workgroup_size_y = shader->local_size.y;
|
||||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 34), baseGroupX * shader->local_size.x);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 35), baseGroupY * shader->local_size.y);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 36), baseGroupZ * shader->local_size.z);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), groupCountX);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 38), groupCountY);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 39), groupCountZ);
|
||||
|
||||
/* Pick the task_axis and task_increment to maximize thread utilization. */
|
||||
unsigned threads_per_wg =
|
||||
shader->local_size.x * shader->local_size.y * shader->local_size.z;
|
||||
unsigned max_thread_cnt = panfrost_compute_max_thread_count(
|
||||
&phys_dev->kmod.props, shader->info.work_reg_count);
|
||||
unsigned threads_per_task = threads_per_wg;
|
||||
unsigned local_size[3] = {
|
||||
shader->local_size.x,
|
||||
shader->local_size.y,
|
||||
shader->local_size.z,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (threads_per_task * local_size[i] >= max_thread_cnt) {
|
||||
/* We reached out thread limit, stop at the current axis and
|
||||
* calculate the increment so it doesn't exceed the per-core
|
||||
* thread capacity.
|
||||
*/
|
||||
task_increment = max_thread_cnt / threads_per_task;
|
||||
break;
|
||||
} else if (task_axis == MALI_TASK_AXIS_Z) {
|
||||
/* We reached the Z axis, and there's still room to stuff more
|
||||
* threads. Pick the current axis grid size as our increment
|
||||
* as there's no point using something bigger.
|
||||
*/
|
||||
task_increment = local_size[i];
|
||||
break;
|
||||
}
|
||||
|
||||
threads_per_task *= local_size[i];
|
||||
task_axis++;
|
||||
}
|
||||
}
|
||||
|
||||
assert(task_axis <= MALI_TASK_AXIS_Z);
|
||||
assert(task_increment > 0);
|
||||
|
||||
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
|
||||
|
||||
cs_req_res(b, CS_COMPUTE_RES);
|
||||
cs_run_compute(b, task_increment, task_axis, false,
|
||||
cs_shader_res_sel(0, 0, 0, 0));
|
||||
cs_req_res(b, 0);
|
||||
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
|
||||
struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
|
||||
struct cs_index add_val = cs_scratch_reg64(b, 4);
|
||||
|
||||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
cs_add64(b, sync_addr, sync_addr,
|
||||
PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
|
||||
cs_move64_to(b, add_val, 1);
|
||||
|
||||
cs_match(b, iter_sb, cmp_scratch) {
|
||||
#define CASE(x) \
|
||||
cs_case(b, x) { \
|
||||
cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, \
|
||||
add_val, sync_addr, \
|
||||
cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \
|
||||
cs_move32_to(b, iter_sb, next_iter_sb(x)); \
|
||||
}
|
||||
|
||||
CASE(0)
|
||||
CASE(1)
|
||||
CASE(2)
|
||||
CASE(3)
|
||||
CASE(4)
|
||||
#undef CASE
|
||||
}
|
||||
|
||||
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer, VkDeviceSize offset)
|
||||
{
|
||||
panvk_stub();
|
||||
}
|
||||
2121
src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
Normal file
2121
src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
Normal file
File diff suppressed because it is too large
Load diff
745
src/panfrost/vulkan/csf/panvk_vX_queue.c
Normal file
745
src/panfrost/vulkan/csf/panvk_vX_queue.c
Normal file
|
|
@ -0,0 +1,745 @@
|
|||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "drm-uapi/panthor_drm.h"
|
||||
|
||||
#include "genxml/cs_builder.h"
|
||||
#include "genxml/decode.h"
|
||||
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_macros.h"
|
||||
#include "panvk_queue.h"
|
||||
|
||||
#include "vk_drm_syncobj.h"
|
||||
#include "vk_log.h"
|
||||
|
||||
static void
finish_render_desc_ringbuf(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct panvk_desc_ringbuf *ringbuf = &queue->render_desc_ringbuf;

   panvk_pool_free_mem(&dev->mempools.rw, ringbuf->syncobj);

   if (dev->debug.decode_ctx && ringbuf->addr.dev) {
      pandecode_inject_free(dev->debug.decode_ctx, ringbuf->addr.dev,
                            RENDER_DESC_RINGBUF_SIZE);
      pandecode_inject_free(dev->debug.decode_ctx,
                            ringbuf->addr.dev + RENDER_DESC_RINGBUF_SIZE,
                            RENDER_DESC_RINGBUF_SIZE);
   }

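   /* The ring buffer BO is mapped twice back-to-back on the GPU side (see
    * init_render_desc_ringbuf()), so a single unmap of twice the ring size
    * tears down both views. */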
   if (ringbuf->addr.dev) {
      struct pan_kmod_vm_op op = {
         .type = PAN_KMOD_VM_OP_TYPE_UNMAP,
         .va = {
            .start = ringbuf->addr.dev,
            .size = RENDER_DESC_RINGBUF_SIZE * 2,
         },
      };

      ASSERTED int ret =
         pan_kmod_vm_bind(dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, &op, 1);
      assert(!ret);
   }

   if (ringbuf->addr.host) {
      ASSERTED int ret =
         os_munmap(ringbuf->addr.host, RENDER_DESC_RINGBUF_SIZE);
      assert(!ret);
   }

   pan_kmod_bo_put(ringbuf->bo);
}

static VkResult
init_render_desc_ringbuf(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);
   uint32_t flags = panvk_device_adjust_bo_flags(dev, PAN_KMOD_BO_FLAG_NO_MMAP);
   struct panvk_desc_ringbuf *ringbuf = &queue->render_desc_ringbuf;
   const size_t size = RENDER_DESC_RINGBUF_SIZE;
   VkResult result;
   int ret;

   ringbuf->bo = pan_kmod_bo_alloc(dev->kmod.dev, dev->kmod.vm, size, flags);
   if (!ringbuf->bo)
      return vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to create a descriptor ring buffer context");

   if (!(flags & PAN_KMOD_BO_FLAG_NO_MMAP)) {
      ringbuf->addr.host = pan_kmod_bo_mmap(
         ringbuf->bo, 0, size, PROT_READ | PROT_WRITE, MAP_SHARED, NULL);
      if (ringbuf->addr.host == MAP_FAILED) {
         result = vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Failed to CPU map ringbuf BO");
         goto err_finish_ringbuf;
      }
   }

   /* We choose the alignment to guarantee that we won't ever cross a 4G
    * boundary when accessing the mapping. This way we can encode the
    * wraparound using 32-bit operations. */
   uint64_t dev_addr = util_vma_heap_alloc(&dev->as.heap, size * 2, size * 2);

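   /* The BO is mapped twice, back-to-back, in the GPU address space: an
    * access that starts near the end of the ring can run into the second
    * view instead of having to be split at the wrap point. The ring
    * position itself (panvk_cs_desc_ringbuf::pos) can then presumably be
    * maintained with a simple 32-bit wrap, e.g.
    * pos = (pos + len) % RENDER_DESC_RINGBUF_SIZE. */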
   struct pan_kmod_vm_op vm_ops[] = {
      {
         .type = PAN_KMOD_VM_OP_TYPE_MAP,
         .va = {
            .start = dev_addr,
            .size = RENDER_DESC_RINGBUF_SIZE,
         },
         .map = {
            .bo = ringbuf->bo,
            .bo_offset = 0,
         },
      },
      {
         .type = PAN_KMOD_VM_OP_TYPE_MAP,
         .va = {
            .start = dev_addr + RENDER_DESC_RINGBUF_SIZE,
            .size = RENDER_DESC_RINGBUF_SIZE,
         },
         .map = {
            .bo = ringbuf->bo,
            .bo_offset = 0,
         },
      },
   };

   ret = pan_kmod_vm_bind(dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, vm_ops,
                          ARRAY_SIZE(vm_ops));
   if (ret) {
      result = vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Failed to GPU map ringbuf BO");
      goto err_finish_ringbuf;
   }

   ringbuf->addr.dev = dev_addr;

   if (dev->debug.decode_ctx) {
      pandecode_inject_mmap(dev->debug.decode_ctx, ringbuf->addr.dev,
                            ringbuf->addr.host, RENDER_DESC_RINGBUF_SIZE, NULL);
      pandecode_inject_mmap(dev->debug.decode_ctx,
                            ringbuf->addr.dev + RENDER_DESC_RINGBUF_SIZE,
                            ringbuf->addr.host, RENDER_DESC_RINGBUF_SIZE, NULL);
   }

   struct panvk_pool_alloc_info alloc_info = {
      .size = sizeof(struct panvk_cs_sync32),
      .alignment = 64,
   };

   ringbuf->syncobj = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);

   struct panvk_cs_sync32 *syncobj = panvk_priv_mem_host_addr(ringbuf->syncobj);

   if (!syncobj) {
      result = vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Failed to create the render desc ringbuf context");
      goto err_finish_ringbuf;
   }

   *syncobj = (struct panvk_cs_sync32){
      .seqno = RENDER_DESC_RINGBUF_SIZE,
   };

   return VK_SUCCESS;

err_finish_ringbuf:
   finish_render_desc_ringbuf(queue);
   return result;
}

static VkResult
init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct panvk_subqueue *subq = &queue->subqueues[subqueue];
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);
   struct panvk_instance *instance =
      to_panvk_instance(dev->vk.physical->instance);
   unsigned debug = instance->debug_flags;
   struct panvk_cs_sync64 *syncobjs = panvk_priv_mem_host_addr(queue->syncobjs);

   struct panvk_pool_alloc_info alloc_info = {
      .size = sizeof(struct panvk_cs_subqueue_context),
      .alignment = 64,
   };

   subq->context = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
   if (!panvk_priv_mem_host_addr(subq->context))
      return vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to create a queue context");

   struct panvk_cs_subqueue_context *cs_ctx =
      panvk_priv_mem_host_addr(subq->context);

   *cs_ctx = (struct panvk_cs_subqueue_context){
      .syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs),
      .debug_syncobjs = panvk_priv_mem_dev_addr(queue->debug_syncobjs),
      .iter_sb = 0,
   };

   /* We use the geometry buffer for our temporary CS buffer. */
   struct cs_buffer root_cs = {
      .cpu = panvk_priv_mem_host_addr(queue->tiler_heap.desc) + 4096,
      .gpu = panvk_priv_mem_dev_addr(queue->tiler_heap.desc) + 4096,
      .capacity = 64 * 1024 / sizeof(uint64_t),
   };

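   /* The tiler_heap.desc allocation is 4 KiB of heap descriptor followed by a
    * 64 KiB geometry buffer (see init_tiler()), hence the +4096 offsets above.
    * Capacity is expressed in 64-bit units, which is presumably the CS
    * instruction size, i.e. 64 * 1024 / 8 = 8192 instructions. */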
   const struct cs_builder_conf conf = {
      .nr_registers = 96,
      .nr_kernel_registers = 4,
   };
   struct cs_builder b;

   assert(panvk_priv_mem_dev_addr(queue->tiler_heap.desc) != 0);

   cs_builder_init(&b, &conf, root_cs);
   /* Pass the context through r62. */
   cs_move64_to(&b, cs_subqueue_ctx_reg(&b),
                panvk_priv_mem_dev_addr(subq->context));

   /* Initialize scoreboard slots used for asynchronous operations. */
   cs_set_scoreboard_entry(&b, SB_ITER(0), SB_ID(LS));

   /* We do a greater-than test on sync objects, and given that the reference
    * seqno registers are all zero at init time, we need to initialize all
    * syncobjs with a seqno of one. */
   syncobjs[subqueue].seqno = 1;

   if (subqueue != PANVK_SUBQUEUE_COMPUTE) {
      cs_ctx->render.tiler_heap =
         panvk_priv_mem_dev_addr(queue->tiler_heap.desc);
      /* Our geometry buffer comes 4k after the tiler heap, and we encode the
       * size in the lower 12 bits so the address can be copied directly
       * to the tiler descriptors. */
      cs_ctx->render.geom_buf =
         (cs_ctx->render.tiler_heap + 4096) | ((64 * 1024) >> 12);
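      /* Arithmetic check: the heap descriptor is 4 KiB aligned, so the
       * geometry buffer address has its low 12 bits clear, and
       * (64 * 1024) >> 12 = 16 packs the buffer size (presumably in 4 KiB
       * units) into those bits. */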

      /* Initialize the ringbuf */
      cs_ctx->render.desc_ringbuf = (struct panvk_cs_desc_ringbuf){
         .syncobj = panvk_priv_mem_dev_addr(queue->render_desc_ringbuf.syncobj),
         .ptr = queue->render_desc_ringbuf.addr.dev,
         .pos = 0,
      };

      struct cs_index heap_ctx_addr = cs_scratch_reg64(&b, 0);

      /* Pre-set the heap context on the vertex-tiler/fragment queues. */
      cs_move64_to(&b, heap_ctx_addr, queue->tiler_heap.context.dev_addr);
      cs_heap_set(&b, heap_ctx_addr);
   }

   cs_finish(&b);

   assert(cs_is_valid(&b));

   struct drm_panthor_sync_op syncop = {
      .flags =
         DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ | DRM_PANTHOR_SYNC_OP_SIGNAL,
      .handle = queue->syncobj_handle,
      .timeline_value = 0,
   };
   struct drm_panthor_queue_submit qsubmit = {
      .queue_index = subqueue,
      .stream_size = cs_root_chunk_size(&b),
      .stream_addr = cs_root_chunk_gpu_addr(&b),
      .latest_flush = 0,
      .syncs = DRM_PANTHOR_OBJ_ARRAY(1, &syncop),
   };
   struct drm_panthor_group_submit gsubmit = {
      .group_handle = queue->group_handle,
      .queue_submits = DRM_PANTHOR_OBJ_ARRAY(1, &qsubmit),
   };

   int ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_GROUP_SUBMIT, &gsubmit);
   if (ret)
      return vk_errorf(dev->vk.physical, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to initialize subqueue: %m");

   ret = drmSyncobjWait(dev->vk.drm_fd, &queue->syncobj_handle, 1, INT64_MAX, 0,
                        NULL);
   if (ret)
      return vk_errorf(dev->vk.physical, VK_ERROR_INITIALIZATION_FAILED,
                       "SyncobjWait failed: %m");

   if (debug & PANVK_DEBUG_TRACE) {
      uint32_t regs[256] = {0};

      pandecode_cs(dev->debug.decode_ctx, qsubmit.stream_addr,
                   qsubmit.stream_size, phys_dev->kmod.props.gpu_prod_id, regs);
      pandecode_next_frame(dev->debug.decode_ctx);
   }

   return VK_SUCCESS;
}

static void
cleanup_queue(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);

   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++)
      panvk_pool_free_mem(&dev->mempools.rw, queue->subqueues[i].context);

   finish_render_desc_ringbuf(queue);

   panvk_pool_free_mem(&dev->mempools.rw, queue->debug_syncobjs);
   panvk_pool_free_mem(&dev->mempools.rw, queue->syncobjs);
}

static VkResult
init_queue(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);
   struct panvk_instance *instance =
      to_panvk_instance(dev->vk.physical->instance);
   VkResult result;

   struct panvk_pool_alloc_info alloc_info = {
      .size =
         ALIGN_POT(sizeof(struct panvk_cs_sync64), 64) * PANVK_SUBQUEUE_COUNT,
      .alignment = 64,
   };

   queue->syncobjs = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
   if (!panvk_priv_mem_host_addr(queue->syncobjs))
      return vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to allocate subqueue sync objects");

   if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
      alloc_info.size =
         ALIGN_POT(sizeof(struct panvk_cs_sync32), 64) * PANVK_SUBQUEUE_COUNT;
      queue->debug_syncobjs =
         panvk_pool_alloc_mem(&dev->mempools.rw_nc, alloc_info);
      if (!panvk_priv_mem_host_addr(queue->debug_syncobjs)) {
         result = vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Failed to allocate subqueue sync objects");
         goto err_cleanup_queue;
      }
   }

   result = init_render_desc_ringbuf(queue);
   if (result != VK_SUCCESS)
      goto err_cleanup_queue;

   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
      result = init_subqueue(queue, i);
      if (result != VK_SUCCESS)
         goto err_cleanup_queue;
   }

   return VK_SUCCESS;

err_cleanup_queue:
   cleanup_queue(queue);
   return result;
}

static VkResult
create_group(struct panvk_queue *queue)
{
   const struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);

   struct drm_panthor_queue_create qc[] = {
      [PANVK_SUBQUEUE_VERTEX_TILER] =
         {
            .priority = 1,
            .ringbuf_size = 64 * 1024,
         },
      [PANVK_SUBQUEUE_FRAGMENT] =
         {
            .priority = 1,
            .ringbuf_size = 64 * 1024,
         },
      [PANVK_SUBQUEUE_COMPUTE] =
         {
            .priority = 1,
            .ringbuf_size = 64 * 1024,
         },
   };

   struct drm_panthor_group_create gc = {
      .compute_core_mask = phys_dev->kmod.props.shader_present,
      .fragment_core_mask = phys_dev->kmod.props.shader_present,
      .tiler_core_mask = 1,
      .max_compute_cores = util_bitcount64(phys_dev->kmod.props.shader_present),
      .max_fragment_cores =
         util_bitcount64(phys_dev->kmod.props.shader_present),
      .max_tiler_cores = 1,
      .priority = PANTHOR_GROUP_PRIORITY_MEDIUM,
      .queues = DRM_PANTHOR_OBJ_ARRAY(ARRAY_SIZE(qc), qc),
      .vm_id = pan_kmod_vm_handle(dev->kmod.vm),
   };

   int ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_GROUP_CREATE, &gc);
   if (ret)
      return vk_errorf(phys_dev, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to create a scheduling group");

   queue->group_handle = gc.group_handle;
   return VK_SUCCESS;
}

static void
destroy_group(struct panvk_queue *queue)
{
   const struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct drm_panthor_group_destroy gd = {
      .group_handle = queue->group_handle,
   };

   ASSERTED int ret =
      drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_GROUP_DESTROY, &gd);
   assert(!ret);
}

static VkResult
init_tiler(struct panvk_queue *queue)
{
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct panvk_tiler_heap *tiler_heap = &queue->tiler_heap;
   VkResult result;

   /* We allocate the tiler heap descriptor and geometry buffer in one go,
    * so we can pass it through a single 64-bit register to the VERTEX_TILER
    * command streams. */
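   /* Layout of this allocation: the first 4 KiB hold the TILER_HEAP
    * descriptor, the remaining 64 KiB are the geometry buffer, which
    * init_subqueue() also borrows as the temporary CS buffer for the
    * subqueue initialization stream. */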
   struct panvk_pool_alloc_info alloc_info = {
      .size = (64 * 1024) + 4096,
      .alignment = 4096,
   };

   tiler_heap->desc = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
   if (!panvk_priv_mem_host_addr(tiler_heap->desc)) {
      result = vk_errorf(phys_dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Failed to create a tiler heap context");
      goto err_free_desc;
   }

   tiler_heap->chunk_size = 2 * 1024 * 1024;

   struct drm_panthor_tiler_heap_create thc = {
      .vm_id = pan_kmod_vm_handle(dev->kmod.vm),
      .chunk_size = tiler_heap->chunk_size,
      .initial_chunk_count = 5,
      .max_chunks = 64,
      .target_in_flight = 65535,
   };

   int ret =
      drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE, &thc);
   if (ret) {
      result = vk_errorf(phys_dev, VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to create a tiler heap context");
      goto err_free_desc;
   }

   tiler_heap->context.handle = thc.handle;
   tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va;

   pan_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, cfg) {
      cfg.size = tiler_heap->chunk_size;
      cfg.base = thc.first_heap_chunk_gpu_va;
      cfg.bottom = cfg.base + 64;
      cfg.top = cfg.base + cfg.size;
   }

   return VK_SUCCESS;

err_free_desc:
   panvk_pool_free_mem(&dev->mempools.rw, tiler_heap->desc);
   return result;
}

static void
cleanup_tiler(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct panvk_tiler_heap *tiler_heap = &queue->tiler_heap;
   struct drm_panthor_tiler_heap_destroy thd = {
      .handle = tiler_heap->context.handle,
   };
   ASSERTED int ret =
      drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY, &thd);
   assert(!ret);

   panvk_pool_free_mem(&dev->mempools.rw, tiler_heap->desc);
}

static VkResult
panvk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
{
   struct panvk_queue *queue = container_of(vk_queue, struct panvk_queue, vk);
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(queue->vk.base.device->physical);
   VkResult result = VK_SUCCESS;
   int ret;

   if (vk_queue_is_lost(&queue->vk))
      return VK_ERROR_DEVICE_LOST;

   struct panvk_instance *instance =
      to_panvk_instance(dev->vk.physical->instance);
   unsigned debug = instance->debug_flags;
   bool force_sync = debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC);
   uint32_t qsubmit_count = 0;
   uint32_t used_queue_mask = 0;
   for (uint32_t i = 0; i < submit->command_buffer_count; i++) {
      struct panvk_cmd_buffer *cmdbuf =
         container_of(submit->command_buffers[i], struct panvk_cmd_buffer, vk);

      for (uint32_t j = 0; j < ARRAY_SIZE(cmdbuf->state.cs); j++) {
         assert(cs_is_valid(&cmdbuf->state.cs[j].builder));
         if (!cs_is_empty(&cmdbuf->state.cs[j].builder)) {
            used_queue_mask |= BITFIELD_BIT(j);
            qsubmit_count++;
         }
      }
   }

   /* Synchronize all subqueues if no command buffers were submitted. */
   if (!qsubmit_count)
      used_queue_mask = BITFIELD_MASK(PANVK_SUBQUEUE_COUNT);

   /* We add sync-only queue submits to place our wait/signal operations. */
   if (submit->wait_count > 0)
      qsubmit_count += util_bitcount(used_queue_mask);

   if (submit->signal_count > 0 || force_sync)
      qsubmit_count += util_bitcount(used_queue_mask);

   uint32_t syncop_count = submit->wait_count + util_bitcount(used_queue_mask);

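   /* Worked example: two command buffers, each with work recorded only on
    * the fragment and compute subqueues, contribute 4 queue submits and set
    * the fragment and compute bits in used_queue_mask; if the submit also
    * carries waits and signals, one sync-only submit per used subqueue is
    * added for each, bringing qsubmit_count to 8. */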
   STACK_ARRAY(struct drm_panthor_queue_submit, qsubmits, qsubmit_count);
   STACK_ARRAY(struct drm_panthor_sync_op, syncops, syncop_count);
   struct drm_panthor_sync_op *wait_ops = syncops;
   struct drm_panthor_sync_op *signal_ops = syncops + submit->wait_count;

   qsubmit_count = 0;
   if (submit->wait_count) {
      for (uint32_t i = 0; i < submit->wait_count; i++) {
         assert(vk_sync_type_is_drm_syncobj(submit->waits[i].sync->type));
         struct vk_drm_syncobj *syncobj =
            vk_sync_as_drm_syncobj(submit->waits[i].sync);

         wait_ops[i] = (struct drm_panthor_sync_op){
            .flags = (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE
                         ? DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ
                         : DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ) |
                     DRM_PANTHOR_SYNC_OP_WAIT,
            .handle = syncobj->syncobj,
            .timeline_value = submit->waits[i].wait_value,
         };
      }

      for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
         if (used_queue_mask & BITFIELD_BIT(i)) {
            qsubmits[qsubmit_count++] = (struct drm_panthor_queue_submit){
               .queue_index = i,
               .syncs = DRM_PANTHOR_OBJ_ARRAY(submit->wait_count, wait_ops),
            };
         }
      }
   }

   for (uint32_t i = 0; i < submit->command_buffer_count; i++) {
      struct panvk_cmd_buffer *cmdbuf =
         container_of(submit->command_buffers[i], struct panvk_cmd_buffer, vk);

      for (uint32_t j = 0; j < ARRAY_SIZE(cmdbuf->state.cs); j++) {
         if (cs_is_empty(&cmdbuf->state.cs[j].builder))
            continue;

         qsubmits[qsubmit_count++] = (struct drm_panthor_queue_submit){
            .queue_index = j,
            .stream_size = cs_root_chunk_size(&cmdbuf->state.cs[j].builder),
            .stream_addr = cs_root_chunk_gpu_addr(&cmdbuf->state.cs[j].builder),
            .latest_flush = 0,
         };
      }
   }

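   /* Signalling goes through the internal timeline syncobj: each used
    * subqueue gets a sync-only submit signalling point 1, 2, ... on
    * queue->syncobj_handle, and once the kernel has accepted the submits the
    * user-visible sync objects are filled by transferring from that internal
    * syncobj, which is then reset for the next submission. */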
   if (submit->signal_count || force_sync) {
      uint32_t signal_op = 0;
      for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
         if (used_queue_mask & BITFIELD_BIT(i)) {
            signal_ops[signal_op] = (struct drm_panthor_sync_op){
               .flags = DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ |
                        DRM_PANTHOR_SYNC_OP_SIGNAL,
               .handle = queue->syncobj_handle,
               .timeline_value = signal_op + 1,
            };

            qsubmits[qsubmit_count++] = (struct drm_panthor_queue_submit){
               .queue_index = i,
               .syncs = DRM_PANTHOR_OBJ_ARRAY(1, &signal_ops[signal_op++]),
            };
         }
      }
   }

   if (force_sync) {
      struct panvk_cs_sync32 *debug_syncs =
         panvk_priv_mem_host_addr(queue->debug_syncobjs);

      assert(debug_syncs);
      memset(debug_syncs, 0, sizeof(*debug_syncs) * PANVK_SUBQUEUE_COUNT);
   }

   struct drm_panthor_group_submit gsubmit = {
      .group_handle = queue->group_handle,
      .queue_submits = DRM_PANTHOR_OBJ_ARRAY(qsubmit_count, qsubmits),
   };

   ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANTHOR_GROUP_SUBMIT, &gsubmit);
   if (ret) {
      result = vk_queue_set_lost(&queue->vk, "GROUP_SUBMIT: %m");
      goto out;
   }

   if (submit->signal_count || force_sync) {
      if (force_sync) {
         uint64_t point = util_bitcount(used_queue_mask);
         ret = drmSyncobjTimelineWait(dev->vk.drm_fd, &queue->syncobj_handle,
                                      &point, 1, INT64_MAX,
                                      DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
         assert(!ret);
      }

      for (uint32_t i = 0; i < submit->signal_count; i++) {
         assert(vk_sync_type_is_drm_syncobj(submit->signals[i].sync->type));
         struct vk_drm_syncobj *syncobj =
            vk_sync_as_drm_syncobj(submit->signals[i].sync);

         drmSyncobjTransfer(dev->vk.drm_fd, syncobj->syncobj,
                            submit->signals[i].signal_value,
                            queue->syncobj_handle, 0, 0);
      }

      drmSyncobjReset(dev->vk.drm_fd, &queue->syncobj_handle, 1);
   }

   if (debug & PANVK_DEBUG_TRACE) {
      for (uint32_t i = 0; i < qsubmit_count; i++) {
         if (!qsubmits[i].stream_size)
            continue;

         uint32_t subqueue = qsubmits[i].queue_index;
         uint32_t regs[256] = {0};
         uint64_t ctx =
            panvk_priv_mem_dev_addr(queue->subqueues[subqueue].context);

         regs[PANVK_CS_REG_SUBQUEUE_CTX_START] = ctx;
         regs[PANVK_CS_REG_SUBQUEUE_CTX_START + 1] = ctx >> 32;

         simple_mtx_lock(&dev->debug.decode_ctx->lock);
         pandecode_dump_file_open(dev->debug.decode_ctx);
         pandecode_log(dev->debug.decode_ctx, "CS%d\n",
                       qsubmits[i].queue_index);
         simple_mtx_unlock(&dev->debug.decode_ctx->lock);
         pandecode_cs(dev->debug.decode_ctx, qsubmits[i].stream_addr,
                      qsubmits[i].stream_size, phys_dev->kmod.props.gpu_prod_id,
                      regs);
      }
   }

   if (debug & PANVK_DEBUG_DUMP)
      pandecode_dump_mappings(dev->debug.decode_ctx);

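   /* Sanity check for PANVK_DEBUG_SYNC/TRACE: the command streams are
    * expected to bump their subqueue's debug syncobj once per submitted
    * stream, so after the synchronous wait each seqno should match the
    * number of streams submitted to that subqueue, with no error reported. */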
   if (force_sync) {
      struct panvk_cs_sync32 *debug_syncs =
         panvk_priv_mem_host_addr(queue->debug_syncobjs);
      uint32_t debug_sync_points[PANVK_SUBQUEUE_COUNT] = {0};

      for (uint32_t i = 0; i < qsubmit_count; i++) {
         if (qsubmits[i].stream_size)
            debug_sync_points[qsubmits[i].queue_index]++;
      }

      for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
         if (debug_syncs[i].seqno != debug_sync_points[i] ||
             debug_syncs[i].error != 0)
            assert(!"Incomplete job or timeout");
      }
   }

   if (debug & PANVK_DEBUG_TRACE)
      pandecode_next_frame(dev->debug.decode_ctx);

out:
   STACK_ARRAY_FINISH(syncops);
   STACK_ARRAY_FINISH(qsubmits);
   return result;
}

VkResult
panvk_per_arch(queue_init)(struct panvk_device *dev, struct panvk_queue *queue,
                           int idx, const VkDeviceQueueCreateInfo *create_info)
{
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);

   VkResult result = vk_queue_init(&queue->vk, &dev->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   int ret = drmSyncobjCreate(dev->vk.drm_fd, 0, &queue->syncobj_handle);
   if (ret) {
      result = vk_errorf(phys_dev, VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to create our internal sync object");
      goto err_finish_queue;
   }

   result = init_tiler(queue);
   if (result != VK_SUCCESS)
      goto err_destroy_syncobj;

   result = create_group(queue);
   if (result != VK_SUCCESS)
      goto err_cleanup_tiler;

   result = init_queue(queue);
   if (result != VK_SUCCESS)
      goto err_destroy_group;

   queue->vk.driver_submit = panvk_queue_submit;
   return VK_SUCCESS;

err_destroy_group:
   destroy_group(queue);

err_cleanup_tiler:
   cleanup_tiler(queue);

err_destroy_syncobj:
   drmSyncobjDestroy(dev->vk.drm_fd, queue->syncobj_handle);

err_finish_queue:
   vk_queue_finish(&queue->vk);
   return result;
}

void
panvk_per_arch(queue_finish)(struct panvk_queue *queue)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);

   destroy_group(queue);
   cleanup_tiler(queue);
   drmSyncobjDestroy(dev->vk.drm_fd, queue->syncobj_handle);
   vk_queue_finish(&queue->vk);
}
@@ -57,6 +57,15 @@ jm_files = [
  'jm/panvk_vX_queue.c',
]

csf_archs = [10]
csf_inc_dir = ['csf']
csf_files = [
  'csf/panvk_vX_cmd_buffer.c',
  'csf/panvk_vX_cmd_dispatch.c',
  'csf/panvk_vX_cmd_draw.c',
  'csf/panvk_vX_queue.c',
]

common_per_arch_files = [
  panvk_entrypoints[0],
  'panvk_vX_blend.c',
@@ -74,21 +83,7 @@ common_per_arch_files = [
]

foreach arch : [6, 7, 10]
  if arch in valhall_archs
    # We're lacking key components to compile common source files for v9/v10.
    # Just add the v10 entrypoints for now.
    per_arch_files = [
      panvk_entrypoints[0],
      'panvk_vX_blend.c',
      'panvk_vX_descriptor_set.c',
      'panvk_vX_descriptor_set_layout.c',
      'panvk_vX_nir_lower_descriptors.c',
      'panvk_vX_shader.c',
    ]
  else
    per_arch_files = common_per_arch_files
  endif

  per_arch_files = common_per_arch_files
  inc_panvk_per_arch = []

  if arch in bifrost_archs
@@ -102,6 +97,9 @@ foreach arch : [6, 7, 10]
  if arch in jm_archs
    inc_panvk_per_arch += jm_inc_dir
    per_arch_files += jm_files
  elif arch in csf_archs
    inc_panvk_per_arch += csf_inc_dir
    per_arch_files += csf_files
  endif

  panvk_per_arch_libs += static_library(
@@ -36,6 +36,7 @@ static const struct debug_control panvk_debug_options[] = {
   {"linear", PANVK_DEBUG_LINEAR},
   {"dump", PANVK_DEBUG_DUMP},
   {"no_known_warn", PANVK_DEBUG_NO_KNOWN_WARN},
   {"cs", PANVK_DEBUG_CS},
   {NULL, 0}};

VKAPI_ATTR VkResult VKAPI_CALL
@@ -21,6 +21,7 @@ enum panvk_debug_flags {
   PANVK_DEBUG_LINEAR = 1 << 5,
   PANVK_DEBUG_DUMP = 1 << 6,
   PANVK_DEBUG_NO_KNOWN_WARN = 1 << 7,
   PANVK_DEBUG_CS = 1 << 8,
};

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
@@ -29,11 +29,7 @@

#include "genxml/gen_macros.h"

/* FIXME: make the include statement unconditional when the CSF command buffer
 * logic is implemented. */
#if PAN_ARCH <= 7
#include "panvk_cmd_buffer.h"
#endif
#include "panvk_device.h"
#include "panvk_instance.h"
#include "panvk_mempool.h"
@@ -1333,9 +1329,6 @@ static const struct vk_shader_ops panvk_shader_ops = {
      panvk_shader_get_executable_internal_representations,
};

/* FIXME: make this unconditional when the CSF command buffer logic is
 * implemented. */
#if PAN_ARCH <= 7
static void
panvk_cmd_bind_shader(struct panvk_cmd_buffer *cmd, const gl_shader_stage stage,
                      struct panvk_shader *shader)
@@ -1380,7 +1373,6 @@ panvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd, uint32_t stage_count,
      panvk_cmd_bind_shader(cmd, stages[i], shader);
   }
}
#endif

const struct vk_device_shader_ops panvk_per_arch(device_shader_ops) = {
   .get_nir_options = panvk_get_nir_options,
@@ -1390,10 +1382,5 @@ const struct vk_device_shader_ops panvk_per_arch(device_shader_ops) = {
   .compile = panvk_compile_shaders,
   .deserialize = panvk_deserialize_shader,
   .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,

/* FIXME: make the assignment unconditional when the CSF command buffer logic
 * is implemented. */
#if PAN_ARCH <= 7
   .cmd_bind_shaders = panvk_cmd_bind_shaders,
#endif
};