panvk: Prepare panvk_draw_prepare_varyings for JM indirect

On JM hardware, we need to allocate a buffer depending on vertex count.

As a result, for indirect and indexed draws we allocate a large buffer with
the alloc-on-fault flag set.

The size of that buffer is calculated assuming a maximum of 2 million vertices
and 18 attributes per vertex (16 user attributes, 2 specials).

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35724>
This commit is contained in:
Mary Guillemard 2025-06-24 18:50:35 +02:00 committed by Marge Bot
parent de41eaf2d8
commit e74372c1c4
5 changed files with 63 additions and 8 deletions

View file

@ -76,6 +76,7 @@ struct panvk_draw_data {
struct {
uint64_t attribs;
uint64_t attrib_bufs;
uint64_t varying_bufs;
} indirect_info;
};
@ -441,6 +442,7 @@ static VkResult
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_data *draw)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
const struct panvk_shader_variant *vs =
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
const struct panvk_shader_link *link = &cmdbuf->state.gfx.link;
@ -455,18 +457,44 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
bool writes_point_size =
vs->info.vs.writes_point_size &&
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
unsigned vertex_count =
draw->padded_vertex_count * draw->info.instance.count;
uint64_t psiz_buf = 0;
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
unsigned buf_size = vertex_count * link->buf_strides[i];
uint64_t buf_addr =
buf_size ? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
: 0;
if (buf_size && !buf_addr)
if (is_indirect_draw(draw) &&
!cmdbuf->state.gfx.vs.indirect_varying_bufs_infos) {
struct pan_ptr bufs_info_storage = panvk_cmd_alloc_dev_mem(
cmdbuf, desc, sizeof(struct libpan_draw_helper_varying_buf_info), 8);
if (!bufs_info_storage.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos = bufs_info_storage.gpu;
struct libpan_draw_helper_varying_buf_info *vary_bufs_info =
bufs_info_storage.cpu;
vary_bufs_info->address = dev->indirect_varying_buffer->addr.dev;
vary_bufs_info->size = PANVK_JM_MAX_PER_VTX_ATTRIBUTES_INDIRECT_SIZE *
PANVK_JM_MAX_VERTICES_INDIRECT;
vary_bufs_info->offset = 0;
}
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
uint32_t buf_size;
uint64_t buf_addr;
if (is_indirect_draw(draw)) {
buf_addr = dev->indirect_varying_buffer->addr.dev;
buf_size = 0;
} else {
buf_size = draw->padded_vertex_count * draw->info.instance.count *
link->buf_strides[i];
buf_addr =
buf_size
? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
: 0;
if (buf_size && !buf_addr)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
cfg.stride = link->buf_strides[i];
cfg.size = buf_size;
@ -493,6 +521,8 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
draw->line_width = 1.0f;
draw->varying_bufs = bufs.gpu;
draw->indirect_info.varying_bufs =
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos;
draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs);
draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs);
return VK_SUCCESS;

View file

@ -151,6 +151,7 @@ struct panvk_cmd_graphics_state {
uint64_t attrib_bufs;
uint64_t indirect_attribs_infos;
uint64_t indirect_attrib_bufs_infos;
uint64_t indirect_varying_bufs_infos;
bool previous_draw_was_indirect;
#endif
} vs;

View file

@ -27,6 +27,18 @@
#include "util/u_printf.h"
#include "util/vma.h"
/* On JM hardware, we need to allocate a buffer whose size depends on the
 * vertex count.
 *
 * As a result, for indirect and indexed draws we allocate a large buffer with
 * alloc-on-fault set.
 *
 * The size of that buffer is calculated assuming a maximum of 2 million
 * vertices and 18 attributes per vertex (16 user attributes, 2 specials).
 */
/* Maximum number of vertices assumed when sizing that buffer. */
#define PANVK_JM_MAX_VERTICES_INDIRECT (2000000)
/* Per-vertex size assumed when sizing that buffer: 18 attributes times 4.
 * NOTE(review): the factor of 4 is presumably the size in bytes of one
 * attribute -- confirm against the code consuming this value. */
#define PANVK_JM_MAX_PER_VTX_ATTRIBUTES_INDIRECT_SIZE (18 * 4)
struct panvk_precomp_cache;
struct panvk_device_draw_context;
@ -55,6 +67,7 @@ struct panvk_device {
} kmod;
struct panvk_priv_bo *tiler_heap;
struct panvk_priv_bo *indirect_varying_buffer;
struct panvk_priv_bo *sample_positions;
struct {

View file

@ -174,6 +174,7 @@ panvk_per_arch(cmd_meta_gfx_end)(
cmdbuf->state.gfx.vs.attrib_bufs = 0;
cmdbuf->state.gfx.vs.indirect_attribs_infos = 0;
cmdbuf->state.gfx.vs.indirect_attrib_bufs_infos = 0;
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos = 0;
cmdbuf->state.gfx.fs.rsd = 0;
#else
cmdbuf->state.gfx.fs.desc.res_table = 0;

View file

@ -426,6 +426,14 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->tiler_heap);
if (result != VK_SUCCESS)
goto err_free_priv_bos;
result = panvk_priv_bo_create(
device,
PANVK_JM_MAX_VERTICES_INDIRECT * PANVK_JM_MAX_PER_VTX_ATTRIBUTES_INDIRECT_SIZE,
PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->indirect_varying_buffer);
if (result != VK_SUCCESS)
goto err_free_priv_bos;
#endif
result = panvk_priv_bo_create(
@ -533,6 +541,7 @@ err_free_priv_bos:
panvk_priv_bo_unref(device->printf.bo);
panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
panvk_priv_bo_unref(device->sample_positions);
panvk_priv_bo_unref(device->indirect_varying_buffer);
panvk_priv_bo_unref(device->tiler_heap);
panvk_device_cleanup_mempools(device);
vk_free(&device->vk.alloc, device->dump_region_size);
@ -578,6 +587,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
u_printf_destroy(&device->printf.ctx);
panvk_priv_bo_unref(device->printf.bo);
panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
panvk_priv_bo_unref(device->indirect_varying_buffer);
panvk_priv_bo_unref(device->tiler_heap);
panvk_priv_bo_unref(device->sample_positions);
panvk_device_cleanup_mempools(device);