panvk: Use WB mappings for the global RW and executable memory pools
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

This implies relying on all users of these pools to do the flushing
explicitly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
This commit is contained in:
Boris Brezillon 2025-09-17 11:11:40 +02:00
parent 2dd27c647b
commit c0d982751c
10 changed files with 255 additions and 222 deletions

View file

@ -326,8 +326,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
return 0; return 0;
} }
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, panvk_priv_mem_write_desc(shader->rsd, 0, RENDERER_STATE, cfg) {
cfg) {
pan_shader_prepare_rsd(&shader->info, pan_shader_prepare_rsd(&shader->info,
panvk_priv_mem_dev_addr(shader->code_mem), &cfg); panvk_priv_mem_dev_addr(shader->code_mem), &cfg);
} }

View file

@ -34,8 +34,10 @@ panvk_per_arch(CreateEvent)(VkDevice _device,
return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
} }
memset(panvk_priv_mem_host_addr(event->syncobjs), 0, panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32,
sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT); PANVK_SUBQUEUE_COUNT, sobjs) {
memset(sobjs, 0, sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT);
}
*pEvent = panvk_event_to_handle(event); *pEvent = panvk_event_to_handle(event);
return VK_SUCCESS; return VK_SUCCESS;
@ -61,11 +63,12 @@ panvk_per_arch(GetEventStatus)(VkDevice _device, VkEvent _event)
{ {
VK_FROM_HANDLE(panvk_event, event, _event); VK_FROM_HANDLE(panvk_event, event, _event);
struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); panvk_priv_mem_readback_array(event->syncobjs, 0, struct panvk_cs_sync32,
PANVK_SUBQUEUE_COUNT, syncobjs) {
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
if (!syncobjs[i].seqno) if (!syncobjs[i].seqno)
return VK_EVENT_RESET; return VK_EVENT_RESET;
}
} }
return VK_EVENT_SET; return VK_EVENT_SET;
@ -76,10 +79,11 @@ panvk_per_arch(SetEvent)(VkDevice _device, VkEvent _event)
{ {
VK_FROM_HANDLE(panvk_event, event, _event); VK_FROM_HANDLE(panvk_event, event, _event);
struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32,
PANVK_SUBQUEUE_COUNT, syncobjs) {
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++)
syncobjs[i].seqno = 1; syncobjs[i].seqno = 1;
}
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -89,8 +93,10 @@ panvk_per_arch(ResetEvent)(VkDevice _device, VkEvent _event)
{ {
VK_FROM_HANDLE(panvk_event, event, _event); VK_FROM_HANDLE(panvk_event, event, _event);
struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32,
PANVK_SUBQUEUE_COUNT, syncobjs) {
memset(syncobjs, 0, sizeof(*syncobjs) * PANVK_SUBQUEUE_COUNT);
}
memset(syncobjs, 0, sizeof(*syncobjs) * PANVK_SUBQUEUE_COUNT);
return VK_SUCCESS; return VK_SUCCESS;
} }

View file

@ -168,11 +168,11 @@ init_render_desc_ringbuf(struct panvk_gpu_queue *queue)
return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to create the render desc ringbuf context"); "Failed to create the render desc ringbuf context");
struct panvk_cs_sync32 *syncobj = panvk_priv_mem_host_addr(ringbuf->syncobj); panvk_priv_mem_write(ringbuf->syncobj, 0, struct panvk_cs_sync32, syncobj) {
*syncobj = (struct panvk_cs_sync32){
*syncobj = (struct panvk_cs_sync32){ .seqno = RENDER_DESC_RINGBUF_SIZE,
.seqno = RENDER_DESC_RINGBUF_SIZE, };
}; }
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -350,7 +350,6 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
struct panvk_subqueue *subq = &queue->subqueues[subqueue]; struct panvk_subqueue *subq = &queue->subqueues[subqueue];
const struct panvk_physical_device *phys_dev = const struct panvk_physical_device *phys_dev =
to_panvk_physical_device(queue->vk.base.device->physical); to_panvk_physical_device(queue->vk.base.device->physical);
struct panvk_cs_sync64 *syncobjs = panvk_priv_mem_host_addr(queue->syncobjs);
VkResult result = init_subqueue_tracing(queue, subqueue); VkResult result = init_subqueue_tracing(queue, subqueue);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@ -401,6 +400,8 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
assert(cs_is_valid(&b)); assert(cs_is_valid(&b));
subq->req_resource.cs_buffer_size = cs_root_chunk_size(&b); subq->req_resource.cs_buffer_size = cs_root_chunk_size(&b);
subq->req_resource.cs_buffer_addr = cs_root_chunk_gpu_addr(&b); subq->req_resource.cs_buffer_addr = cs_root_chunk_gpu_addr(&b);
panvk_priv_mem_flush(subq->req_resource.buf, 0,
subq->req_resource.cs_buffer_size);
alloc_info.size = sizeof(struct panvk_cs_subqueue_context); alloc_info.size = sizeof(struct panvk_cs_subqueue_context);
alloc_info.alignment = 64; alloc_info.alignment = 64;
@ -410,25 +411,43 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to create a queue context"); "Failed to create a queue context");
struct panvk_cs_subqueue_context *cs_ctx = panvk_priv_mem_write(subq->context, 0, struct panvk_cs_subqueue_context,
panvk_priv_mem_host_addr(subq->context); cs_ctx) {
*cs_ctx = (struct panvk_cs_subqueue_context){
*cs_ctx = (struct panvk_cs_subqueue_context){ .syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs),
.syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs), .debug.tracebuf.cs = subq->tracebuf.addr.dev,
.debug.tracebuf.cs = subq->tracebuf.addr.dev,
#if PAN_ARCH == 10 #if PAN_ARCH == 10
/* On the VT/COMPUTE queue, the first iter_sb will be skipped since /* On the VT/COMPUTE queue, the first iter_sb will be skipped since
* cs_next_iter_sb() is called before the first use, but that's okay, * cs_next_iter_sb() is called before the first use, but that's okay,
* because the next slot will be equally free, and the skipped one will * because the next slot will be equally free, and the skipped one will
* be re-used at some point. * be re-used at some point.
* On the fragment queue, we increment the iterator when the * On the fragment queue, we increment the iterator when the
* FINISH_FRAGMENT job is issued, which is why we need this value * FINISH_FRAGMENT job is issued, which is why we need this value
* to point to a valid+free scoreboard from the start. * to point to a valid+free scoreboard from the start.
*/ */
.iter_sb = SB_ITER(0), .iter_sb = SB_ITER(0),
#endif #endif
.reg_dump_addr = panvk_priv_mem_dev_addr(subq->regs_save), .reg_dump_addr = panvk_priv_mem_dev_addr(subq->regs_save),
}; };
if (subqueue != PANVK_SUBQUEUE_COMPUTE) {
cs_ctx->render.tiler_heap =
panvk_priv_mem_dev_addr(queue->tiler_heap.desc);
/* Our geometry buffer comes 4k after the tiler heap, and we encode the
* size in the lower 12 bits so the address can be copied directly
* to the tiler descriptors. */
cs_ctx->render.geom_buf =
(cs_ctx->render.tiler_heap + 4096) | ((64 * 1024) >> 12);
/* Initialize the ringbuf */
cs_ctx->render.desc_ringbuf = (struct panvk_cs_desc_ringbuf){
.syncobj =
panvk_priv_mem_dev_addr(queue->render_desc_ringbuf.syncobj),
.ptr = queue->render_desc_ringbuf.addr.dev,
.pos = 0,
};
}
}
/* We use the geometry buffer for our temporary CS buffer. */ /* We use the geometry buffer for our temporary CS buffer. */
root_cs = (struct cs_buffer){ root_cs = (struct cs_buffer){
@ -465,24 +484,13 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
/* We do greater than test on sync objects, and given the reference seqno /* We do greater than test on sync objects, and given the reference seqno
* registers are all zero at init time, we need to initialize all syncobjs * registers are all zero at init time, we need to initialize all syncobjs
* with a seqno of one. */ * with a seqno of one. */
syncobjs[subqueue].seqno = 1; panvk_priv_mem_write(queue->syncobjs,
subqueue * sizeof(struct panvk_cs_sync64),
struct panvk_cs_sync64, syncobj) {
syncobj->seqno = 1;
}
if (subqueue != PANVK_SUBQUEUE_COMPUTE) { if (subqueue != PANVK_SUBQUEUE_COMPUTE) {
cs_ctx->render.tiler_heap =
panvk_priv_mem_dev_addr(queue->tiler_heap.desc);
/* Our geometry buffer comes 4k after the tiler heap, and we encode the
* size in the lower 12 bits so the address can be copied directly
* to the tiler descriptors. */
cs_ctx->render.geom_buf =
(cs_ctx->render.tiler_heap + 4096) | ((64 * 1024) >> 12);
/* Initialize the ringbuf */
cs_ctx->render.desc_ringbuf = (struct panvk_cs_desc_ringbuf){
.syncobj = panvk_priv_mem_dev_addr(queue->render_desc_ringbuf.syncobj),
.ptr = queue->render_desc_ringbuf.addr.dev,
.pos = 0,
};
struct cs_index heap_ctx_addr = cs_scratch_reg64(&b, 0); struct cs_index heap_ctx_addr = cs_scratch_reg64(&b, 0);
/* Pre-set the heap context on the vertex-tiler/fragment queues. */ /* Pre-set the heap context on the vertex-tiler/fragment queues. */
@ -493,6 +501,8 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
assert(cs_is_valid(&b)); assert(cs_is_valid(&b));
panvk_priv_mem_flush(queue->tiler_heap.desc, 4096, cs_root_chunk_size(&b));
struct drm_panthor_sync_op syncop = { struct drm_panthor_sync_op syncop = {
.flags = .flags =
DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ | DRM_PANTHOR_SYNC_OP_SIGNAL, DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ | DRM_PANTHOR_SYNC_OP_SIGNAL,
@ -700,8 +710,7 @@ init_tiler(struct panvk_gpu_queue *queue)
tiler_heap->context.handle = thc.handle; tiler_heap->context.handle = thc.handle;
tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va; tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va;
pan_cast_and_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, panvk_priv_mem_write_desc(tiler_heap->desc, 0, TILER_HEAP, cfg) {
cfg) {
cfg.size = tiler_heap->chunk_size; cfg.size = tiler_heap->chunk_size;
cfg.base = thc.first_heap_chunk_gpu_va; cfg.base = thc.first_heap_chunk_gpu_va;
cfg.bottom = cfg.base + 64; cfg.bottom = cfg.base + 64;
@ -1125,16 +1134,17 @@ panvk_queue_submit_ioctl(struct panvk_queue_submit *submit)
/* If we're tracing, we need to reset the desc ringbufs and the CS /* If we're tracing, we need to reset the desc ringbufs and the CS
* tracebuf. */ * tracebuf. */
for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) { for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) {
struct panvk_cs_subqueue_context *ctx = panvk_priv_mem_rmw(queue->subqueues[i].context, 0,
panvk_priv_mem_host_addr(queue->subqueues[i].context); struct panvk_cs_subqueue_context, ctx) {
if (ctx->render.desc_ringbuf.ptr) {
ctx->render.desc_ringbuf.ptr =
queue->render_desc_ringbuf.addr.dev;
ctx->render.desc_ringbuf.pos = 0;
}
if (ctx->render.desc_ringbuf.ptr) { if (ctx->debug.tracebuf.cs)
ctx->render.desc_ringbuf.ptr = queue->render_desc_ringbuf.addr.dev; ctx->debug.tracebuf.cs = queue->subqueues[i].tracebuf.addr.dev;
ctx->render.desc_ringbuf.pos = 0;
} }
if (ctx->debug.tracebuf.cs)
ctx->debug.tracebuf.cs = queue->subqueues[i].tracebuf.addr.dev;
} }
} }
@ -1235,28 +1245,30 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit,
} }
for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) { for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) {
struct panvk_cs_subqueue_context *ctx = panvk_priv_mem_readback(queue->subqueues[i].context, 0,
panvk_priv_mem_host_addr(queue->subqueues[i].context); struct panvk_cs_subqueue_context, ctx) {
size_t trace_size = trace_size =
ctx->debug.tracebuf.cs - queue->subqueues[i].tracebuf.addr.dev;
size_t trace_size = if (trace_size) {
ctx->debug.tracebuf.cs - queue->subqueues[i].tracebuf.addr.dev; assert(
if (!trace_size) trace_size <= queue->subqueues[i].tracebuf.size ||
continue; !"OOB access on the CS tracebuf, pass a bigger PANVK_CS_TRACEBUF_SIZE");
assert( assert(
trace_size <= queue->subqueues[i].tracebuf.size || !ctx->render.desc_ringbuf.ptr ||
!"OOB access on the CS tracebuf, pass a bigger PANVK_CS_TRACEBUF_SIZE"); ctx->render.desc_ringbuf.pos <=
queue->render_desc_ringbuf.size ||
!"OOB access on the desc tracebuf, pass a bigger PANVK_DESC_TRACEBUF_SIZE");
assert( uint64_t trace = queue->subqueues[i].tracebuf.addr.dev;
!ctx->render.desc_ringbuf.ptr ||
ctx->render.desc_ringbuf.pos <= queue->render_desc_ringbuf.size ||
!"OOB access on the desc tracebuf, pass a bigger PANVK_DESC_TRACEBUF_SIZE");
uint64_t trace = queue->subqueues[i].tracebuf.addr.dev; pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n",
i);
pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n", i); pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id);
pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id); pandecode_user_msg(decode_ctx, "\n");
pandecode_user_msg(decode_ctx, "\n"); }
}
} }
} }
@ -1407,10 +1419,11 @@ panvk_per_arch(gpu_queue_check_status)(struct vk_queue *vk_queue)
/* check for CS error and treat it as device lost */ /* check for CS error and treat it as device lost */
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
const struct panvk_cs_subqueue_context *subq_ctx = panvk_priv_mem_readback(queue->subqueues[i].context, 0,
panvk_priv_mem_host_addr(queue->subqueues[i].context); struct panvk_cs_subqueue_context, subq_ctx) {
if (subq_ctx->last_error != 0) if (subq_ctx->last_error != 0)
return vk_queue_set_lost(&queue->vk, "CS_FAULT"); return vk_queue_set_lost(&queue->vk, "CS_FAULT");
}
} }
int ret = pan_kmod_ioctl(dev->drm_fd, DRM_IOCTL_PANTHOR_GROUP_GET_STATE, int ret = pan_kmod_ioctl(dev->drm_fd, DRM_IOCTL_PANTHOR_GROUP_GET_STATE,

View file

@ -47,38 +47,31 @@ struct panvk_query_pool {
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_query_pool, vk.base, VkQueryPool, VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_query_pool, vk.base, VkQueryPool,
VK_OBJECT_TYPE_QUERY_POOL) VK_OBJECT_TYPE_QUERY_POOL)
static uint64_t static inline uint32_t
panvk_query_available_offset(struct panvk_query_pool *pool, uint32_t query)
{
assert(query < pool->vk.query_count);
return query * sizeof(struct panvk_query_available_obj);
}
static inline uint64_t
panvk_query_available_dev_addr(struct panvk_query_pool *pool, uint32_t query) panvk_query_available_dev_addr(struct panvk_query_pool *pool, uint32_t query)
{ {
assert(query < pool->vk.query_count); return panvk_priv_mem_dev_addr(pool->available_mem) +
return panvk_priv_mem_dev_addr(pool->available_mem) + query * sizeof(struct panvk_query_available_obj); panvk_query_available_offset(pool, query);
} }
static struct panvk_query_available_obj * static inline uint64_t
panvk_query_available_host_addr(struct panvk_query_pool *pool, uint32_t query)
{
assert(query < pool->vk.query_count);
return (struct panvk_query_available_obj *)panvk_priv_mem_host_addr(pool->available_mem) + query;
}
static uint64_t
panvk_query_offset(struct panvk_query_pool *pool, uint32_t query) panvk_query_offset(struct panvk_query_pool *pool, uint32_t query)
{ {
assert(query < pool->vk.query_count); assert(query < pool->vk.query_count);
return query * (uint64_t)pool->query_stride; return query * (uint64_t)pool->query_stride;
} }
static uint64_t static inline uint64_t
panvk_query_report_dev_addr(struct panvk_query_pool *pool, uint32_t query) panvk_query_report_dev_addr(struct panvk_query_pool *pool, uint32_t query)
{ {
return panvk_priv_mem_dev_addr(pool->mem) + panvk_query_offset(pool, query); return panvk_priv_mem_dev_addr(pool->mem) + panvk_query_offset(pool, query);
} }
static struct panvk_query_report *
panvk_query_report_host_addr(struct panvk_query_pool *pool, uint32_t query)
{
return (void *)((char *)panvk_priv_mem_host_addr(pool->mem) +
panvk_query_offset(pool, query));
}
#endif #endif

View file

@ -70,11 +70,14 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
if (!panvk_priv_mem_check_alloc(view->mem)) if (!panvk_priv_mem_check_alloc(view->mem))
return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
struct pan_ptr ptr = { panvk_priv_mem_write(view->mem, 0, struct mali_surface_with_stride_packed, sd) {
.gpu = panvk_priv_mem_dev_addr(view->mem), struct pan_ptr ptr = {
.cpu = panvk_priv_mem_host_addr(view->mem), .gpu = panvk_priv_mem_dev_addr(view->mem),
}; .cpu = sd,
GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); };
GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr);
}
#endif #endif
} }

View file

@ -173,8 +173,7 @@ get_preload_shader(struct panvk_device *dev,
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
} }
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) {
cfg) {
cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;

View file

@ -70,7 +70,8 @@ static void
panvk_device_init_mempools(struct panvk_device *dev) panvk_device_init_mempools(struct panvk_device *dev)
{ {
struct panvk_pool_properties rw_pool_props = { struct panvk_pool_properties rw_pool_props = {
.create_flags = 0, .create_flags =
panvk_device_adjust_bo_flags(dev, PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 16 * 1024, .slab_size = 16 * 1024,
.label = "Device RW cached memory pool", .label = "Device RW cached memory pool",
.owns_bos = false, .owns_bos = false,
@ -93,7 +94,8 @@ panvk_device_init_mempools(struct panvk_device *dev)
panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, NULL, &rw_nc_pool_props); panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, NULL, &rw_nc_pool_props);
struct panvk_pool_properties exec_pool_props = { struct panvk_pool_properties exec_pool_props = {
.create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, .create_flags = panvk_device_adjust_bo_flags(
dev, PAN_KMOD_BO_FLAG_EXECUTABLE | PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 16 * 1024, .slab_size = 16 * 1024,
.label = "Device executable memory pool (shaders)", .label = "Device executable memory pool (shaders)",
.owns_bos = false, .owns_bos = false,

View file

@ -153,69 +153,73 @@ prepare_tex_descs(struct panvk_image_view *view)
if (!panvk_priv_mem_check_alloc(view->mem)) if (!panvk_priv_mem_check_alloc(view->mem))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
struct pan_ptr ptr = { panvk_priv_mem_write_array(view->mem, 0, uint8_t, alloc_info.size, cpu_ptr) {
.gpu = panvk_priv_mem_dev_addr(view->mem), struct pan_ptr ptr = {
.cpu = panvk_priv_mem_host_addr(view->mem), .gpu = panvk_priv_mem_dev_addr(view->mem),
}; .cpu = cpu_ptr,
};
#if PAN_ARCH >= 9 #if PAN_ARCH >= 9
struct pan_ptr storage_ptr = ptr; struct pan_ptr storage_ptr = ptr;
if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
uint32_t storage_payload_offset = alloc_info.size - storage_payload_size; uint32_t storage_payload_offset =
storage_ptr.gpu += storage_payload_offset; alloc_info.size - storage_payload_size;
storage_ptr.cpu += storage_payload_offset; storage_ptr.gpu += storage_payload_offset;
} storage_ptr.cpu += storage_payload_offset;
}
#endif #endif
if (plane_count > 1) { if (plane_count > 1) {
memset(pview.planes, 0, sizeof(pview.planes)); memset(pview.planes, 0, sizeof(pview.planes));
for (uint32_t plane = 0; plane < plane_count; plane++) { for (uint32_t plane = 0; plane < plane_count; plane++) {
VkFormat plane_format = VkFormat plane_format =
vk_format_get_plane_format(view->vk.view_format, plane); vk_format_get_plane_format(view->vk.view_format, plane);
/* We need a per-plane pview. */ /* We need a per-plane pview. */
pview.planes[0] = view->pview.planes[plane]; pview.planes[0] = view->pview.planes[plane];
pview.format = vk_format_to_pipe_format(plane_format); pview.format = vk_format_to_pipe_format(plane_format);
GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[plane], &ptr); GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[plane],
&ptr);
#if PAN_ARCH >= 9 #if PAN_ARCH >= 9
if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
GENX(pan_storage_texture_emit)( GENX(pan_storage_texture_emit)(
&pview, &view->descs.storage_tex[plane], &storage_ptr); &pview, &view->descs.storage_tex[plane], &storage_ptr);
storage_ptr.cpu += tex_payload_size; storage_ptr.cpu += tex_payload_size;
storage_ptr.gpu += tex_payload_size; storage_ptr.gpu += tex_payload_size;
}
#endif
ptr.cpu += tex_payload_size;
ptr.gpu += tex_payload_size;
} }
} else {
GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[0], &ptr);
#if PAN_ARCH >= 9
if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
GENX(pan_storage_texture_emit)(&pview, &view->descs.storage_tex[0],
&storage_ptr);
#endif #endif
}
if (can_preload_other_aspect) {
/* If the depth was present in the aspects mask, we've handled it
* already, so move on to the stencil. If it wasn't present, it's the
* stencil texture we create first, and we need to handle the depth here.
*/
pview.format = (view->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
? panvk_image_stencil_only_pfmt(image)
: panvk_image_depth_only_pfmt(image);
ptr.cpu += tex_payload_size; ptr.cpu += tex_payload_size;
ptr.gpu += tex_payload_size; ptr.gpu += tex_payload_size;
GENX(pan_sampled_texture_emit)(&pview,
&view->descs.zs.other_aspect_tex, &ptr);
} }
} else {
GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[0], &ptr);
#if PAN_ARCH >= 9
if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
GENX(pan_storage_texture_emit)(&pview, &view->descs.storage_tex[0],
&storage_ptr);
#endif
} }
if (!can_preload_other_aspect)
return VK_SUCCESS;
/* If the depth was present in the aspects mask, we've handled it already, so
* move on to the stencil. If it wasn't present, it's the stencil texture we
* create first, and we need to handle the depth here.
*/
pview.format = (view->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
? panvk_image_stencil_only_pfmt(image)
: panvk_image_depth_only_pfmt(image);
ptr.cpu += tex_payload_size;
ptr.gpu += tex_payload_size;
GENX(pan_sampled_texture_emit)(&pview, &view->descs.zs.other_aspect_tex,
&ptr);
return VK_SUCCESS; return VK_SUCCESS;
} }

View file

@ -24,9 +24,11 @@ static void
reset_query_pool(struct panvk_query_pool *pool, uint32_t firstQuery, reset_query_pool(struct panvk_query_pool *pool, uint32_t firstQuery,
uint32_t queryCount) uint32_t queryCount)
{ {
struct panvk_query_available_obj *available = panvk_priv_mem_write_array(pool->available_mem,
panvk_query_available_host_addr(pool, firstQuery); panvk_query_available_offset(pool, firstQuery),
memset(available, 0, queryCount * sizeof(*available)); struct panvk_query_available_obj, queryCount,
available)
memset(available, 0, queryCount * sizeof(*available));
} }
VKAPI_ATTR VkResult VKAPI_CALL VKAPI_ATTR VkResult VKAPI_CALL
@ -131,14 +133,19 @@ panvk_per_arch(ResetQueryPool)(VkDevice device, VkQueryPool queryPool,
static bool static bool
panvk_query_is_available(struct panvk_query_pool *pool, uint32_t query) panvk_query_is_available(struct panvk_query_pool *pool, uint32_t query)
{ {
struct panvk_query_available_obj *available = bool res = false;
panvk_query_available_host_addr(pool, query);
panvk_priv_mem_readback(pool->available_mem,
panvk_query_available_offset(pool, query),
struct panvk_query_available_obj, available) {
#if PAN_ARCH >= 10 #if PAN_ARCH >= 10
return p_atomic_read(&available->sync_obj.seqno) != 0; res = p_atomic_read(&available->sync_obj.seqno) != 0;
#else #else
return p_atomic_read(&available->value) != 0; res = p_atomic_read(&available->value) != 0;
#endif #endif
}
return res;
} }
static VkResult static VkResult
@ -248,28 +255,29 @@ panvk_per_arch(GetQueryPoolResults)(VkDevice _device, VkQueryPool queryPool,
bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT); bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
const struct panvk_query_report *src =
panvk_query_report_host_addr(pool, query);
assert(i * stride < dataSize); assert(i * stride < dataSize);
void *dst = (char *)pData + i * stride; void *dst = (char *)pData + i * stride;
switch (pool->vk.query_type) { panvk_priv_mem_readback(pool->mem, panvk_query_offset(pool, query),
case VK_QUERY_TYPE_OCCLUSION: { struct panvk_query_report, src) {
if (write_results) switch (pool->vk.query_type) {
cpu_write_occlusion_query_result(dst, 0, flags, src, case VK_QUERY_TYPE_OCCLUSION: {
pool->reports_per_query); if (write_results)
break; cpu_write_occlusion_query_result(dst, 0, flags, src,
} pool->reports_per_query);
break;
}
#if PAN_ARCH >= 10 #if PAN_ARCH >= 10
case VK_QUERY_TYPE_TIMESTAMP: { case VK_QUERY_TYPE_TIMESTAMP: {
if (write_results) if (write_results)
cpu_write_timestamp_query_result(dst, 0, flags, src, cpu_write_timestamp_query_result(dst, 0, flags, src,
pool->reports_per_query); pool->reports_per_query);
break; break;
} }
#endif #endif
default: default:
UNREACHABLE("Unsupported query type"); UNREACHABLE("Unsupported query type");
}
} }
if (!write_results) if (!write_results)

View file

@ -1093,8 +1093,7 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->rsd)) if (!panvk_priv_mem_check_alloc(shader->rsd))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, panvk_priv_mem_write_desc(shader->rsd, 0, RENDERER_STATE, cfg) {
cfg) {
pan_shader_prepare_rsd(&shader->info, pan_shader_prepare_rsd(&shader->info,
panvk_shader_variant_get_dev_addr(shader), &cfg); panvk_shader_variant_get_dev_addr(shader), &cfg);
} }
@ -1104,8 +1103,7 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spd)) if (!panvk_priv_mem_check_alloc(shader->spd))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) {
cfg) {
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@ -1131,8 +1129,8 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spds.all_points)) if (!panvk_priv_mem_check_alloc(shader->spds.all_points))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_points), panvk_priv_mem_write_desc(shader->spds.all_points, 0, SHADER_PROGRAM,
SHADER_PROGRAM, cfg) { cfg) {
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
cfg.register_allocation = cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count); pan_register_allocation(shader->info.work_reg_count);
@ -1146,8 +1144,8 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spds.all_triangles)) if (!panvk_priv_mem_check_alloc(shader->spds.all_triangles))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_triangles), panvk_priv_mem_write_desc(shader->spds.all_triangles, 0, SHADER_PROGRAM,
SHADER_PROGRAM, cfg) { cfg) {
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
cfg.register_allocation = cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count); pan_register_allocation(shader->info.work_reg_count);
@ -1162,8 +1160,8 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spds.pos_points)) if (!panvk_priv_mem_check_alloc(shader->spds.pos_points))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_points), panvk_priv_mem_write_desc(shader->spds.pos_points, 0, SHADER_PROGRAM,
SHADER_PROGRAM, cfg) { cfg) {
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
cfg.register_allocation = cfg.register_allocation =
@ -1178,8 +1176,8 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spds.pos_triangles)) if (!panvk_priv_mem_check_alloc(shader->spds.pos_triangles))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles), panvk_priv_mem_write_desc(shader->spds.pos_triangles, 0, SHADER_PROGRAM,
SHADER_PROGRAM, cfg) { cfg) {
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
cfg.register_allocation = cfg.register_allocation =
@ -1196,8 +1194,7 @@ panvk_shader_upload(struct panvk_device *dev,
if (!panvk_priv_mem_check_alloc(shader->spds.var)) if (!panvk_priv_mem_check_alloc(shader->spds.var))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.var), panvk_priv_mem_write_desc(shader->spds.var, 0, SHADER_PROGRAM, cfg) {
SHADER_PROGRAM, cfg) {
unsigned work_count = shader->info.vs.secondary_work_reg_count; unsigned work_count = shader->info.vs.secondary_work_reg_count;
cfg.stage = pan_shader_stage(&shader->info); cfg.stage = pan_shader_stage(&shader->info);
@ -1583,13 +1580,13 @@ shader_desc_info_deserialize(struct panvk_device *dev,
}; };
shader->desc_info.others.map = shader->desc_info.others.map =
panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info); panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
uint32_t *copy_table = if (!panvk_priv_mem_check_alloc(shader->desc_info.others.map))
panvk_priv_mem_host_addr(shader->desc_info.others.map);
if (!copy_table)
return panvk_error(shader, VK_ERROR_OUT_OF_DEVICE_MEMORY); return panvk_error(shader, VK_ERROR_OUT_OF_DEVICE_MEMORY);
blob_copy_bytes(blob, copy_table, others_count * sizeof(*copy_table)); panvk_priv_mem_write_array(shader->desc_info.others.map, 0, uint32_t,
others_count, copy_table) {
blob_copy_bytes(blob, copy_table, others_count * sizeof(*copy_table));
}
} }
#else #else
shader->desc_info.dyn_bufs.count = blob_read_uint32(blob); shader->desc_info.dyn_bufs.count = blob_read_uint32(blob);
@ -1738,6 +1735,8 @@ shader_desc_info_serialize(struct blob *blob,
others_count += shader->desc_info.others.count[i]; others_count += shader->desc_info.others.count[i];
} }
/* No need to wrap this one in panvk_priv_mem_readback(), because the
* GPU is not supposed to touch it. */
blob_write_bytes(blob, blob_write_bytes(blob,
panvk_priv_mem_host_addr(shader->desc_info.others.map), panvk_priv_mem_host_addr(shader->desc_info.others.map),
sizeof(uint32_t) * others_count); sizeof(uint32_t) * others_count);
@ -2080,33 +2079,40 @@ emit_varying_attrs(struct panvk_pool *desc_pool,
unsigned varying_count, const struct varyings_info *info, unsigned varying_count, const struct varyings_info *info,
unsigned *buf_offsets, struct panvk_priv_mem *mem) unsigned *buf_offsets, struct panvk_priv_mem *mem)
{ {
*mem = panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE); if (!varying_count) {
*mem = (struct panvk_priv_mem){0};
return VK_SUCCESS;
}
if (varying_count && !panvk_priv_mem_check_alloc(*mem)) *mem = panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE);
if (!panvk_priv_mem_check_alloc(*mem))
return VK_ERROR_OUT_OF_DEVICE_MEMORY; return VK_ERROR_OUT_OF_DEVICE_MEMORY;
struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(*mem); panvk_priv_mem_write_array(*mem, 0, struct mali_attribute_packed,
unsigned attr_idx = 0; varying_count, attrs) {
unsigned attr_idx = 0;
for (unsigned i = 0; i < varying_count; i++) { for (unsigned i = 0; i < varying_count; i++) {
pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) { pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) {
gl_varying_slot loc = varyings[i].location; gl_varying_slot loc = varyings[i].location;
enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE
? info->fmts[loc] ? info->fmts[loc]
: PIPE_FORMAT_NONE; : PIPE_FORMAT_NONE;
if (pfmt == PIPE_FORMAT_NONE) { if (pfmt == PIPE_FORMAT_NONE) {
#if PAN_ARCH >= 7 #if PAN_ARCH >= 7
cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; cfg.format =
(MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else #else
cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif #endif
} else { } else {
cfg.buffer_index = varying_buf_id(loc); cfg.buffer_index = varying_buf_id(loc);
cfg.offset = buf_offsets[loc]; cfg.offset = buf_offsets[loc];
cfg.format = varying_format(loc, info->fmts[loc]); cfg.format = varying_format(loc, info->fmts[loc]);
}
cfg.offset_enable = false;
} }
cfg.offset_enable = false;
} }
} }