panfrost,panvk: Wire printf and abort support

Those are quite useful for debugging and having sanity checks in place.
It is also quite tidious to get ride of all asserts in every headers we
would ever want to use, lets just accept those now.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32720>
This commit is contained in:
Mary Guillemard 2025-01-02 16:34:03 +01:00 committed by Marge Bot
parent 8adede1d44
commit 6438b3e2bd
22 changed files with 157 additions and 4 deletions

View file

@ -1313,6 +1313,9 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *ptr_cpu,
case PAN_SYSVAL_DRAWID:
uniforms[i].u[0] = batch->ctx->drawid;
break;
case PAN_SYSVAL_PRINTF_BUFFER:
uniforms[i].du[0] = batch->ctx->printf.bo->ptr.gpu;
break;
default:
assert(0);
}

View file

@ -43,10 +43,12 @@
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"
#include "util/u_printf.h"
#include "util/u_surface.h"
#include "util/u_upload_mgr.h"
#include "util/u_vbuf.h"
#include "clc/panfrost_compile.h"
#include "compiler/nir/nir_serialize.h"
#include "util/pan_lower_framebuffer.h"
#include "decode.h"
@ -565,6 +567,9 @@ panfrost_destroy(struct pipe_context *pipe)
pan_screen(pipe->screen)->vtbl.context_cleanup(panfrost);
u_printf_destroy(&panfrost->printf.ctx);
panfrost_bo_unreference(panfrost->printf.bo);
if (panfrost->writers)
_mesa_hash_table_destroy(panfrost->writers, NULL);
@ -1122,6 +1127,14 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
ret = drmSyncobjCreate(panfrost_device_fd(dev), 0, &ctx->in_sync_obj);
assert(!ret);
ctx->printf.bo =
panfrost_bo_create(dev, LIBPAN_PRINTF_BUFFER_SIZE, 0, "Printf Buffer");
if (ctx->printf.bo == NULL)
goto failed;
u_printf_init(&ctx->printf.ctx, ctx->printf.bo, ctx->printf.bo->ptr.cpu);
ret = pan_screen(screen)->vtbl.context_init(ctx);
if (ret)

View file

@ -242,6 +242,11 @@ struct panfrost_context {
union {
struct panfrost_csf_context csf;
};
struct {
struct u_printf_ctx ctx;
struct panfrost_bo *bo;
} printf;
};
/* Corresponds to the CSO */
@ -296,6 +301,7 @@ enum {
PAN_SYSVAL_BLEND_CONSTANTS = 16,
PAN_SYSVAL_XFB = 17,
PAN_SYSVAL_NUM_VERTICES = 18,
PAN_SYSVAL_PRINTF_BUFFER = 19,
};
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \

View file

@ -62,6 +62,7 @@ panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
case PAN_SYSVAL_WORK_DIM:
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
case PAN_SYSVAL_NUM_VERTICES:
case PAN_SYSVAL_PRINTF_BUFFER:
dirty |= PAN_DIRTY_PARAMS;
break;

View file

@ -116,6 +116,9 @@ sysval_for_intrinsic(unsigned arch, nir_intrinsic_instr *intr, unsigned *offset)
case nir_intrinsic_load_workgroup_size:
return PAN_SYSVAL_LOCAL_GROUP_SIZE;
case nir_intrinsic_load_printf_buffer_address:
return PAN_SYSVAL_PRINTF_BUFFER;
case nir_intrinsic_load_rt_conversion_pan: {
unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
unsigned rt = nir_intrinsic_base(intr);

View file

@ -255,6 +255,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
sysvals.num_workgroups.x = grid.count[0];
sysvals.num_workgroups.y = grid.count[1];
sysvals.num_workgroups.z = grid.count[2];
sysvals.printf_buffer_address = ctx->printf.bo->ptr.gpu;
bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
data_size, &sysvals);

View file

@ -5,6 +5,7 @@
* SPDX-License-Identifier: MIT
*/
#include "panfrost_compile.h"
#include "compiler/glsl_types.h"
#include "compiler/spirv/nir_spirv.h"
#include "panfrost/compiler/bifrost_compile.h"
@ -27,6 +28,7 @@
#include "util/macros.h"
#include "util/u_dynarray.h"
#include <sys/mman.h>
#include "panfrost_compile.h"
static const struct spirv_to_nir_options spirv_options = {
.environment = NIR_SPIRV_OPENCL,
@ -35,6 +37,7 @@ static const struct spirv_to_nir_options spirv_options = {
.temp_addr_format = nir_address_format_62bit_generic,
.constant_addr_format = nir_address_format_64bit_global,
.create_library = true,
.printf = true,
};
static const nir_shader_compiler_options *
@ -101,6 +104,12 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size, unsigned arch)
nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true};
NIR_PASS(_, nir, nir_lower_compute_system_values, &cs);
NIR_PASS(_, nir, nir_lower_printf,
&(const struct nir_lower_printf_options){
.max_buffer_size = LIBPAN_PRINTF_BUFFER_SIZE - 8,
.ptr_bit_size = 64,
});
/* We have to lower away local constant initializers right before we
* inline functions. That way they get properly initialized at the top
* of the function and not at the top of its caller.
@ -215,6 +224,14 @@ lower_sysvals(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_data)
bit_size, num_comps);
break;
case nir_intrinsic_load_printf_buffer_address:
val = load_sysval_from_push_const(
b,
offsetof(struct bifrost_precompiled_kernel_sysvals,
printf_buffer_address),
bit_size, num_comps);
break;
default:
return false;
}

View file

@ -0,0 +1,8 @@
/*
* Copyright 2025 Collabora Ltd
* SPDX-License-Identifier: MIT
*/
#pragma once
#define LIBPAN_PRINTF_BUFFER_SIZE 16384

View file

@ -5346,9 +5346,29 @@ bifrost_nir_lower_load_output(nir_shader *nir)
nir_metadata_control_flow, NULL);
}
static bool
bi_lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
if (instr->type != nir_instr_type_jump)
return false;
nir_jump_instr *jump = nir_instr_as_jump(instr);
if (jump->type != nir_jump_halt)
return false;
assert(b->impl == nir_shader_get_entrypoint(b->shader));
jump->type = nir_jump_return;
return true;
}
void
bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
/* Ensure that halt are translated to returns and get ride of them */
NIR_PASS(_, nir, nir_shader_instructions_pass, bi_lower_halt_to_return,
nir_metadata_all, NULL);
NIR_PASS(_, nir, nir_lower_returns);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
* messed with our I/O quite a bit already) */

View file

@ -24,6 +24,7 @@
#ifndef __BIFROST_PUBLIC_H_
#define __BIFROST_PUBLIC_H_
#include <stdint.h>
#include <string.h>
#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
@ -33,6 +34,7 @@ struct bifrost_precompiled_kernel_sysvals {
struct {
unsigned x, y, z;
} num_workgroups;
uint64_t printf_buffer_address;
} __attribute__((aligned(8)));
;

View file

@ -368,11 +368,31 @@ lower_vec816_alu(const nir_instr *instr, const void *cb_data)
return 4;
}
static bool
lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
if (instr->type != nir_instr_type_jump)
return false;
nir_jump_instr *jump = nir_instr_as_jump(instr);
if (jump->type != nir_jump_halt)
return false;
assert(b->impl == nir_shader_get_entrypoint(b->shader));
jump->type = nir_jump_return;
return true;
}
void
midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
unsigned quirks = midgard_get_quirks(gpu_id);
/* Ensure that halt are translated to returns and get ride of them */
NIR_PASS(_, nir, nir_shader_instructions_pass, lower_halt_to_return,
nir_metadata_all, NULL);
NIR_PASS(_, nir, nir_lower_returns);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
* messed with our I/O quite a bit already).

View file

@ -41,6 +41,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
sysvals.num_workgroups.x = grid.count[0];
sysvals.num_workgroups.y = grid.count[1];
sysvals.num_workgroups.z = grid.count[2];
sysvals.printf_buffer_address = dev->printf.bo->addr.dev;
bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
data_size, &sysvals);

View file

@ -10,7 +10,7 @@ VkResult
panvk_per_arch(device_check_status)(struct vk_device *vk_dev)
{
struct panvk_device *dev = to_panvk_device(vk_dev);
VkResult result = VK_SUCCESS;
VkResult result = panvk_common_check_status(dev);
for (uint32_t qfi = 0; qfi < PANVK_MAX_QUEUE_FAMILIES; qfi++) {
for (uint32_t q = 0; q < dev->queue_count[qfi]; q++) {

View file

@ -39,6 +39,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
sysvals.num_workgroups.x = grid.count[0];
sysvals.num_workgroups.y = grid.count[1];
sysvals.num_workgroups.z = grid.count[2];
sysvals.printf_buffer_address = dev->printf.bo->addr.dev;
bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
data_size, &sysvals);

View file

@ -0,0 +1,13 @@
/*
* Copyright © 2024 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#include "panvk_device.h"
VkResult
panvk_per_arch(device_check_status)(struct vk_device *vk_dev)
{
struct panvk_device *dev = to_panvk_device(vk_dev);
return panvk_common_check_status(dev);
}

View file

@ -71,6 +71,7 @@ jm_files = [
'jm/panvk_vX_cmd_event.c',
'jm/panvk_vX_cmd_query.c',
'jm/panvk_vX_cmd_precomp.c',
'jm/panvk_vX_device.c',
'jm/panvk_vX_event.c',
'jm/panvk_vX_queue.c',
]

View file

@ -8,6 +8,7 @@
#include <stdint.h>
#include "vk_debug_utils.h"
#include "vk_device.h"
#include "vk_meta.h"
@ -23,6 +24,7 @@
#include "util/pan_ir.h"
#include "util/perf/u_trace.h"
#include "util/u_printf.h"
#include "util/vma.h"
#define PANVK_MAX_QUEUE_FAMILIES 1
@ -77,6 +79,11 @@ struct panvk_device {
struct {
struct pandecode_context *decode_ctx;
} debug;
struct {
struct u_printf_ctx ctx;
struct panvk_priv_bo *bo;
} printf;
};
VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
@ -110,9 +117,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
void panvk_per_arch(destroy_device)(struct panvk_device *device,
const VkAllocationCallbacks *pAllocator);
#if PAN_ARCH >= 10
static inline VkResult
panvk_common_check_status(struct panvk_device *dev)
{
return vk_check_printf_status(&dev->vk, &dev->printf.ctx);
}
VkResult panvk_per_arch(device_check_status)(struct vk_device *vk_dev);
#if PAN_ARCH >= 10
VkResult panvk_per_arch(init_tiler_oom)(struct panvk_device *device);
#endif
#endif

View file

@ -76,6 +76,7 @@ struct panvk_graphics_sysvals {
} vs;
aligned_u64 push_consts;
aligned_u64 printf_buffer_address;
#if PAN_ARCH <= 7
/* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
@ -112,6 +113,7 @@ struct panvk_compute_sysvals {
} local_group_size;
aligned_u64 push_consts;
aligned_u64 printf_buffer_address;
#if PAN_ARCH <= 7
struct {

View file

@ -13,6 +13,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info)
{
const struct panvk_shader *shader = cmdbuf->state.compute.shader;
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
/* In indirect case, some sysvals are read from the indirect dispatch
@ -40,6 +42,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
shader->local_size.y);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
shader->local_size.z);
set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);
#if PAN_ARCH <= 7
struct panvk_descriptor_state *desc_state =

View file

@ -548,11 +548,14 @@ void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_info *info)
{
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
const struct panvk_shader *fs = get_fs(cmdbuf);
uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
noperspective_varyings);
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);

View file

@ -30,7 +30,9 @@
#include "genxml/decode.h"
#include "genxml/gen_macros.h"
#include "clc/panfrost_compile.h"
#include "kmod/pan_kmod.h"
#include "util/u_printf.h"
#include "pan_props.h"
#include "pan_samples.h"
@ -272,9 +274,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
device->vk.command_dispatch_table = &device->cmd_dispatch;
device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
#if PAN_ARCH >= 10
device->vk.check_status = panvk_per_arch(device_check_status);
#endif
device->kmod.allocator = (struct pan_kmod_allocator){
.zalloc = panvk_kmod_zalloc,
@ -342,6 +342,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
goto err_free_priv_bos;
#endif
result = panvk_priv_bo_create(device, LIBPAN_PRINTF_BUFFER_SIZE, 0,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
&device->printf.bo);
if (result != VK_SUCCESS)
goto err_free_priv_bos;
u_printf_init(&device->printf.ctx, device->printf.bo,
device->printf.bo->addr.host);
vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);
@ -404,6 +413,9 @@ err_finish_queues:
err_free_precomp:
panvk_precomp_cleanup(device);
err_free_priv_bos:
if (device->printf.bo)
u_printf_destroy(&device->printf.ctx);
panvk_priv_bo_unref(device->printf.bo);
panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
panvk_priv_bo_unref(device->sample_positions);
panvk_priv_bo_unref(device->tiler_heap);
@ -441,6 +453,8 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
panvk_precomp_cleanup(device);
panvk_meta_cleanup(device);
u_printf_destroy(&device->printf.ctx);
panvk_priv_bo_unref(device->printf.bo);
panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
panvk_priv_bo_unref(device->tiler_heap);
panvk_priv_bo_unref(device->sample_positions);

View file

@ -118,6 +118,13 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
val = nir_imm_int(b, 0);
break;
case nir_intrinsic_load_printf_buffer_address:
if (b->shader->info.stage == MESA_SHADER_COMPUTE)
val = load_sysval(b, compute, bit_size, printf_buffer_address);
else
val = load_sysval(b, graphics, bit_size, printf_buffer_address);
break;
default:
return false;
}