panfrost,panvk: Wire printf and abort support

Those are quite useful for debugging and having sanity checks in place. It is also quite tidious to get ride of all asserts in every headers we would ever want to use, lets just accept those now. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32720>
2025-12-24 17:30:12 +01:00 · 2025-01-02 16:34:03 +01:00 · 2025-01-02 16:34:03 +01:00 · 6438b3e2bd
commit 6438b3e2bd
parent 8adede1d44
22 changed files with 157 additions and 4 deletions
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@ -1313,6 +1313,9 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *ptr_cpu,
      case PAN_SYSVAL_DRAWID:
         uniforms[i].u[0] = batch->ctx->drawid;
         break;
+      case PAN_SYSVAL_PRINTF_BUFFER:
+         uniforms[i].du[0] = batch->ctx->printf.bo->ptr.gpu;
+         break;
      default:
         assert(0);
      }
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@ -43,10 +43,12 @@
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 #include "util/u_prim_restart.h"
+#include "util/u_printf.h"
 #include "util/u_surface.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_vbuf.h"

+#include "clc/panfrost_compile.h"
 #include "compiler/nir/nir_serialize.h"
 #include "util/pan_lower_framebuffer.h"
 #include "decode.h"
@ -565,6 +567,9 @@ panfrost_destroy(struct pipe_context *pipe)

   pan_screen(pipe->screen)->vtbl.context_cleanup(panfrost);

+   u_printf_destroy(&panfrost->printf.ctx);
+   panfrost_bo_unreference(panfrost->printf.bo);
+
   if (panfrost->writers)
      _mesa_hash_table_destroy(panfrost->writers, NULL);

@ -1122,6 +1127,14 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
   ret = drmSyncobjCreate(panfrost_device_fd(dev), 0, &ctx->in_sync_obj);
   assert(!ret);

+   ctx->printf.bo =
+      panfrost_bo_create(dev, LIBPAN_PRINTF_BUFFER_SIZE, 0, "Printf Buffer");
+
+   if (ctx->printf.bo == NULL)
+      goto failed;
+
+   u_printf_init(&ctx->printf.ctx, ctx->printf.bo, ctx->printf.bo->ptr.cpu);
+
   ret = pan_screen(screen)->vtbl.context_init(ctx);

   if (ret)
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@ -242,6 +242,11 @@ struct panfrost_context {
   union {
      struct panfrost_csf_context csf;
   };
+
+   struct {
+      struct u_printf_ctx ctx;
+      struct panfrost_bo *bo;
+   } printf;
 };

 /* Corresponds to the CSO */
@ -296,6 +301,7 @@ enum {
   PAN_SYSVAL_BLEND_CONSTANTS = 16,
   PAN_SYSVAL_XFB = 17,
   PAN_SYSVAL_NUM_VERTICES = 18,
+   PAN_SYSVAL_PRINTF_BUFFER = 19,
 };

 #define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)                               \
--- a/src/gallium/drivers/panfrost/pan_helpers.c
+++ b/src/gallium/drivers/panfrost/pan_helpers.c
@ -62,6 +62,7 @@ panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
      case PAN_SYSVAL_WORK_DIM:
      case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
      case PAN_SYSVAL_NUM_VERTICES:
+      case PAN_SYSVAL_PRINTF_BUFFER:
         dirty |= PAN_DIRTY_PARAMS;
         break;

--- a/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c
+++ b/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c
@ -116,6 +116,9 @@ sysval_for_intrinsic(unsigned arch, nir_intrinsic_instr *intr, unsigned *offset)
   case nir_intrinsic_load_workgroup_size:
      return PAN_SYSVAL_LOCAL_GROUP_SIZE;

+   case nir_intrinsic_load_printf_buffer_address:
+      return PAN_SYSVAL_PRINTF_BUFFER;
+
   case nir_intrinsic_load_rt_conversion_pan: {
      unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
      unsigned rt = nir_intrinsic_base(intr);
--- a/src/gallium/drivers/panfrost/pan_precomp.c
+++ b/src/gallium/drivers/panfrost/pan_precomp.c
@ -255,6 +255,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
   sysvals.num_workgroups.x = grid.count[0];
   sysvals.num_workgroups.y = grid.count[1];
   sysvals.num_workgroups.z = grid.count[2];
+   sysvals.printf_buffer_address = ctx->printf.bo->ptr.gpu;

   bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
                                                    data_size, &sysvals);
--- a/src/panfrost/clc/panfrost_compile.c
+++ b/src/panfrost/clc/panfrost_compile.c
@ -5,6 +5,7 @@
 * SPDX-License-Identifier: MIT
 */

+#include "panfrost_compile.h"
 #include "compiler/glsl_types.h"
 #include "compiler/spirv/nir_spirv.h"
 #include "panfrost/compiler/bifrost_compile.h"
@ -27,6 +28,7 @@
 #include "util/macros.h"
 #include "util/u_dynarray.h"
 #include <sys/mman.h>
+#include "panfrost_compile.h"

 static const struct spirv_to_nir_options spirv_options = {
   .environment = NIR_SPIRV_OPENCL,
@ -35,6 +37,7 @@ static const struct spirv_to_nir_options spirv_options = {
   .temp_addr_format = nir_address_format_62bit_generic,
   .constant_addr_format = nir_address_format_64bit_global,
   .create_library = true,
+   .printf = true,
 };

 static const nir_shader_compiler_options *
@ -101,6 +104,12 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size, unsigned arch)
   nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true};
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs);

+   NIR_PASS(_, nir, nir_lower_printf,
+            &(const struct nir_lower_printf_options){
+               .max_buffer_size = LIBPAN_PRINTF_BUFFER_SIZE - 8,
+               .ptr_bit_size = 64,
+            });
+
   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
@ -215,6 +224,14 @@ lower_sysvals(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_data)
         bit_size, num_comps);
      break;

+   case nir_intrinsic_load_printf_buffer_address:
+      val = load_sysval_from_push_const(
+         b,
+         offsetof(struct bifrost_precompiled_kernel_sysvals,
+                  printf_buffer_address),
+         bit_size, num_comps);
+      break;
+
   default:
      return false;
   }
--- a/src/panfrost/clc/panfrost_compile.h
+++ b/src/panfrost/clc/panfrost_compile.h
@ -0,0 +1,8 @@
+/*
+ * Copyright 2025 Collabora Ltd
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#define LIBPAN_PRINTF_BUFFER_SIZE 16384
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@ -5346,9 +5346,29 @@ bifrost_nir_lower_load_output(nir_shader *nir)
      nir_metadata_control_flow, NULL);
 }

+static bool
+bi_lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data)
+{
+   if (instr->type != nir_instr_type_jump)
+      return false;
+
+   nir_jump_instr *jump = nir_instr_as_jump(instr);
+   if (jump->type != nir_jump_halt)
+      return false;
+
+   assert(b->impl == nir_shader_get_entrypoint(b->shader));
+   jump->type = nir_jump_return;
+   return true;
+}
+
 void
 bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
 {
+   /* Ensure that halt are translated to returns and get ride of them */
+   NIR_PASS(_, nir, nir_shader_instructions_pass, bi_lower_halt_to_return,
+            nir_metadata_all, NULL);
+   NIR_PASS(_, nir, nir_lower_returns);
+
   /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
    * (so we don't accidentally duplicate the epilogue since mesa/st has
    * messed with our I/O quite a bit already) */
--- a/src/panfrost/compiler/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost_compile.h
@ -24,6 +24,7 @@
 #ifndef __BIFROST_PUBLIC_H_
 #define __BIFROST_PUBLIC_H_

+#include <stdint.h>
 #include <string.h>
 #include "compiler/nir/nir.h"
 #include "panfrost/util/pan_ir.h"
@ -33,6 +34,7 @@ struct bifrost_precompiled_kernel_sysvals {
   struct {
      unsigned x, y, z;
   } num_workgroups;
+   uint64_t printf_buffer_address;
 } __attribute__((aligned(8)));
 ;

--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@ -368,11 +368,31 @@ lower_vec816_alu(const nir_instr *instr, const void *cb_data)
  return 4;
 }

+static bool
+lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data)
+{
+   if (instr->type != nir_instr_type_jump)
+      return false;
+
+   nir_jump_instr *jump = nir_instr_as_jump(instr);
+   if (jump->type != nir_jump_halt)
+      return false;
+
+   assert(b->impl == nir_shader_get_entrypoint(b->shader));
+   jump->type = nir_jump_return;
+   return true;
+}
+
 void
 midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id)
 {
   unsigned quirks = midgard_get_quirks(gpu_id);

+   /* Ensure that halt are translated to returns and get ride of them */
+   NIR_PASS(_, nir, nir_shader_instructions_pass, lower_halt_to_return,
+            nir_metadata_all, NULL);
+   NIR_PASS(_, nir, nir_lower_returns);
+
   /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
    * (so we don't accidentally duplicate the epilogue since mesa/st has
    * messed with our I/O quite a bit already).
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c
@ -41,6 +41,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
   sysvals.num_workgroups.x = grid.count[0];
   sysvals.num_workgroups.y = grid.count[1];
   sysvals.num_workgroups.z = grid.count[2];
+   sysvals.printf_buffer_address = dev->printf.bo->addr.dev;

   bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
                                                    data_size, &sysvals);
--- a/src/panfrost/vulkan/csf/panvk_vX_device.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_device.c
@ -10,7 +10,7 @@ VkResult
 panvk_per_arch(device_check_status)(struct vk_device *vk_dev)
 {
   struct panvk_device *dev = to_panvk_device(vk_dev);
-   VkResult result = VK_SUCCESS;
+   VkResult result = panvk_common_check_status(dev);

   for (uint32_t qfi = 0; qfi < PANVK_MAX_QUEUE_FAMILIES; qfi++) {
      for (uint32_t q = 0; q < dev->queue_count[qfi]; q++) {
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c
@ -39,6 +39,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
   sysvals.num_workgroups.x = grid.count[0];
   sysvals.num_workgroups.y = grid.count[1];
   sysvals.num_workgroups.z = grid.count[2];
+   sysvals.printf_buffer_address = dev->printf.bo->addr.dev;

   bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data,
                                                    data_size, &sysvals);
--- a/src/panfrost/vulkan/jm/panvk_vX_device.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_device.c
@ -0,0 +1,13 @@
+/*
+ * Copyright © 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "panvk_device.h"
+
+VkResult
+panvk_per_arch(device_check_status)(struct vk_device *vk_dev)
+{
+   struct panvk_device *dev = to_panvk_device(vk_dev);
+   return panvk_common_check_status(dev);
+}
--- a/src/panfrost/vulkan/meson.build
+++ b/src/panfrost/vulkan/meson.build
@ -71,6 +71,7 @@ jm_files = [
  'jm/panvk_vX_cmd_event.c',
  'jm/panvk_vX_cmd_query.c',
  'jm/panvk_vX_cmd_precomp.c',
+  'jm/panvk_vX_device.c',
  'jm/panvk_vX_event.c',
  'jm/panvk_vX_queue.c',
 ]
--- a/src/panfrost/vulkan/panvk_device.h
+++ b/src/panfrost/vulkan/panvk_device.h
@ -8,6 +8,7 @@

 #include <stdint.h>

+#include "vk_debug_utils.h"
 #include "vk_device.h"
 #include "vk_meta.h"

@ -23,6 +24,7 @@
 #include "util/pan_ir.h"
 #include "util/perf/u_trace.h"

+#include "util/u_printf.h"
 #include "util/vma.h"

 #define PANVK_MAX_QUEUE_FAMILIES 1
@ -77,6 +79,11 @@ struct panvk_device {
   struct {
      struct pandecode_context *decode_ctx;
   } debug;
+
+   struct {
+      struct u_printf_ctx ctx;
+      struct panvk_priv_bo *bo;
+   } printf;
 };

 VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
@ -110,9 +117,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
 void panvk_per_arch(destroy_device)(struct panvk_device *device,
                                    const VkAllocationCallbacks *pAllocator);

-#if PAN_ARCH >= 10
+static inline VkResult
+panvk_common_check_status(struct panvk_device *dev)
+{
+   return vk_check_printf_status(&dev->vk, &dev->printf.ctx);
+}
+
 VkResult panvk_per_arch(device_check_status)(struct vk_device *vk_dev);

+#if PAN_ARCH >= 10
 VkResult panvk_per_arch(init_tiler_oom)(struct panvk_device *device);
 #endif
 #endif
--- a/src/panfrost/vulkan/panvk_shader.h
+++ b/src/panfrost/vulkan/panvk_shader.h
@ -76,6 +76,7 @@ struct panvk_graphics_sysvals {
   } vs;

   aligned_u64 push_consts;
+   aligned_u64 printf_buffer_address;

 #if PAN_ARCH <= 7
   /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
@ -112,6 +113,7 @@ struct panvk_compute_sysvals {
   } local_group_size;

   aligned_u64 push_consts;
+   aligned_u64 printf_buffer_address;

 #if PAN_ARCH <= 7
   struct {
--- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c
@ -13,6 +13,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
   struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info)
 {
   const struct panvk_shader *shader = cmdbuf->state.compute.shader;
+   const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
+
   BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};

   /* In indirect case, some sysvals are read from the indirect dispatch
@ -40,6 +42,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
                      shader->local_size.y);
   set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
                      shader->local_size.z);
+   set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
+                      dev->printf.bo->addr.dev);

 #if PAN_ARCH <= 7
   struct panvk_descriptor_state *desc_state =
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
@ -548,11 +548,14 @@ void
 panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
                                         const struct panvk_draw_info *info)
 {
+   const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
   const struct panvk_shader *fs = get_fs(cmdbuf);
   uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
   BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};

+   set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
+                  dev->printf.bo->addr.dev);
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
                  noperspective_varyings);
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@ -30,7 +30,9 @@
 #include "genxml/decode.h"
 #include "genxml/gen_macros.h"

+#include "clc/panfrost_compile.h"
 #include "kmod/pan_kmod.h"
+#include "util/u_printf.h"
 #include "pan_props.h"
 #include "pan_samples.h"

@ -272,9 +274,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
   device->vk.command_dispatch_table = &device->cmd_dispatch;
   device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
   device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
-#if PAN_ARCH >= 10
   device->vk.check_status = panvk_per_arch(device_check_status);
-#endif

   device->kmod.allocator = (struct pan_kmod_allocator){
      .zalloc = panvk_kmod_zalloc,
@ -342,6 +342,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
      goto err_free_priv_bos;
 #endif

+   result = panvk_priv_bo_create(device, LIBPAN_PRINTF_BUFFER_SIZE, 0,
+                                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
+                                 &device->printf.bo);
+   if (result != VK_SUCCESS)
+      goto err_free_priv_bos;
+
+   u_printf_init(&device->printf.ctx, device->printf.bo,
+                 device->printf.bo->addr.host);
+
   vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);


@ -404,6 +413,9 @@ err_finish_queues:
 err_free_precomp:
   panvk_precomp_cleanup(device);
 err_free_priv_bos:
+   if (device->printf.bo)
+      u_printf_destroy(&device->printf.ctx);
+   panvk_priv_bo_unref(device->printf.bo);
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_priv_bo_unref(device->tiler_heap);
@ -441,6 +453,8 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,

   panvk_precomp_cleanup(device);
   panvk_meta_cleanup(device);
+   u_printf_destroy(&device->printf.ctx);
+   panvk_priv_bo_unref(device->printf.bo);
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_priv_bo_unref(device->sample_positions);
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@ -118,6 +118,13 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
      val = nir_imm_int(b, 0);
      break;

+   case nir_intrinsic_load_printf_buffer_address:
+      if (b->shader->info.stage == MESA_SHADER_COMPUTE)
+         val = load_sysval(b, compute, bit_size, printf_buffer_address);
+      else
+         val = load_sysval(b, graphics, bit_size, printf_buffer_address);
+      break;
+
   default:
      return false;
   }