diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 22fc36453fd..0193db11408 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1313,6 +1313,9 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *ptr_cpu, case PAN_SYSVAL_DRAWID: uniforms[i].u[0] = batch->ctx->drawid; break; + case PAN_SYSVAL_PRINTF_BUFFER: + uniforms[i].du[0] = batch->ctx->printf.bo->ptr.gpu; + break; default: assert(0); } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index fbf2e3d9413..fad93fb518c 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -43,10 +43,12 @@ #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_prim_restart.h" +#include "util/u_printf.h" #include "util/u_surface.h" #include "util/u_upload_mgr.h" #include "util/u_vbuf.h" +#include "clc/panfrost_compile.h" #include "compiler/nir/nir_serialize.h" #include "util/pan_lower_framebuffer.h" #include "decode.h" @@ -565,6 +567,9 @@ panfrost_destroy(struct pipe_context *pipe) pan_screen(pipe->screen)->vtbl.context_cleanup(panfrost); + u_printf_destroy(&panfrost->printf.ctx); + panfrost_bo_unreference(panfrost->printf.bo); + if (panfrost->writers) _mesa_hash_table_destroy(panfrost->writers, NULL); @@ -1122,6 +1127,14 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) ret = drmSyncobjCreate(panfrost_device_fd(dev), 0, &ctx->in_sync_obj); assert(!ret); + ctx->printf.bo = + panfrost_bo_create(dev, LIBPAN_PRINTF_BUFFER_SIZE, 0, "Printf Buffer"); + + if (ctx->printf.bo == NULL) + goto failed; + + u_printf_init(&ctx->printf.ctx, ctx->printf.bo, ctx->printf.bo->ptr.cpu); + ret = pan_screen(screen)->vtbl.context_init(ctx); if (ret) diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 2b2fb01d9d1..805cf342a11 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -242,6 +242,11 @@ struct panfrost_context { union { struct panfrost_csf_context csf; }; + + struct { + struct u_printf_ctx ctx; + struct panfrost_bo *bo; + } printf; }; /* Corresponds to the CSO */ @@ -296,6 +301,7 @@ enum { PAN_SYSVAL_BLEND_CONSTANTS = 16, PAN_SYSVAL_XFB = 17, PAN_SYSVAL_NUM_VERTICES = 18, + PAN_SYSVAL_PRINTF_BUFFER = 19, }; #define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ diff --git a/src/gallium/drivers/panfrost/pan_helpers.c b/src/gallium/drivers/panfrost/pan_helpers.c index bb659064d8d..f94fd868f7b 100644 --- a/src/gallium/drivers/panfrost/pan_helpers.c +++ b/src/gallium/drivers/panfrost/pan_helpers.c @@ -62,6 +62,7 @@ panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss) case PAN_SYSVAL_WORK_DIM: case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: case PAN_SYSVAL_NUM_VERTICES: + case PAN_SYSVAL_PRINTF_BUFFER: dirty |= PAN_DIRTY_PARAMS; break; diff --git a/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c b/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c index 11e93b55339..f5dc1699b10 100644 --- a/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c +++ b/src/gallium/drivers/panfrost/pan_nir_lower_sysvals.c @@ -116,6 +116,9 @@ sysval_for_intrinsic(unsigned arch, nir_intrinsic_instr *intr, unsigned *offset) case nir_intrinsic_load_workgroup_size: return PAN_SYSVAL_LOCAL_GROUP_SIZE; + case nir_intrinsic_load_printf_buffer_address: + return PAN_SYSVAL_PRINTF_BUFFER; + case nir_intrinsic_load_rt_conversion_pan: { unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr)); unsigned rt = nir_intrinsic_base(intr); diff --git a/src/gallium/drivers/panfrost/pan_precomp.c b/src/gallium/drivers/panfrost/pan_precomp.c index 2e4b154bd6b..dd0c01f668c 100644 --- a/src/gallium/drivers/panfrost/pan_precomp.c +++ b/src/gallium/drivers/panfrost/pan_precomp.c @@ -255,6 +255,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch, sysvals.num_workgroups.x = grid.count[0]; sysvals.num_workgroups.y = grid.count[1]; sysvals.num_workgroups.z = grid.count[2]; + sysvals.printf_buffer_address = ctx->printf.bo->ptr.gpu; bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data, data_size, &sysvals); diff --git a/src/panfrost/clc/panfrost_compile.c b/src/panfrost/clc/panfrost_compile.c index 76bd62a17c4..24f57dd1a53 100644 --- a/src/panfrost/clc/panfrost_compile.c +++ b/src/panfrost/clc/panfrost_compile.c @@ -5,6 +5,7 @@ * SPDX-License-Identifier: MIT */ +#include "panfrost_compile.h" #include "compiler/glsl_types.h" #include "compiler/spirv/nir_spirv.h" #include "panfrost/compiler/bifrost_compile.h" @@ -27,6 +28,7 @@ #include "util/macros.h" #include "util/u_dynarray.h" #include +#include "panfrost_compile.h" static const struct spirv_to_nir_options spirv_options = { .environment = NIR_SPIRV_OPENCL, @@ -35,6 +37,7 @@ static const struct spirv_to_nir_options spirv_options = { .temp_addr_format = nir_address_format_62bit_generic, .constant_addr_format = nir_address_format_64bit_global, .create_library = true, + .printf = true, }; static const nir_shader_compiler_options * @@ -101,6 +104,12 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size, unsigned arch) nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true}; NIR_PASS(_, nir, nir_lower_compute_system_values, &cs); + NIR_PASS(_, nir, nir_lower_printf, + &(const struct nir_lower_printf_options){ + .max_buffer_size = LIBPAN_PRINTF_BUFFER_SIZE - 8, + .ptr_bit_size = 64, + }); + /* We have to lower away local constant initializers right before we * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. @@ -215,6 +224,14 @@ lower_sysvals(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_data) bit_size, num_comps); break; + case nir_intrinsic_load_printf_buffer_address: + val = load_sysval_from_push_const( + b, + offsetof(struct bifrost_precompiled_kernel_sysvals, + printf_buffer_address), + bit_size, num_comps); + break; + default: return false; } diff --git a/src/panfrost/clc/panfrost_compile.h b/src/panfrost/clc/panfrost_compile.h new file mode 100644 index 00000000000..e3e838f2a21 --- /dev/null +++ b/src/panfrost/clc/panfrost_compile.h @@ -0,0 +1,8 @@ +/* + * Copyright 2025 Collabora Ltd + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#define LIBPAN_PRINTF_BUFFER_SIZE 16384 diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index b7b18af9490..256161e9b15 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5346,9 +5346,29 @@ bifrost_nir_lower_load_output(nir_shader *nir) nir_metadata_control_flow, NULL); } +static bool +bi_lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data) +{ + if (instr->type != nir_instr_type_jump) + return false; + + nir_jump_instr *jump = nir_instr_as_jump(instr); + if (jump->type != nir_jump_halt) + return false; + + assert(b->impl == nir_shader_get_entrypoint(b->shader)); + jump->type = nir_jump_return; + return true; +} + void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) { + /* Ensure that halt are translated to returns and get ride of them */ + NIR_PASS(_, nir, nir_shader_instructions_pass, bi_lower_halt_to_return, + nir_metadata_all, NULL); + NIR_PASS(_, nir, nir_lower_returns); + /* Lower gl_Position pre-optimisation, but after lowering vars to ssa * (so we don't accidentally duplicate the epilogue since mesa/st has * messed with our I/O quite a bit already) */ diff --git a/src/panfrost/compiler/bifrost_compile.h b/src/panfrost/compiler/bifrost_compile.h index 1a4076a5906..433fa5e98ff 100644 --- a/src/panfrost/compiler/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost_compile.h @@ -24,6 +24,7 @@ #ifndef __BIFROST_PUBLIC_H_ #define __BIFROST_PUBLIC_H_ +#include #include #include "compiler/nir/nir.h" #include "panfrost/util/pan_ir.h" @@ -33,6 +34,7 @@ struct bifrost_precompiled_kernel_sysvals { struct { unsigned x, y, z; } num_workgroups; + uint64_t printf_buffer_address; } __attribute__((aligned(8))); ; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 52250f3e097..50c5c7f7186 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -368,11 +368,31 @@ lower_vec816_alu(const nir_instr *instr, const void *cb_data) return 4; } +static bool +lower_halt_to_return(nir_builder *b, nir_instr *instr, UNUSED void *_data) +{ + if (instr->type != nir_instr_type_jump) + return false; + + nir_jump_instr *jump = nir_instr_as_jump(instr); + if (jump->type != nir_jump_halt) + return false; + + assert(b->impl == nir_shader_get_entrypoint(b->shader)); + jump->type = nir_jump_return; + return true; +} + void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id) { unsigned quirks = midgard_get_quirks(gpu_id); + /* Ensure that halt are translated to returns and get ride of them */ + NIR_PASS(_, nir, nir_shader_instructions_pass, lower_halt_to_return, + nir_metadata_all, NULL); + NIR_PASS(_, nir, nir_lower_returns); + /* Lower gl_Position pre-optimisation, but after lowering vars to ssa * (so we don't accidentally duplicate the epilogue since mesa/st has * messed with our I/O quite a bit already). diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index a776854a961..dfed6281d1f 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -41,6 +41,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, sysvals.num_workgroups.x = grid.count[0]; sysvals.num_workgroups.y = grid.count[1]; sysvals.num_workgroups.z = grid.count[2]; + sysvals.printf_buffer_address = dev->printf.bo->addr.dev; bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data, data_size, &sysvals); diff --git a/src/panfrost/vulkan/csf/panvk_vX_device.c b/src/panfrost/vulkan/csf/panvk_vX_device.c index 2422d90e86d..eacc2afc812 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_device.c +++ b/src/panfrost/vulkan/csf/panvk_vX_device.c @@ -10,7 +10,7 @@ VkResult panvk_per_arch(device_check_status)(struct vk_device *vk_dev) { struct panvk_device *dev = to_panvk_device(vk_dev); - VkResult result = VK_SUCCESS; + VkResult result = panvk_common_check_status(dev); for (uint32_t qfi = 0; qfi < PANVK_MAX_QUEUE_FAMILIES; qfi++) { for (uint32_t q = 0; q < dev->queue_count[qfi]; q++) { diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c index 012be7133ff..cae03ea726b 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c @@ -39,6 +39,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, sysvals.num_workgroups.x = grid.count[0]; sysvals.num_workgroups.y = grid.count[1]; sysvals.num_workgroups.z = grid.count[2]; + sysvals.printf_buffer_address = dev->printf.bo->addr.dev; bifrost_precompiled_kernel_prepare_push_uniforms(push_uniforms.cpu, data, data_size, &sysvals); diff --git a/src/panfrost/vulkan/jm/panvk_vX_device.c b/src/panfrost/vulkan/jm/panvk_vX_device.c new file mode 100644 index 00000000000..eec7fb46314 --- /dev/null +++ b/src/panfrost/vulkan/jm/panvk_vX_device.c @@ -0,0 +1,13 @@ +/* + * Copyright © 2024 Collabora Ltd. + * SPDX-License-Identifier: MIT + */ + +#include "panvk_device.h" + +VkResult +panvk_per_arch(device_check_status)(struct vk_device *vk_dev) +{ + struct panvk_device *dev = to_panvk_device(vk_dev); + return panvk_common_check_status(dev); +} diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index d74a1e77b41..ae3f83f7dcb 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -71,6 +71,7 @@ jm_files = [ 'jm/panvk_vX_cmd_event.c', 'jm/panvk_vX_cmd_query.c', 'jm/panvk_vX_cmd_precomp.c', + 'jm/panvk_vX_device.c', 'jm/panvk_vX_event.c', 'jm/panvk_vX_queue.c', ] diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 30d56730922..9044c3f19a6 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -8,6 +8,7 @@ #include +#include "vk_debug_utils.h" #include "vk_device.h" #include "vk_meta.h" @@ -23,6 +24,7 @@ #include "util/pan_ir.h" #include "util/perf/u_trace.h" +#include "util/u_printf.h" #include "util/vma.h" #define PANVK_MAX_QUEUE_FAMILIES 1 @@ -77,6 +79,11 @@ struct panvk_device { struct { struct pandecode_context *decode_ctx; } debug; + + struct { + struct u_printf_ctx ctx; + struct panvk_priv_bo *bo; + } printf; }; VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) @@ -110,9 +117,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, void panvk_per_arch(destroy_device)(struct panvk_device *device, const VkAllocationCallbacks *pAllocator); -#if PAN_ARCH >= 10 +static inline VkResult +panvk_common_check_status(struct panvk_device *dev) +{ + return vk_check_printf_status(&dev->vk, &dev->printf.ctx); +} + VkResult panvk_per_arch(device_check_status)(struct vk_device *vk_dev); +#if PAN_ARCH >= 10 VkResult panvk_per_arch(init_tiler_oom)(struct panvk_device *device); #endif #endif diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 54debdeff6f..62dfa51cb3d 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -76,6 +76,7 @@ struct panvk_graphics_sysvals { } vs; aligned_u64 push_consts; + aligned_u64 printf_buffer_address; #if PAN_ARCH <= 7 /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per @@ -112,6 +113,7 @@ struct panvk_compute_sysvals { } local_group_size; aligned_u64 push_consts; + aligned_u64 printf_buffer_address; #if PAN_ARCH <= 7 struct { diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c index 2ecbb874e4c..55f9c640997 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ -13,6 +13,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info) { const struct panvk_shader *shader = cmdbuf->state.compute.shader; + const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; /* In indirect case, some sysvals are read from the indirect dispatch @@ -40,6 +42,8 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( shader->local_size.y); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z, shader->local_size.z); + set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address, + dev->printf.bo->addr.dev); #if PAN_ARCH <= 7 struct panvk_descriptor_state *desc_state = diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 109044c3c9a..1fdc32ae599 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -548,11 +548,14 @@ void panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *info) { + const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; const struct panvk_shader *fs = get_fs(cmdbuf); uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0; BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; + set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address, + dev->printf.bo->addr.dev); set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings, noperspective_varyings); set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base); diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index cfa5f24b192..9c7a66933af 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -30,7 +30,9 @@ #include "genxml/decode.h" #include "genxml/gen_macros.h" +#include "clc/panfrost_compile.h" #include "kmod/pan_kmod.h" +#include "util/u_printf.h" #include "pan_props.h" #include "pan_samples.h" @@ -272,9 +274,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, device->vk.command_dispatch_table = &device->cmd_dispatch; device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops); device->vk.shader_ops = &panvk_per_arch(device_shader_ops); -#if PAN_ARCH >= 10 device->vk.check_status = panvk_per_arch(device_check_status); -#endif device->kmod.allocator = (struct pan_kmod_allocator){ .zalloc = panvk_kmod_zalloc, @@ -342,6 +342,15 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, goto err_free_priv_bos; #endif + result = panvk_priv_bo_create(device, LIBPAN_PRINTF_BUFFER_SIZE, 0, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + &device->printf.bo); + if (result != VK_SUCCESS) + goto err_free_priv_bos; + + u_printf_init(&device->printf.ctx, device->printf.bo, + device->printf.bo->addr.host); + vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd); @@ -404,6 +413,9 @@ err_finish_queues: err_free_precomp: panvk_precomp_cleanup(device); err_free_priv_bos: + if (device->printf.bo) + u_printf_destroy(&device->printf.ctx); + panvk_priv_bo_unref(device->printf.bo); panvk_priv_bo_unref(device->tiler_oom.handlers_bo); panvk_priv_bo_unref(device->sample_positions); panvk_priv_bo_unref(device->tiler_heap); @@ -441,6 +453,8 @@ panvk_per_arch(destroy_device)(struct panvk_device *device, panvk_precomp_cleanup(device); panvk_meta_cleanup(device); + u_printf_destroy(&device->printf.ctx); + panvk_priv_bo_unref(device->printf.bo); panvk_priv_bo_unref(device->tiler_oom.handlers_bo); panvk_priv_bo_unref(device->tiler_heap); panvk_priv_bo_unref(device->sample_positions); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 50cd8d248fd..0120246a0dd 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -118,6 +118,13 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data) val = nir_imm_int(b, 0); break; + case nir_intrinsic_load_printf_buffer_address: + if (b->shader->info.stage == MESA_SHADER_COMPUTE) + val = load_sysval(b, compute, bit_size, printf_buffer_address); + else + val = load_sysval(b, graphics, bit_size, printf_buffer_address); + break; + default: return false; }