diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h
index 91266490fdb..14fd75d12c1 100644
--- a/src/nouveau/compiler/nak.h
+++ b/src/nouveau/compiler/nak.h
@@ -85,9 +85,23 @@ struct nak_constant_offset_info {
     * sample in a multi-pass fragment shader invocaiton.
     */
    uint32_t sample_masks_offset;
+
+   /**
+    * The offset into cb0 for the printf buffer pointer.
+    */
+   uint32_t printf_buffer_offset;
 };
 const extern struct nak_constant_offset_info nak_const_offsets;
 
+#define NAK_PRINTF_BUFFER_SIZE 0x40000
+
+/* printf lowering and buffer setup are only enabled when NDEBUG is unset. */
+#ifdef NDEBUG
+#define NAK_CAN_PRINTF false
+#else
+#define NAK_CAN_PRINTF true
+#endif
+
 void nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak,
                          nir_variable_mode robust2_modes,
                          const struct nak_fs_key *fs_key);
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index cb60901ef89..253bf332857 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -824,6 +824,43 @@ nak_nir_remove_barrier_intrin(nir_builder *b, nir_intrinsic_instr *barrier,
    return progress;
 }
 
+/**
+ * Lowers one printf buffer intrinsic.
+ *
+ * load_printf_buffer_address becomes a 64-bit nir_ldc_nv load at offset
+ * nak_const_offsets.printf_buffer_offset and load_printf_buffer_size becomes
+ * the NAK_PRINTF_BUFFER_SIZE immediate.  Returns true when the intrinsic was
+ * replaced; the data argument is unused.
+ */
+static bool
+nak_nir_lower_printf_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
+                            void *data)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+   if (intrin->intrinsic == nir_intrinsic_load_printf_buffer_address) {
+      nir_def *buffer_addr = nir_ldc_nv(
+         b, 1, 64,
+         nir_imm_int(b, 0),
+         nir_imm_int(b, nak_const_offsets.printf_buffer_offset));
+      nir_def_replace(&intrin->def, buffer_addr);
+      return true;
+   } else if (intrin->intrinsic == nir_intrinsic_load_printf_buffer_size) {
+      nir_def_replace(&intrin->def,
+                      nir_imm_int(b, NAK_PRINTF_BUFFER_SIZE));
+      return true;
+   } else {
+      return false;
+   }
+}
+
+/** Runs nak_nir_lower_printf_intrin over every intrinsic in the shader. */
+static bool
+nak_nir_lower_printf(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir, nak_nir_lower_printf_intrin,
+                                     nir_metadata_none, NULL);
+}
+
 static bool
 nak_nir_remove_barriers(nir_shader *nir)
 {
@@ -1307,6 +1344,9 @@ nak_postprocess_nir(nir_shader *nir,
 
    OPT(nir, nak_nir_remove_barriers);
 
+   if (NAK_CAN_PRINTF)
+      OPT(nir, nak_nir_lower_printf);
+
    /* Call divergence analysis regardless of sm version. */
    nir_divergence_analysis(nir);
 
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index bece9b0fa65..abcd38974c1 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -1173,6 +1173,26 @@ nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
    }
 }
 
+/**
+ * Stores the address of the device printf buffer (bo->va->addr) in the
+ * printf_buffer_addr field of the root descriptor table.
+ *
+ * Does nothing when NAK_CAN_PRINTF is false.
+ */
+void
+nvk_cmd_buffer_flush_printf_buffer(struct nvk_cmd_buffer *cmd,
+                                   struct nvk_descriptor_state *desc)
+{
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+
+   if (!NAK_CAN_PRINTF)
+      return;
+
+   struct nvkmd_mem *bo = dev->printf.bo;
+   nvk_descriptor_state_set_root(cmd, desc, printf_buffer_addr,
+                                 bo->va->addr);
+}
+
 bool
 nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                              const struct nvk_descriptor_state *desc,
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index e73b3efc005..95ba436e607 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -60,8 +60,10 @@ struct nvk_root_descriptor_table {
    /* Dynamic buffer bindings */
    union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
 
+   uint64_t printf_buffer_addr;
+
    /* enfore alignment to 0x100 as needed pre pascal */
-   uint8_t __padding[0xb8];
+   uint8_t __padding[0xb0];
 };
 
 /* helper macro for computing root descriptor byte offsets */
@@ -381,6 +383,10 @@ void
 nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                       struct nvk_descriptor_state *desc);
 
+void
+nvk_cmd_buffer_flush_printf_buffer(struct nvk_cmd_buffer *cmd,
+                                   struct nvk_descriptor_state *desc);
+
 bool
 nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                              const struct nvk_descriptor_state *desc,
diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index b3a75ddc5a2..9e47bf9a5de 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -150,6 +150,9 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
                                        0, 3, base_workgroup);
    nvk_descriptor_state_set_root_array(cmd, desc, cs.group_count,
                                        0, 3, global_size);
+
+   if (NAK_CAN_PRINTF)
+      nvk_cmd_buffer_flush_printf_buffer(cmd, desc);
 }
 
 static VkResult
@@ -357,6 +360,11 @@ nvk_cmd_dispatch_shader(struct nvk_cmd_buffer *cmd,
    assert(push_size <= sizeof(root.push));
    memcpy(root.push, push_data, push_size);
 
+   if (NAK_CAN_PRINTF) {
+      struct nvkmd_mem *bo = (struct nvkmd_mem *)dev->printf.bo;
+      root.printf_buffer_addr = bo->va->addr;
+   }
+
    uint64_t qmd_addr;
    VkResult result = nvk_cmd_upload_qmd(cmd, shader, NULL, &root,
                                         root.cs.group_count,
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 9054264d7b3..1d83eb7b6ae 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -3950,6 +3950,9 @@ nvk_cmd_flush_gfx_state(struct nvk_cmd_buffer *cmd)
    nvk_cmd_flush_gfx_dynamic_state(cmd);
    nvk_cmd_flush_gfx_shaders(cmd);
    nvk_cmd_flush_gfx_cbufs(cmd);
+
+   if (NAK_CAN_PRINTF)
+      nvk_cmd_buffer_flush_printf_buffer(cmd, &cmd->state.gfx.descriptors);
 }
 
 void
diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c
index 506f7c39026..62c2bfd3dfd 100644
--- a/src/nouveau/vulkan/nvk_device.c
+++ b/src/nouveau/vulkan/nvk_device.c
@@ -14,6 +14,8 @@
 
 #include "vk_drm_syncobj.h"
 #include "vk_pipeline_cache.h"
+#include "vk_debug_utils.h"
+#include "util/u_printf.h"
 #include "vulkan/wsi/wsi_common.h"
 
 #include "cl9097.h"
@@ -121,6 +123,69 @@ nvk_slm_area_ensure(struct nvk_device *dev,
    return VK_SUCCESS;
 }
 
+/**
+ * Allocates the CPU-mapped printf buffer (NAK_PRINTF_BUFFER_SIZE) and
+ * registers it, together with its mapping, in dev->printf.
+ *
+ * Returns the allocation's VkResult on failure and VK_SUCCESS otherwise.
+ */
+static VkResult
+nvk_init_printf(struct nvk_device *dev)
+{
+   VkResult result;
+   struct nvkmd_mem *mem;
+   const uint64_t mem_size = NAK_PRINTF_BUFFER_SIZE;
+
+   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
+                                       mem_size, 0 /* align_B */,
+                                       NVKMD_MEM_GART | NVKMD_MEM_COHERENT,
+                                       NVKMD_MEM_MAP_RDWR,
+                                       &mem);
+
+   if (result != VK_SUCCESS)
+      return result;
+
+   u_printf_init(&dev->printf, mem, mem->map);
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Destroys dev->printf and releases the reference on its backing memory.
+ *
+ * nvk_CreateDevice() only calls nvk_init_printf() when queue_count > 0 while
+ * nvk_DestroyDevice() calls this whenever dev->nvkmd is set, so a context
+ * that was never initialized has to be tolerated.  This assumes dev is
+ * zero-allocated, i.e. printf.bo is NULL in that case -- TODO confirm.
+ */
+static void
+nvk_destroy_printf(struct nvk_device *dev)
+{
+   struct nvkmd_mem *mem = dev->printf.bo;
+
+   if (mem == NULL)
+      return;
+
+   u_printf_destroy(&dev->printf);
+   nvkmd_mem_unref(mem);
+}
+
+/**
+ * vk_device::check_status callback.  Forwards to vk_check_printf_status()
+ * when NAK_CAN_PRINTF is true and reports VK_SUCCESS otherwise.
+ */
+static VkResult
+nvk_device_check_status(struct vk_device *vk_dev)
+{
+   VkResult status = VK_SUCCESS;
+   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
+
+   if (NAK_CAN_PRINTF)
+      status = vk_check_printf_status(&dev->vk, &dev->printf);
+
+   return status;
+}
+
 static VkResult
 nvk_device_get_timestamp(struct vk_device *vk_dev, uint64_t *timestamp)
 {
@@ -156,6 +221,7 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
       goto fail_alloc;
 
    dev->vk.shader_ops = &nvk_device_shader_ops;
+   dev->vk.check_status = &nvk_device_check_status;
 
    uint32_t queue_count = 0;
    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
@@ -300,6 +366,15 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
          goto fail_mem_cache;
    }
 
+   if (queue_count > 0 && NAK_CAN_PRINTF) {
+      result = nvk_init_printf(dev);
+      /* NOTE(review): confirm fail_mem_cache unwinds everything that was
+       * initialized before this point; the label is outside this diff.
+       */
+      if (result != VK_SUCCESS)
+         goto fail_mem_cache;
+   }
+
    *pDevice = nvk_device_to_handle(dev);
 
    return VK_SUCCESS;
@@ -350,6 +425,9 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
 
    const struct nvk_physical_device *pdev = nvk_device_physical(dev);
 
+   if (dev->nvkmd && NAK_CAN_PRINTF)
+      nvk_destroy_printf(dev);
+
    if (dev->copy_queries)
       vk_shader_destroy(&dev->vk, &dev->copy_queries->vk, &dev->vk.alloc);
 
diff --git a/src/nouveau/vulkan/nvk_device.h b/src/nouveau/vulkan/nvk_device.h
index 39d8a4f9f70..bbef86faf5a 100644
--- a/src/nouveau/vulkan/nvk_device.h
+++ b/src/nouveau/vulkan/nvk_device.h
@@ -50,6 +50,8 @@ struct nvk_device {
    struct nvk_slm_area slm;
    struct nvkmd_mem *vab_memory;
 
+   struct u_printf_ctx printf;
+
    struct vk_meta_device meta;
 
    struct nvk_shader *copy_queries;
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 49eb2d94ffa..917eab011de 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -38,6 +38,7 @@ const struct nak_constant_offset_info nak_const_offsets = {
    .sample_info_cb = 0,
    .sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations),
    .sample_masks_offset = nvk_root_descriptor_offset(draw.sample_masks),
+   .printf_buffer_offset = nvk_root_descriptor_offset(printf_buffer_addr),
 };
 
 static void