/*
 * Copyright © 2024 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_debug_nir.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "util/strndup.h"
#include "util/u_printf.h"
#include "nir.h"
#include "nir_builder.h"

/*
 * Creates the buffer that shader-side printf writes into.
 *
 * The buffer size is read from the RADV_PRINTF_BUFFER_SIZE option. When it is
 * smaller than the buffer header, no buffer is created and VK_SUCCESS is
 * returned. Otherwise a buffer is created, backed by host-visible, coherent,
 * device-local memory, mapped, and its header is initialized so that the
 * write offset points right behind the header.
 *
 * Returns the first failing VkResult. Objects created before the failure are
 * left in device->debug_nir.printf; radv_printf_data_finish() destroys the
 * buffer and frees the memory (NOTE(review): assumes the caller runs it after
 * a failed init - confirm).
 */
VkResult
radv_printf_data_init(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_printf_data *printf = &device->debug_nir.printf;

   printf->formats = UTIL_DYNARRAY_INIT;

   printf->buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
   if (printf->buffer_size < sizeof(struct radv_printf_buffer_header))
      return VK_SUCCESS;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfo){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
            .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
         },
      .size = printf->buffer_size,
   };

   VkDevice _device = radv_device_to_handle(device);
   VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &printf->buffer);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   device->vk.dispatch_table.GetBufferMemoryRequirements(_device, printf->buffer, &requirements);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = requirements.size,
      .memoryTypeIndex =
         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
   };

   result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &printf->memory);
   if (result != VK_SUCCESS)
      return result;

   result = device->vk.dispatch_table.MapMemory(_device, printf->memory, 0, VK_WHOLE_SIZE, 0, (void **)&printf->data);
   if (result != VK_SUCCESS)
      return result;

   result = device->vk.dispatch_table.BindBufferMemory(_device, printf->buffer, printf->memory, 0);
   if (result != VK_SUCCESS)
      return result;

   /* The first entry starts right behind the header. */
   struct radv_printf_buffer_header *header = printf->data;
   header->offset = sizeof(struct radv_printf_buffer_header);
   header->size = printf->buffer_size;

   VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = printf->buffer,
   };
   printf->buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);

   return VK_SUCCESS;
}

/*
 * Releases everything owned by device->debug_nir.printf: the buffer, its
 * (unmapped) memory and all recorded format strings.
 */
void
radv_printf_data_finish(struct radv_device *device)
{
   VkDevice _device = radv_device_to_handle(device);
   struct radv_printf_data *printf = &device->debug_nir.printf;

   device->vk.dispatch_table.DestroyBuffer(_device, printf->buffer, NULL);
   if (printf->memory)
      device->vk.dispatch_table.UnmapMemory(_device, printf->memory);
   device->vk.dispatch_table.FreeMemory(_device, printf->memory, NULL);

   util_dynarray_foreach (&printf->formats, struct radv_printf_format, format)
      free(format->string);

   util_dynarray_fini(&printf->formats);
}

/*
 * Emits NIR that appends one printf entry to the printf buffer.
 *
 * debug_nir      Debug state holding the printf buffer and format table.
 * b              Builder positioned where the printf should be emitted.
 * cond           Optional condition; when non-NULL the printf is only
 *                executed if it is true.
 * format_string  Format string; a copy is recorded in the format table.
 * argc / in_args Values to print. 1-bit values are converted with
 *                nir_b2i32() before being stored.
 *
 * Entry layout: a 32-bit header (active invocation count in the low bits,
 * format index shifted left by 16), followed by the arguments in order. An
 * argument whose input def is divergent takes one slot per active invocation;
 * any other argument takes a single slot. Which arguments are divergent is
 * recorded in format.divergence_mask.
 *
 * Nothing is emitted when the printf buffer does not exist or when a host
 * allocation fails. In fragment shaders helper invocations are excluded.
 */
void
radv_build_printf_args(struct radv_debug_nir *debug_nir, nir_builder *b, nir_def *cond, const char *format_string,
                       uint32_t argc, nir_def **in_args)
{
   struct radv_printf_data *printf = &debug_nir->printf;
   if (!printf->buffer_addr)
      return;

   struct radv_printf_format format = {0};
   format.string = strdup(format_string);
   if (!format.string)
      return;

   /* Allocate the scratch arrays before touching the shader so that running
    * out of memory leaves no half-built control flow behind. malloc(0) is
    * allowed to return NULL, so NULL only means failure when argc != 0.
    */
   nir_def **args = malloc(argc * sizeof(*args));
   nir_def **strides = malloc(argc * sizeof(*strides));
   if (argc && (!args || !strides)) {
      free(args);
      free(strides);
      free(format.string);
      return;
   }

   uint32_t format_index = util_dynarray_num_elements(&printf->formats, struct radv_printf_format);

   if (cond)
      nir_push_if(b, cond);

   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_push_if(b, nir_inot(b, nir_is_helper_invocation(b, 1)));

   /* Every entry starts with a 4-byte header. */
   nir_def *size = nir_imm_int(b, 4);

   nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
   nir_def *active_invocation_count = nir_bit_count(b, ballot);

   for (uint32_t i = 0; i < argc; i++) {
      nir_def *arg = in_args[i];
      /* Sample the flag on the caller's def: the conversion below creates a
       * new def whose flag is not guaranteed to match.
       */
      bool divergent = arg->divergent;

      if (arg->bit_size == 1)
         arg = nir_b2i32(b, arg);

      args[i] = arg;

      /* Booleans were widened above, so bit_size is a whole number of bytes. */
      uint32_t arg_size = arg->bit_size / 8;
      format.element_sizes[i] = arg_size;

      if (divergent) {
         strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
         format.divergence_mask |= BITFIELD_BIT(i);
      } else {
         strides[i] = nir_imm_int(b, arg_size);
      }

      size = nir_iadd(b, size, strides[i]);
   }

   /* One invocation reserves space for the whole entry; the resulting offset
    * is then broadcast to the other invocations.
    */
   nir_def *offset;
   nir_def *undef;

   nir_push_if(b, nir_elect(b, 1));
   {
      offset = nir_global_atomic(
         b, 32, nir_imm_int64(b, printf->buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
         .atomic_op = nir_atomic_op_iadd);
   }
   nir_push_else(b, NULL);
   {
      undef = nir_undef(b, 1, 32);
   }
   nir_pop_if(b, NULL);

   offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));

   nir_def *buffer_size = nir_load_global(
      b, 1, 32, nir_imm_int64(b, printf->buffer_addr + offsetof(struct radv_printf_buffer_header, size)));

   /* Only write the entry when it fits entirely into the buffer. */
   nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
   {
      nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), printf->buffer_addr);

      /* header */
      nir_store_global(b, nir_ior_imm(b, active_invocation_count, format_index << 16), addr);
      addr = nir_iadd_imm(b, addr, 4);

      for (uint32_t i = 0; i < argc; i++) {
         nir_def *arg = args[i];

         /* Use the recorded mask so the stored layout always agrees with the
          * stride reserved above and with what the host-side dump expects.
          */
         if (format.divergence_mask & BITFIELD_BIT(i)) {
            nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
            nir_store_global(
               b, arg,
               nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))));
         } else {
            nir_store_global(b, arg, addr);
         }

         addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
      }
   }
   nir_pop_if(b, NULL);

   /* Close the optional blocks opened at the top (helper-invocation check
    * first, then the caller's condition).
    */
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_pop_if(b, NULL);

   if (cond)
      nir_pop_if(b, NULL);

   free(args);
   free(strides);

   util_dynarray_append(&printf->formats, format);
}

/*
 * Variadic convenience wrapper around radv_build_printf_args().
 *
 * One nir_def * is read from the variadic arguments for every '%' character
 * in format_string, so the caller must pass exactly that many defs.
 */
void
radv_build_printf(struct radv_debug_nir *debug_nir, nir_builder *b, nir_def *cond, const char *format_string, ...)
{
   struct radv_printf_data *printf = &debug_nir->printf;
   if (!printf->buffer_addr)
      return;

   uint32_t num_args = 0;
   for (const char *c = format_string; *c != '\0'; c++) {
      if (*c == '%')
         num_args++;
   }

   /* malloc(0) may return NULL, so only a NULL for num_args != 0 is an error. */
   nir_def **args = malloc(num_args * sizeof(*args));
   if (num_args && !args)
      return;

   va_list arg_list;
   va_start(arg_list, format_string);
   for (uint32_t i = 0; i < num_args; i++)
      args[i] = va_arg(arg_list, nir_def *);
   va_end(arg_list);

   radv_build_printf_args(debug_nir, b, cond, format_string, num_args, args);

   free(args);
}

/* Prints the element_size bytes at src to out using the single-specifier format print_str. */
static void
radv_printf_dump_element(FILE *out, const char *print_str, bool is_float, size_t element_size, const uint8_t *src)
{
   switch (element_size) {
   case 1: {
      uint8_t v;
      memcpy(&v, src, sizeof(v));
      fprintf(out, print_str, v);
      break;
   }
   case 2: {
      uint16_t v;
      memcpy(&v, src, sizeof(v));
      fprintf(out, print_str, v);
      break;
   }
   case 4: {
      if (is_float) {
         float v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      } else {
         uint32_t v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      }
      break;
   }
   case 8: {
      if (is_float) {
         double v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      } else {
         uint64_t v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      }
      break;
   }
   default:
      UNREACHABLE("Unsupported data type");
   }
}

/*
 * Waits for the device to go idle, prints every entry found in the printf
 * buffer to out and resets the buffer's write offset.
 *
 * Does nothing when the printf buffer is not mapped. Values of a divergent
 * argument are printed for every recorded invocation, separated by spaces.
 *
 * The shader advances the header offset even for entries that did not fit, so
 * the walk is clamped to the buffer size and stops at the first entry that
 * cannot be decoded safely.
 */
void
radv_dump_printf_data(struct radv_device *device, FILE *out)
{
   struct radv_printf_data *printf = &device->debug_nir.printf;
   if (!printf->data)
      return;

   device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_printf_buffer_header *header = printf->data;
   uint8_t *data = printf->data;

   const uint32_t num_formats = util_dynarray_num_elements(&printf->formats, struct radv_printf_format);

   /* Never read beyond the buffer, whatever the shader left in the header. */
   uint64_t end = header->offset;
   if (end > (uint64_t)printf->buffer_size)
      end = (uint64_t)printf->buffer_size;

   uint64_t offset = sizeof(struct radv_printf_buffer_header);
   bool truncated = false;

   while (!truncated && offset < end) {
      if (end - offset < sizeof(uint32_t))
         break;

      /* Entries are not necessarily 4-byte aligned (1- and 2-byte elements
       * exist), so copy the header out instead of dereferencing a cast.
       */
      uint32_t printf_header;
      memcpy(&printf_header, &data[offset], sizeof(printf_header));
      offset += sizeof(printf_header);

      uint32_t format_index = printf_header >> 16;
      if (format_index >= num_formats)
         break;

      struct radv_printf_format *printf_format =
         util_dynarray_element(&printf->formats, struct radv_printf_format, format_index);

      uint32_t invocation_count = printf_header & 0xFFFF;

      uint32_t num_args = 0;
      for (const char *c = printf_format->string; *c != '\0'; c++) {
         if (*c == '%')
            num_args++;
      }

      char *format = printf_format->string;

      for (uint32_t i = 0; i <= num_args; i++) {
         size_t spec_pos = util_printf_next_spec_pos(format, 0);

         if (spec_pos == (size_t)-1) {
            /* No specifier left: print the tail once and finish the entry. */
            fprintf(out, "%s", format);
            break;
         }

         const char *token = util_printf_prev_tok(&format[spec_pos]);
         char *next_format = &format[spec_pos + 1];

         /* print the part before the format token */
         if (token != format)
            fwrite(format, token - format, 1, out);

         char *print_str = strndup(token, next_format - token);
         if (!print_str) {
            truncated = true;
            break;
         }

         size_t element_size = printf_format->element_sizes[i];
         bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;

         uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
         for (uint32_t lane = 0; lane < lane_count; lane++) {
            if (end - offset < element_size) {
               truncated = true;
               break;
            }

            radv_printf_dump_element(out, print_str, is_float, element_size, &data[offset]);

            if (lane != lane_count - 1)
               fprintf(out, " ");

            offset += element_size;
         }

         free(print_str);

         if (truncated)
            break;

         /* rebase format */
         format = next_format;
      }
   }

   fflush(out);

   header->offset = sizeof(struct radv_printf_buffer_header);
}

/* Number of low address bits tracked by VA validation. */
#define RADV_VA_VALIDATION_BITS 40
/* Size in bytes of the tracked address range (1 << RADV_VA_VALIDATION_BITS). */
#define RADV_VA_VALIDATION_BIT_COUNT (1ull << RADV_VA_VALIDATION_BITS)
/* Number of bytes covered by a single validity bit. */
#define RADV_VA_VALIDATION_GRANULARITY_BYTES 4096

/*
 * Creates and maps the bitset buffer used for VA validation. The buffer holds
 * one bit per RADV_VA_VALIDATION_GRANULARITY_BYTES of the tracked address
 * range and starts out with all bits cleared.
 *
 * Returns the first failing VkResult; objects created before the failure are
 * left in device->debug_nir.valid_va.
 */
VkResult
radv_init_va_validation(struct radv_device *device)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_valid_va_data *valid_va = &device->debug_nir.valid_va;

   /* One bit per granule, eight bits per byte. */
   uint64_t size = RADV_VA_VALIDATION_BIT_COUNT / RADV_VA_VALIDATION_GRANULARITY_BYTES / 8;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfo){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
            .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
         },
      .size = size,
   };

   VkDevice _device = radv_device_to_handle(device);
   VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &valid_va->buffer);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   device->vk.dispatch_table.GetBufferMemoryRequirements(_device, valid_va->buffer, &requirements);

   VkMemoryAllocateFlagsInfo alloc_flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_ZERO_INITIALIZE_BIT_EXT,
   };

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &alloc_flags_info,
      .allocationSize = requirements.size,
      .memoryTypeIndex =
         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
   };

   result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &valid_va->memory);
   if (result != VK_SUCCESS)
      return result;

   void *data = NULL;
   result = device->vk.dispatch_table.MapMemory(_device, valid_va->memory, 0, VK_WHOLE_SIZE, 0, &data);
   if (result != VK_SUCCESS)
      return result;

   valid_va->vas = data;

   /* Start with every page marked invalid. */
   memset(data, 0, size);

   result = device->vk.dispatch_table.BindBufferMemory(_device, valid_va->buffer, valid_va->memory, 0);
   if (result != VK_SUCCESS)
      return result;

   VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = valid_va->buffer,
   };
   valid_va->buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);

   return VK_SUCCESS;
}

/*
 * Destroys the VA validation buffer and frees its memory. The host pointer is
 * cleared first, which turns radv_va_validation_update_page() into a no-op.
 */
void
radv_finish_va_validation(struct radv_device *device)
{
   VkDevice _device = radv_device_to_handle(device);
   struct radv_valid_va_data *valid_va = &device->debug_nir.valid_va;

   valid_va->vas = NULL;

   device->vk.dispatch_table.DestroyBuffer(_device, valid_va->buffer, NULL);
   if (valid_va->memory)
      device->vk.dispatch_table.UnmapMemory(_device, valid_va->memory);
   device->vk.dispatch_table.FreeMemory(_device, valid_va->memory, NULL);
}

/*
 * Marks the pages covering [va, va + size) as valid or invalid in the VA
 * validation bitset. Does nothing when the bitset is not mapped.
 *
 * Both va and size are divided by RADV_VA_VALIDATION_GRANULARITY_BYTES with
 * truncation, so callers are presumably expected to pass page-aligned values
 * (NOTE(review): confirm against callers).
 */
void
radv_va_validation_update_page(struct radv_device *device, uint64_t va, uint64_t size, bool valid)
{
   struct radv_valid_va_data *valid_va = &device->debug_nir.valid_va;
   if (!valid_va->vas)
      return;

   struct radv_physical_device *pdev = radv_device_physical(device);

   /* Apart from the address32_hi bits, nothing above the tracked range may be set. */
   assert(!(((va >> 32) & ~pdev->info.address32_hi) >> (RADV_VA_VALIDATION_BITS - 32)));

   uint64_t start = (va & BITFIELD64_MASK(RADV_VA_VALIDATION_BITS)) / RADV_VA_VALIDATION_GRANULARITY_BYTES;
   uint64_t end = start + size / RADV_VA_VALIDATION_GRANULARITY_BYTES;

   assert(end > 0);
   /* start/end are page indices, so the limit is the number of bits in the bitset. */
   assert(end <= RADV_VA_VALIDATION_BIT_COUNT / RADV_VA_VALIDATION_GRANULARITY_BYTES);

   if (valid)
      BITSET_SET_RANGE(valid_va->vas, start, end - 1);
   else
      BITSET_CLEAR_RANGE(valid_va->vas, start, end - 1);
}

/*
 * Emits NIR that looks up addr in the VA validation bitset.
 *
 * Returns a boolean def that is true when the page containing addr is marked
 * valid, or NULL (emitting nothing) when VA validation is not set up. A
 * shader printf reporting the address is emitted for the invalid case.
 */
nir_def *
radv_build_is_valid_va(struct radv_debug_nir *debug_nir, nir_builder *b, nir_def *addr)
{
   struct radv_valid_va_data *valid_va = &debug_nir->valid_va;
   if (!valid_va->buffer_addr)
      return NULL;

   nir_def *masked_addr = nir_iand_imm(b, addr, BITFIELD64_MASK(RADV_VA_VALIDATION_BITS));

   nir_def *then_valid;
   nir_def *else_valid;

   /* masked_addr is below 1 << RADV_VA_VALIDATION_BITS, so this bound always holds. */
   nir_push_if(b, nir_ult_imm(b, masked_addr, RADV_VA_VALIDATION_BIT_COUNT * RADV_VA_VALIDATION_GRANULARITY_BYTES));
   {
      /* Page index -> byte offset of the containing dword -> bit inside that dword. */
      nir_def *index = nir_u2u32(b, nir_udiv_imm(b, masked_addr, RADV_VA_VALIDATION_GRANULARITY_BYTES));
      nir_def *offset = nir_imul_imm(b, nir_udiv_imm(b, index, 32), 4);
      nir_def *dword =
         nir_load_global(b, 1, 32, nir_iadd_imm(b, nir_u2u64(b, offset), valid_va->buffer_addr), .align_mul = 4);
      index = nir_umod_imm(b, index, 32);
      then_valid = nir_bitnz(b, dword, index);
   }
   nir_push_else(b, NULL);
   {
      else_valid = nir_imm_false(b);
   }
   nir_pop_if(b, NULL);

   nir_def *valid = nir_if_phi(b, then_valid, else_valid);

   radv_build_printf(debug_nir, b, nir_inot(b, valid), "radv: Invalid VA %lx\n", addr);

   return valid;
}