/* mesa/src/amd/vulkan/radv_debug_nir.c */

/*
* Copyright © 2024 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "radv_debug_nir.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "util/hash_table.h"
#include "util/strndup.h"
#include "util/u_printf.h"
#include "nir.h"
#include "nir_builder.h"
/* Process-wide map from a nir_shader pointer to the radv_device it was
 * associated with through radv_device_associate_nir(). It stays NULL until the
 * first association creates it.
 */
static struct hash_table *device_ht = NULL;
VkResult
radv_printf_data_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_printf_data *printf = &device->debug_nir.printf;
printf->formats = UTIL_DYNARRAY_INIT;
printf->buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
if (printf->buffer_size < sizeof(struct radv_printf_buffer_header))
return VK_SUCCESS;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext =
&(VkBufferUsageFlags2CreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
.usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
},
.size = printf->buffer_size,
};
VkDevice _device = radv_device_to_handle(device);
VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &printf->buffer);
if (result != VK_SUCCESS)
return result;
VkMemoryRequirements requirements;
device->vk.dispatch_table.GetBufferMemoryRequirements(_device, printf->buffer, &requirements);
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size,
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &printf->memory);
if (result != VK_SUCCESS)
return result;
result = device->vk.dispatch_table.MapMemory(_device, printf->memory, 0, VK_WHOLE_SIZE, 0, (void **)&printf->data);
if (result != VK_SUCCESS)
return result;
result = device->vk.dispatch_table.BindBufferMemory(_device, printf->buffer, printf->memory, 0);
if (result != VK_SUCCESS)
return result;
struct radv_printf_buffer_header *header = printf->data;
header->offset = sizeof(struct radv_printf_buffer_header);
header->size = printf->buffer_size;
VkBufferDeviceAddressInfo addr_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
.buffer = printf->buffer,
};
printf->buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
return VK_SUCCESS;
}
void
radv_printf_data_finish(struct radv_device *device)
{
VkDevice _device = radv_device_to_handle(device);
struct radv_printf_data *printf = &device->debug_nir.printf;
device->vk.dispatch_table.DestroyBuffer(_device, printf->buffer, NULL);
if (printf->memory)
device->vk.dispatch_table.UnmapMemory(_device, printf->memory);
device->vk.dispatch_table.FreeMemory(_device, printf->memory, NULL);
util_dynarray_foreach (&printf->formats, struct radv_printf_format, format)
free(format->string);
util_dynarray_fini(&printf->formats);
}
/**
 * Tell whether printf instrumentation can be emitted for a shader.
 *
 * Returns true only when the shader has been associated with a device (see
 * device_ht) and that device has a printf buffer address. A shader without
 * an entry in the map is reported as "disabled" instead of dereferencing a
 * NULL lookup result.
 */
static bool
radv_shader_printf_enabled(nir_shader *shader)
{
   if (!device_ht)
      return false;

   struct hash_entry *entry = _mesa_hash_table_search(device_ht, shader);
   if (!entry)
      return false;

   const struct radv_device *device = entry->data;
   return device->debug_nir.printf.buffer_addr != 0;
}
/**
 * Emit NIR that appends one printf record to the device's printf buffer.
 *
 * Does nothing when printf is not enabled for b->shader.
 *
 * Record layout written by the generated code:
 *   - a 32-bit header: active invocation count in the low bits, index of the
 *     format in the device's format array shifted left by 16;
 *   - for every argument either one value (argument not marked divergent) or
 *     one value per active invocation, packed by invocation index.
 *
 * Space is reserved with a single atomic add on the buffer header's offset by
 * one elected invocation; the stores are skipped when the record would not
 * fit in the buffer.
 *
 * @param b             builder positioned where the printf should happen
 * @param cond          optional 1-bit condition guarding the printf, or NULL
 * @param format_string printf-style format; a copy is stored with the device
 * @param argc          number of entries in in_args (at most 32)
 * @param in_args       values to print; 1-bit values are widened to 32 bits
 */
void
radv_build_printf_args(nir_builder *b, nir_def *cond, const char *format_string, uint32_t argc, nir_def **in_args)
{
   if (!radv_shader_printf_enabled(b->shader))
      return;

   /* divergence_mask is filled with BITFIELD_BIT(i), which is only valid for i < 32. */
   assert(argc <= 32);

   struct radv_printf_format format = {0};
   format.string = strdup(format_string);
   if (!format.string)
      return;

   /* Allocate before any control flow is pushed so that bailing out here
    * leaves the builder untouched. malloc(0) may legitimately return NULL.
    */
   nir_def **args = malloc(argc * sizeof(*args));
   nir_def **strides = malloc(argc * sizeof(*strides));
   if (argc && (!args || !strides)) {
      free(args);
      free(strides);
      free(format.string);
      return;
   }

   /* The entry exists: radv_shader_printf_enabled() just looked it up. */
   struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
   struct radv_printf_data *printf = &device->debug_nir.printf;

   uint32_t format_index = util_dynarray_num_elements(&printf->formats, struct radv_printf_format);

   if (cond)
      nir_push_if(b, cond);

   /* Helper invocations must not produce output. */
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_push_if(b, nir_inot(b, nir_is_helper_invocation(b, 1)));

   /* Every record starts with a 4-byte header. */
   nir_def *size = nir_imm_int(b, 4);

   nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
   nir_def *active_invocation_count = nir_bit_count(b, ballot);

   for (uint32_t i = 0; i < argc; i++) {
      nir_def *arg = in_args[i];
      /* Sample the flag before the bool conversion below, which creates a new def. */
      bool divergent = arg->divergent;

      if (arg->bit_size == 1)
         arg = nir_b2i32(b, arg);

      args[i] = arg;

      /* Booleans were widened above, so bit_size is a whole number of bytes. */
      uint32_t arg_size = arg->bit_size / 8;
      format.element_sizes[i] = arg_size;

      if (divergent) {
         strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
         format.divergence_mask |= BITFIELD_BIT(i);
      } else {
         strides[i] = nir_imm_int(b, arg_size);
      }

      size = nir_iadd(b, size, strides[i]);
   }

   /* One invocation reserves space for the whole subgroup... */
   nir_def *offset;
   nir_def *undef;

   nir_push_if(b, nir_elect(b, 1));
   {
      offset = nir_global_atomic(
         b, 32, nir_imm_int64(b, printf->buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
         .atomic_op = nir_atomic_op_iadd);
   }
   nir_push_else(b, NULL);
   {
      undef = nir_undef(b, 1, 32);
   }
   nir_pop_if(b, NULL);

   /* ...and broadcasts the reserved offset to the others. */
   offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));

   nir_def *buffer_size = nir_load_global(
      b, 1, 32, nir_imm_int64(b, printf->buffer_addr + offsetof(struct radv_printf_buffer_header, size)));

   /* Drop the record if it does not fit. */
   nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
   {
      nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), printf->buffer_addr);

      /* header */
      nir_store_global(b, nir_ior_imm(b, active_invocation_count, format_index << 16), addr);
      addr = nir_iadd_imm(b, addr, 4);

      for (uint32_t i = 0; i < argc; i++) {
         nir_def *arg = args[i];

         /* Use the recorded mask rather than arg->divergent: for widened
          * booleans args[i] is a different def than the one the stride was
          * computed from, and the store pattern has to match the stride (and
          * what the reader derives from divergence_mask).
          */
         if (format.divergence_mask & BITFIELD_BIT(i)) {
            nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
            nir_store_global(
               b, arg, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))));
         } else {
            nir_store_global(b, arg, addr);
         }

         addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
      }
   }
   nir_pop_if(b, NULL);

   /* Close the guards in reverse order of opening. */
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_pop_if(b, NULL);

   if (cond)
      nir_pop_if(b, NULL);

   free(args);
   free(strides);

   /* The device's format array takes ownership of format.string. */
   util_dynarray_append(&printf->formats, format);
}
/**
 * Variadic convenience wrapper around radv_build_printf_args().
 *
 * One nir_def * is consumed from the variadic arguments for every conversion
 * specification in format_string. "%%" is a literal percent sign and consumes
 * no argument. Does nothing when printf is not enabled for b->shader or when
 * the temporary argument array cannot be allocated.
 */
void
radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
{
   if (!radv_shader_printf_enabled(b->shader))
      return;

   /* Single pass over the string; skip both characters of an escaped "%%". */
   uint32_t num_args = 0;
   for (const char *c = format_string; *c; c++) {
      if (*c != '%')
         continue;

      if (c[1] == '%') {
         c++;
         continue;
      }

      num_args++;
   }

   /* malloc(0) may legitimately return NULL, so only treat it as a failure when arguments exist. */
   nir_def **args = malloc(num_args * sizeof(*args));
   if (num_args && !args)
      return;

   va_list arg_list;
   va_start(arg_list, format_string);
   for (uint32_t i = 0; i < num_args; i++)
      args[i] = va_arg(arg_list, nir_def *);
   va_end(arg_list);

   radv_build_printf_args(b, cond, format_string, num_args, args);

   free(args);
}
/**
 * Print all records currently stored in the device's printf buffer to `out`
 * and reset the buffer's write cursor.
 *
 * Waits for the device to go idle first. Each record consists of a 32-bit
 * header (format index in the upper 16 bits, invocation count in the lower
 * 16 bits) followed by the argument values; arguments flagged in the format's
 * divergence_mask carry one value per invocation, printed separated by
 * spaces.
 *
 * Parsing stops early, without printing the rest, when the buffer contents
 * are inconsistent (unknown format index or a record running past the end of
 * the valid data).
 */
void
radv_dump_printf_data(struct radv_device *device, FILE *out)
{
   struct radv_printf_data *printf_data = &device->debug_nir.printf;
   if (!printf_data->data)
      return;

   device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_printf_buffer_header *header = printf_data->data;
   uint8_t *data = printf_data->data;
   const uint32_t format_count = util_dynarray_num_elements(&printf_data->formats, struct radv_printf_format);

   /* The shader-side code advances header->offset even for records it then
    * drops because they do not fit, so the cursor can point past the end of
    * the buffer. Never read beyond the mapped size.
    */
   uint32_t end = header->offset;
   if (end > printf_data->buffer_size)
      end = printf_data->buffer_size;

   bool stop = false;
   uint32_t offset = sizeof(struct radv_printf_buffer_header);

   while (!stop && offset < end) {
      if (end - offset < sizeof(uint32_t))
         break;

      /* Records are byte-packed, so the header may be unaligned. */
      uint32_t printf_header;
      memcpy(&printf_header, &data[offset], sizeof(printf_header));
      offset += sizeof(printf_header);

      uint32_t format_index = printf_header >> 16;
      uint32_t invocation_count = printf_header & 0xFFFF;

      /* Stale or dropped data: there is no format to interpret it with. */
      if (format_index >= format_count)
         break;

      struct radv_printf_format *printf_format =
         util_dynarray_element(&printf_data->formats, struct radv_printf_format, format_index);

      char *format = printf_format->string;

      /* Consume one conversion specification per iteration; the iteration
       * that finds none prints the remaining text exactly once.
       */
      for (uint32_t i = 0;; i++) {
         size_t spec_pos = util_printf_next_spec_pos(format, 0);
         if (spec_pos == (size_t)-1) {
            fprintf(out, "%s", format);
            break;
         }

         const char *token = util_printf_prev_tok(&format[spec_pos]);
         char *next_format = &format[spec_pos + 1];

         /* print the part before the format token */
         if (token != format)
            fwrite(format, token - format, 1, out);

         /* Isolated "%...<conv>" token, used as the format for this argument. */
         char *print_str = strndup(token, next_format - token);
         if (!print_str) {
            stop = true;
            break;
         }

         size_t element_size = printf_format->element_sizes[i];
         bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;

         uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
         for (uint32_t lane = 0; lane < lane_count; lane++) {
            /* Do not read values that lie outside the valid data. */
            if (element_size > end - offset) {
               stop = true;
               break;
            }

            switch (element_size) {
            case 1: {
               uint8_t v;
               memcpy(&v, &data[offset], element_size);
               fprintf(out, print_str, v);
               break;
            }
            case 2: {
               uint16_t v;
               memcpy(&v, &data[offset], element_size);
               fprintf(out, print_str, v);
               break;
            }
            case 4: {
               if (is_float) {
                  float v;
                  memcpy(&v, &data[offset], element_size);
                  fprintf(out, print_str, v);
               } else {
                  uint32_t v;
                  memcpy(&v, &data[offset], element_size);
                  fprintf(out, print_str, v);
               }
               break;
            }
            case 8: {
               if (is_float) {
                  double v;
                  memcpy(&v, &data[offset], element_size);
                  fprintf(out, print_str, v);
               } else {
                  uint64_t v;
                  memcpy(&v, &data[offset], element_size);
                  fprintf(out, print_str, v);
               }
               break;
            }
            default:
               UNREACHABLE("Unsupported data type");
            }

            if (lane != lane_count - 1)
               fprintf(out, " ");

            offset += element_size;
         }

         free(print_str);

         if (stop)
            break;

         /* rebase format */
         format = next_format;
      }
   }

   fflush(out);

   /* Everything has been consumed: rewind the cursor to just behind the header. */
   header->offset = sizeof(struct radv_printf_buffer_header);
}
/* Number of low virtual-address bits that take part in VA validation;
 * addresses are masked to this width before being looked up.
 */
#define RADV_VA_VALIDATION_BITS 40
/* 2^RADV_VA_VALIDATION_BITS. The validity bitset holds this value divided by
 * RADV_VA_VALIDATION_GRANULARITY_BYTES bits.
 */
#define RADV_VA_VALIDATION_BIT_COUNT (1ull << RADV_VA_VALIDATION_BITS)
/* Bytes of address space represented by one bit of the validity bitset. */
#define RADV_VA_VALIDATION_GRANULARITY_BYTES 4096
VkResult
radv_init_va_validation(struct radv_device *device)
{
struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t size = RADV_VA_VALIDATION_BIT_COUNT / RADV_VA_VALIDATION_GRANULARITY_BYTES / 8;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext =
&(VkBufferUsageFlags2CreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
.usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
},
.size = size,
};
VkDevice _device = radv_device_to_handle(device);
VkResult result =
device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->va_validation_buffer);
if (result != VK_SUCCESS)
return result;
VkMemoryRequirements requirements;
device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->va_validation_buffer, &requirements);
VkMemoryAllocateFlagsInfo alloc_flags_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
.flags = VK_MEMORY_ALLOCATE_ZERO_INITIALIZE_BIT_EXT,
};
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &alloc_flags_info,
.allocationSize = requirements.size,
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->va_validation_memory);
if (result != VK_SUCCESS)
return result;
void *data = NULL;
result = device->vk.dispatch_table.MapMemory(_device, device->va_validation_memory, 0, VK_WHOLE_SIZE, 0, &data);
if (result != VK_SUCCESS)
return result;
device->valid_vas = data;
memset(data, 0, size);
result = device->vk.dispatch_table.BindBufferMemory(_device, device->va_validation_buffer,
device->va_validation_memory, 0);
if (result != VK_SUCCESS)
return result;
VkBufferDeviceAddressInfo addr_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
.buffer = device->va_validation_buffer,
};
device->valid_vas_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
return VK_SUCCESS;
}
/**
 * Tear down the VA validation buffer: drop the CPU pointer, destroy the
 * buffer, and unmap and free its memory.
 */
void
radv_finish_va_validation(struct radv_device *device)
{
   VkDevice device_handle = radv_device_to_handle(device);

   /* Clearing the pointer first turns radv_va_validation_update_page() into a no-op. */
   device->valid_vas = NULL;

   device->vk.dispatch_table.DestroyBuffer(device_handle, device->va_validation_buffer, NULL);

   /* Only unmap when an allocation actually exists. */
   if (device->va_validation_memory) {
      device->vk.dispatch_table.UnmapMemory(device_handle, device->va_validation_memory);
   }
   device->vk.dispatch_table.FreeMemory(device_handle, device->va_validation_memory, NULL);
}
/**
 * Mark the granules covered by [va, va + size) as valid or invalid in the
 * device's VA validity bitset. No-op when VA validation is not initialized.
 *
 * @param va    start virtual address; only its low RADV_VA_VALIDATION_BITS are used
 * @param size  length in bytes; converted to whole granules by integer division
 * @param valid true to set the bits, false to clear them
 */
void
radv_va_validation_update_page(struct radv_device *device, uint64_t va, uint64_t size, bool valid)
{
   if (device->valid_vas == NULL)
      return;

   struct radv_physical_device *pdev = radv_device_physical(device);

   /* Apart from the address32_hi bits, nothing above the tracked range may be set. */
   assert(!(((va >> 32) & ~pdev->info.address32_hi) >> (RADV_VA_VALIDATION_BITS - 32)));

   const uint64_t masked_va = va & BITFIELD64_MASK(RADV_VA_VALIDATION_BITS);
   uint64_t first_granule = masked_va / RADV_VA_VALIDATION_GRANULARITY_BYTES;
   uint64_t end_granule = first_granule + size / RADV_VA_VALIDATION_GRANULARITY_BYTES;

   assert(end_granule > 0);
   assert(end_granule <= RADV_VA_VALIDATION_BIT_COUNT);

   /* The bitset helpers take an inclusive last index. */
   if (!valid) {
      BITSET_CLEAR_RANGE(device->valid_vas, first_granule, end_granule - 1);
   } else {
      BITSET_SET_RANGE(device->valid_vas, first_granule, end_granule - 1);
   }
}
/**
 * Emit NIR that looks up `addr` in the device's VA validity bitset.
 *
 * Returns a 1-bit value that is true when the bit for the address's granule
 * is set. When the value is false, a "radv: Invalid VA" printf is emitted as
 * well (if printf is enabled for the shader).
 *
 * Returns NULL, emitting nothing, when validation is unavailable: no shader
 * has been associated with a device yet, this shader has no associated
 * device, or the device has no validation buffer address.
 */
nir_def *
radv_build_is_valid_va(nir_builder *b, nir_def *addr)
{
   if (!device_ht)
      return NULL;

   /* A shader that was never associated has no entry; do not dereference NULL. */
   struct hash_entry *entry = _mesa_hash_table_search(device_ht, b->shader);
   if (!entry)
      return NULL;

   struct radv_device *device = entry->data;
   if (!device->valid_vas_addr)
      return NULL;

   nir_def *masked_addr = nir_iand_imm(b, addr, BITFIELD64_MASK(RADV_VA_VALIDATION_BITS));

   nir_def *then_valid;
   nir_def *else_valid;
   /* NOTE(review): masked_addr is already limited to RADV_VA_VALIDATION_BITS
    * bits, so this bound looks like it always holds - confirm the intended limit.
    */
   nir_push_if(b, nir_ult_imm(b, masked_addr, RADV_VA_VALIDATION_BIT_COUNT * RADV_VA_VALIDATION_GRANULARITY_BYTES));
   {
      /* Granule index -> byte offset of the containing dword + bit within it. */
      nir_def *index = nir_u2u32(b, nir_udiv_imm(b, masked_addr, RADV_VA_VALIDATION_GRANULARITY_BYTES));
      nir_def *offset = nir_imul_imm(b, nir_udiv_imm(b, index, 32), 4);
      nir_def *dword =
         nir_load_global(b, 1, 32, nir_iadd_imm(b, nir_u2u64(b, offset), device->valid_vas_addr), .align_mul = 4);
      index = nir_umod_imm(b, index, 32);
      then_valid = nir_bitnz(b, dword, index);
   }
   nir_push_else(b, NULL);
   {
      else_valid = nir_imm_false(b);
   }
   nir_pop_if(b, NULL);

   nir_def *valid = nir_if_phi(b, then_valid, else_valid);

   radv_build_printf(b, nir_inot(b, valid), "radv: Invalid VA %lx\n", addr);

   return valid;
}
/**
 * Remember which device a shader belongs to so that the NIR debug helpers in
 * this file can find it from the nir_shader alone.
 *
 * Nothing is recorded when the device has neither a printf buffer address nor
 * a VA validation buffer address. The lookup table is created on first use.
 */
void
radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
{
   const bool has_printf = device->debug_nir.printf.buffer_addr != 0;
   const bool has_va_validation = device->valid_vas_addr != 0;

   if (!has_printf && !has_va_validation)
      return;

   if (device_ht == NULL) {
      device_ht = _mesa_pointer_hash_table_create(NULL);
   }

   _mesa_hash_table_insert(device_ht, nir, device);
}