mesa/src/amd/vulkan/radv_debug_nir.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

499 lines
16 KiB
C
Raw Normal View History

/*
* Copyright © 2024 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "radv_debug_nir.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "util/hash_table.h"
#include "util/strndup.h"
#include "util/u_printf.h"
#include "nir.h"
#include "nir_builder.h"
/* Process-wide map from nir_shader pointers to the radv_device they were
 * associated with through radv_device_associate_nir(). It is created lazily
 * on the first association and stays NULL until then.
 */
static struct hash_table *device_ht = NULL;
VkResult
radv_printf_data_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
device->printf.formats = UTIL_DYNARRAY_INIT;
device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
return VK_SUCCESS;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext =
&(VkBufferUsageFlags2CreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
.usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
},
.size = device->printf.buffer_size,
};
VkDevice _device = radv_device_to_handle(device);
VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
if (result != VK_SUCCESS)
return result;
VkMemoryRequirements requirements;
device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size,
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
if (result != VK_SUCCESS)
return result;
result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
(void **)&device->printf.data);
if (result != VK_SUCCESS)
return result;
result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
if (result != VK_SUCCESS)
return result;
struct radv_printf_buffer_header *header = device->printf.data;
header->offset = sizeof(struct radv_printf_buffer_header);
header->size = device->printf.buffer_size;
VkBufferDeviceAddressInfo addr_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
.buffer = device->printf.buffer,
};
device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
return VK_SUCCESS;
}
void
radv_printf_data_finish(struct radv_device *device)
{
VkDevice _device = radv_device_to_handle(device);
device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
if (device->printf.memory)
device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
free(format->string);
util_dynarray_fini(&device->printf.formats);
}
/**
 * Tell whether printf instrumentation can be emitted for `shader`.
 *
 * Returns true only when the shader has been associated with a device (see
 * radv_device_associate_nir()) and that device has a printf buffer address.
 */
static bool
radv_shader_printf_enabled(nir_shader *shader)
{
   if (!device_ht)
      return false;

   /* The table may exist because of other shaders while this one was never
    * registered, in which case the lookup returns NULL.
    */
   struct hash_entry *entry = _mesa_hash_table_search(device_ht, shader);
   if (!entry)
      return false;

   struct radv_device *device = entry->data;
   return !!device->printf.buffer_addr;
}
/**
 * Emit NIR that appends one printf record to the device printf buffer.
 *
 * Record layout, as written below: a 32-bit header holding the number of
 * active invocations in the low 16 bits and the format index in the high
 * 16 bits, followed by the arguments in order. A divergent argument stores
 * one value per active invocation; a uniform argument stores a single value.
 *
 * \param b              builder positioned where the printf should happen
 * \param cond           optional condition; NULL means unconditional
 * \param format_string  printf-style format, copied into the device's list
 * \param argc           number of entries in in_args
 * \param in_args        argument values; 1-bit values are widened to 32 bits
 *
 * Does nothing when printf is not enabled for the shader or when a host
 * allocation fails.
 */
void
radv_build_printf_args(nir_builder *b, nir_def *cond, const char *format_string, uint32_t argc, nir_def **in_args)
{
   if (!radv_shader_printf_enabled(b->shader))
      return;

   struct hash_entry *entry = _mesa_hash_table_search(device_ht, b->shader);
   if (!entry)
      return;
   struct radv_device *device = entry->data;

   /* Do all host allocations before emitting any NIR so that a failure does
    * not leave half-built control flow behind.
    */
   struct radv_printf_format format = {0};
   format.string = strdup(format_string);
   nir_def **args = malloc(argc * sizeof(*args));
   nir_def **strides = malloc(argc * sizeof(*strides));
   if (!format.string || (argc && (!args || !strides))) {
      free(format.string);
      free(args);
      free(strides);
      return;
   }

   uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);

   if (cond)
      nir_push_if(b, cond);

   /* Helper invocations must not produce output. */
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_push_if(b, nir_inot(b, nir_is_helper_invocation(b, 1)));

   /* The record starts with a 4-byte header. */
   nir_def *size = nir_imm_int(b, 4);

   nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
   nir_def *active_invocation_count = nir_bit_count(b, ballot);

   for (uint32_t i = 0; i < argc; i++) {
      nir_def *arg = in_args[i];
      /* Sample divergence on the caller's value, before any conversion. */
      bool divergent = arg->divergent;

      if (arg->bit_size == 1)
         arg = nir_b2i32(b, arg);

      args[i] = arg;

      /* 1-bit values were widened above, so bit_size is a byte multiple. */
      uint32_t arg_size = arg->bit_size / 8;
      format.element_sizes[i] = arg_size;

      if (divergent) {
         strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
         format.divergence_mask |= BITFIELD_BIT(i);
      } else {
         strides[i] = nir_imm_int(b, arg_size);
      }

      size = nir_iadd(b, size, strides[i]);
   }

   /* One elected invocation reserves space for the whole subgroup. */
   nir_def *offset;
   nir_def *undef;

   nir_push_if(b, nir_elect(b, 1));
   {
      offset = nir_global_atomic(
         b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
         .atomic_op = nir_atomic_op_iadd);
   }
   nir_push_else(b, NULL);
   {
      undef = nir_undef(b, 1, 32);
   }
   nir_pop_if(b, NULL);

   offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));

   nir_def *buffer_size = nir_load_global(
      b, 1, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)));

   /* Only write when the whole record fits; the reservation above is not
    * rolled back otherwise.
    */
   nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
   {
      nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);

      /* header */
      nir_store_global(b, nir_ior_imm(b, active_invocation_count, format_index << 16), addr);
      addr = nir_iadd_imm(b, addr, 4);

      for (uint32_t i = 0; i < argc; i++) {
         nir_def *arg = args[i];

         /* Use the same divergence decision that sized the stride, so the
          * stores can never exceed the space reserved for this argument.
          */
         if (format.divergence_mask & BITFIELD_BIT(i)) {
            nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
            nir_store_global(
               b, arg, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))));
         } else {
            nir_store_global(b, arg, addr);
         }

         addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
      }
   }
   nir_pop_if(b, NULL);

   if (cond)
      nir_pop_if(b, NULL);

   if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
      nir_pop_if(b, NULL);

   free(args);
   free(strides);

   /* The format list takes ownership of format.string. */
   util_dynarray_append(&device->printf.formats, format);
}
/**
 * Variadic convenience wrapper around radv_build_printf_args().
 *
 * The number of variadic arguments is taken to be the number of '%'
 * characters in format_string, and each one is read as a nir_def pointer.
 * Does nothing when printf is not enabled for the shader or when the
 * temporary argument array cannot be allocated.
 */
void
radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
{
   if (!radv_shader_printf_enabled(b->shader))
      return;

   /* Every '%' is counted as one argument. */
   uint32_t num_args = 0;
   for (const char *c = format_string; *c != '\0'; c++) {
      if (*c == '%')
         num_args++;
   }

   nir_def **args = malloc(num_args * sizeof(*args));
   if (num_args && !args)
      return;

   va_list arg_list;
   va_start(arg_list, format_string);
   for (uint32_t i = 0; i < num_args; i++)
      args[i] = va_arg(arg_list, nir_def *);
   va_end(arg_list);

   radv_build_printf_args(b, cond, format_string, num_args, args);

   free(args);
}
/* Print a single argument value of element_size bytes, read from src, using
 * the one-specifier format string print_str. 4- and 8-byte values are
 * interpreted as float/double when is_float is set.
 */
static void
radv_printf_print_element(FILE *out, const char *print_str, const uint8_t *src, size_t element_size, bool is_float)
{
   switch (element_size) {
   case 1: {
      uint8_t v;
      memcpy(&v, src, sizeof(v));
      fprintf(out, print_str, v);
      break;
   }
   case 2: {
      uint16_t v;
      memcpy(&v, src, sizeof(v));
      fprintf(out, print_str, v);
      break;
   }
   case 4: {
      if (is_float) {
         float v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      } else {
         uint32_t v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      }
      break;
   }
   case 8: {
      if (is_float) {
         double v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      } else {
         uint64_t v;
         memcpy(&v, src, sizeof(v));
         fprintf(out, print_str, v);
      }
      break;
   }
   default:
      UNREACHABLE("Unsupported data type");
   }
}

/**
 * Wait for the device to go idle, then decode and print every record in the
 * printf buffer to `out` and reset the buffer's write cursor.
 *
 * Each record is a 32-bit header (format index in the high 16 bits,
 * invocation count in the low 16 bits) followed by the argument values. An
 * argument flagged in the format's divergence mask has one value per
 * invocation, printed space-separated; other arguments have a single value.
 *
 * Does nothing when the printf buffer was never mapped.
 */
void
radv_dump_printf_data(struct radv_device *device, FILE *out)
{
   if (!device->printf.data)
      return;

   device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_printf_buffer_header *header = device->printf.data;
   uint8_t *data = device->printf.data;

   /* Shaders advance header->offset even when a record does not fit (see
    * radv_build_printf_args()), so the cursor can point past the buffer.
    * Never read beyond what is actually mapped.
    */
   uint64_t end = header->offset;
   if (end > device->printf.buffer_size)
      end = device->printf.buffer_size;

   const uint32_t format_count = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);

   uint64_t offset = sizeof(struct radv_printf_buffer_header);
   bool stop = false;

   while (!stop && offset + sizeof(uint32_t) <= end) {
      /* Records are not guaranteed to be 4-byte aligned, so avoid a cast. */
      uint32_t printf_header;
      memcpy(&printf_header, &data[offset], sizeof(printf_header));
      offset += sizeof(uint32_t);

      uint32_t format_index = printf_header >> 16;
      /* An unknown format means the remaining data cannot be decoded. */
      if (format_index >= format_count)
         break;

      struct radv_printf_format *printf_format =
         util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);

      uint32_t invocation_count = printf_header & 0xFFFF;

      const size_t format_len = strlen(printf_format->string);
      uint32_t num_args = 0;
      for (size_t i = 0; i < format_len; i++) {
         if (printf_format->string[i] == '%')
            num_args++;
      }

      char *format = printf_format->string;

      /* One extra iteration prints the text after the last specifier. */
      for (uint32_t i = 0; i <= num_args; i++) {
         size_t spec_pos = util_printf_next_spec_pos(format, 0);

         if (spec_pos == (size_t)-1) {
            /* No specifier left: emit the tail once and finish the record. */
            fprintf(out, "%s", format);
            break;
         }

         const char *token = util_printf_prev_tok(&format[spec_pos]);
         char *next_format = &format[spec_pos + 1];

         /* print the part before the format token */
         if (token != format)
            fwrite(format, token - format, 1, out);

         /* Isolate this one conversion so it can be handed to fprintf. */
         char *print_str = strndup(token, next_format - token);
         if (!print_str) {
            stop = true;
            break;
         }

         size_t element_size = printf_format->element_sizes[i];
         bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;

         uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
         for (uint32_t lane = 0; lane < lane_count; lane++) {
            /* Stop at a truncated record instead of reading out of bounds. */
            if (offset + element_size > end) {
               stop = true;
               break;
            }

            radv_printf_print_element(out, print_str, &data[offset], element_size, is_float);

            if (lane != lane_count - 1)
               fprintf(out, " ");

            offset += element_size;
         }

         /* rebase format */
         format = next_format;
         free(print_str);

         if (stop)
            break;
      }
   }

   fflush(out);

   /* Rewind the cursor so the buffer can be reused. */
   header->offset = sizeof(struct radv_printf_buffer_header);
}
/* Number of low virtual-address bits covered by the validation bitmap;
 * addresses are masked to this width before being looked up.
 */
#define RADV_VA_VALIDATION_BITS 40
/* Size, in bytes, of the address range addressable with
 * RADV_VA_VALIDATION_BITS bits. Note that despite the name this is a byte
 * count: the number of bits in the bitmap is this value divided by
 * RADV_VA_VALIDATION_GRANULARITY_BYTES.
 */
#define RADV_VA_VALIDATION_BIT_COUNT (1ull << RADV_VA_VALIDATION_BITS)
/* Bytes of address space represented by one bit of the bitmap. */
#define RADV_VA_VALIDATION_GRANULARITY_BYTES 4096
VkResult
radv_init_va_validation(struct radv_device *device)
{
struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t size = RADV_VA_VALIDATION_BIT_COUNT / RADV_VA_VALIDATION_GRANULARITY_BYTES / 8;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext =
&(VkBufferUsageFlags2CreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
.usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
},
.size = size,
};
VkDevice _device = radv_device_to_handle(device);
VkResult result =
device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->va_validation_buffer);
if (result != VK_SUCCESS)
return result;
VkMemoryRequirements requirements;
device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->va_validation_buffer, &requirements);
VkMemoryAllocateFlagsInfo alloc_flags_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
.flags = VK_MEMORY_ALLOCATE_ZERO_INITIALIZE_BIT_EXT,
};
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &alloc_flags_info,
.allocationSize = requirements.size,
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->va_validation_memory);
if (result != VK_SUCCESS)
return result;
void *data = NULL;
result = device->vk.dispatch_table.MapMemory(_device, device->va_validation_memory, 0, VK_WHOLE_SIZE, 0, &data);
if (result != VK_SUCCESS)
return result;
device->valid_vas = data;
memset(data, 0, size);
result = device->vk.dispatch_table.BindBufferMemory(_device, device->va_validation_buffer,
device->va_validation_memory, 0);
if (result != VK_SUCCESS)
return result;
VkBufferDeviceAddressInfo addr_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
.buffer = device->va_validation_buffer,
};
device->valid_vas_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
return VK_SUCCESS;
}
/**
 * Tear down the VA validation bitmap: forget the host pointer, destroy the
 * buffer, and unmap and free its memory.
 */
void
radv_finish_va_validation(struct radv_device *device)
{
   const VkDevice device_handle = radv_device_to_handle(device);

   /* Clear the host pointer first so later page updates become no-ops. */
   device->valid_vas = NULL;

   device->vk.dispatch_table.DestroyBuffer(device_handle, device->va_validation_buffer, NULL);

   /* Only unmap when an allocation actually exists. */
   if (device->va_validation_memory)
      device->vk.dispatch_table.UnmapMemory(device_handle, device->va_validation_memory);
   device->vk.dispatch_table.FreeMemory(device_handle, device->va_validation_memory, NULL);
}
/**
 * Mark the pages covering [va, va + size) as valid or invalid in the VA
 * validation bitmap.
 *
 * \param device  device owning the bitmap; no-op when validation is disabled
 * \param va      start address; only the low RADV_VA_VALIDATION_BITS bits
 *                select the page
 * \param size    length in bytes; rounded down to whole
 *                RADV_VA_VALIDATION_GRANULARITY_BYTES pages
 * \param valid   true to set the bits, false to clear them
 */
void
radv_va_validation_update_page(struct radv_device *device, uint64_t va, uint64_t size, bool valid)
{
   if (!device->valid_vas)
      return;

   struct radv_physical_device *pdev = radv_device_physical(device);

   /* Apart from the 32-bit address-space high bits, nothing may be set above
    * the tracked address width.
    */
   assert(!(((va >> 32) & ~pdev->info.address32_hi) >> (RADV_VA_VALIDATION_BITS - 32)));

   /* Number of bits in the bitmap: one per granule of tracked address space. */
   const uint64_t page_count = RADV_VA_VALIDATION_BIT_COUNT / RADV_VA_VALIDATION_GRANULARITY_BYTES;

   uint64_t start = (va & BITFIELD64_MASK(RADV_VA_VALIDATION_BITS)) / RADV_VA_VALIDATION_GRANULARITY_BYTES;
   uint64_t end = start + size / RADV_VA_VALIDATION_GRANULARITY_BYTES;

   /* A size below one granule covers no whole page; the range helpers take
    * an inclusive last index, so an empty range must not reach them.
    */
   if (end == start)
      return;

   /* `end` is an exclusive page index and must stay inside the bitmap. */
   assert(end <= page_count);

   if (valid)
      BITSET_SET_RANGE(device->valid_vas, start, end - 1);
   else
      BITSET_CLEAR_RANGE(device->valid_vas, start, end - 1);
}
/**
 * Emit NIR that looks `addr` up in the VA validation bitmap.
 *
 * Returns a boolean nir_def that is true when the page containing `addr` is
 * marked valid, or NULL when the shader is not associated with a device or
 * the device has no validation bitmap. When the address is invalid, the
 * emitted code also reports it through radv_build_printf().
 */
nir_def *
radv_build_is_valid_va(nir_builder *b, nir_def *addr)
{
   if (!device_ht)
      return NULL;

   /* The shader may never have been registered even though the table exists. */
   struct hash_entry *entry = _mesa_hash_table_search(device_ht, b->shader);
   if (!entry)
      return NULL;

   struct radv_device *device = entry->data;
   if (!device->valid_vas_addr)
      return NULL;

   nir_def *masked_addr = nir_iand_imm(b, addr, BITFIELD64_MASK(RADV_VA_VALIDATION_BITS));

   nir_def *then_valid;
   nir_def *else_valid;

   /* masked_addr is already limited to RADV_VA_VALIDATION_BITS bits, so this
    * bound holds for every input; the else branch is kept as a safety net.
    */
   nir_push_if(b, nir_ult_imm(b, masked_addr, RADV_VA_VALIDATION_BIT_COUNT * RADV_VA_VALIDATION_GRANULARITY_BYTES));
   {
      /* Page index -> byte offset of the containing dword + bit within it. */
      nir_def *index = nir_u2u32(b, nir_udiv_imm(b, masked_addr, RADV_VA_VALIDATION_GRANULARITY_BYTES));
      nir_def *offset = nir_imul_imm(b, nir_udiv_imm(b, index, 32), 4);
      nir_def *dword =
         nir_load_global(b, 1, 32, nir_iadd_imm(b, nir_u2u64(b, offset), device->valid_vas_addr), .align_mul = 4);
      index = nir_umod_imm(b, index, 32);
      then_valid = nir_bitnz(b, dword, index);
   }
   nir_push_else(b, NULL);
   {
      else_valid = nir_imm_false(b);
   }
   nir_pop_if(b, NULL);

   nir_def *valid = nir_if_phi(b, then_valid, else_valid);

   radv_build_printf(b, nir_inot(b, valid), "radv: Invalid VA %lx\n", addr);

   return valid;
}
/**
 * Remember which device `nir` belongs to, so that the printf and VA
 * validation builders can find the device from a shader pointer.
 *
 * Nothing is recorded when the device has neither a printf buffer nor a VA
 * validation bitmap, or when the lookup table cannot be allocated; the
 * builders then treat the shader as having instrumentation disabled.
 */
void
radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
{
   if (!device->printf.buffer_addr && !device->valid_vas_addr)
      return;

   /* Create the table on first use. */
   if (!device_ht) {
      device_ht = _mesa_pointer_hash_table_create(NULL);
      if (!device_ht)
         return;
   }

   _mesa_hash_table_insert(device_ht, nir, device);
}