mesa/src/amd/vulkan/radv_device.h
David Rosca e394244a89 radv: Use extra context for video encode queue with multiple VCN instances
amdgpu does VCN instance scheduling per context, so we need to use
different context in order to allow decode to run on one instance
and encode on the other one.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36642>
2025-09-01 10:56:31 +00:00

386 lines
11 KiB
C

/*
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef RADV_DEVICE_H
#define RADV_DEVICE_H
#include "ac_descriptors.h"
#include "ac_spm.h"
#include "ac_sqtt.h"
#include "util/bitset.h"
#include "util/mesa-blake3.h"
#include "radv_debug_nir.h"
#include "radv_pipeline.h"
#include "radv_queue.h"
#include "radv_radeon_winsys.h"
#include "radv_rra.h"
#include "radv_shader.h"
#include "vk_acceleration_structure.h"
#include "vk_device.h"
#include "vk_meta.h"
#include "vk_texcompress_astc.h"
#include "vk_texcompress_etc2.h"
/* Number of hardware context slots kept per device (sizes radv_device::hw_ctx):
 * one slot for every value from 0 through RADEON_CTX_PRIORITY_REALTIME.
 * NOTE(review): presumably the array is indexed by context priority — confirm. */
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
/* Forward declarations: this header only uses pointers to these types. */
struct radv_image_view;
struct radv_cmd_stream;
/* Identifiers for the device dispatch tables. Apart from the base device
 * table, every entry has a same-named member in
 * struct radv_layer_dispatch_tables. RADV_DISPATCH_TABLE_COUNT must stay last
 * so that it equals the number of tables. */
enum radv_dispatch_table {
RADV_DEVICE_DISPATCH_TABLE,
RADV_ANNOTATE_DISPATCH_TABLE,
RADV_APP_DISPATCH_TABLE,
RADV_RGP_DISPATCH_TABLE,
RADV_RRA_DISPATCH_TABLE,
RADV_RMV_DISPATCH_TABLE,
RADV_CTX_ROLL_DISPATCH_TABLE,
RADV_DISPATCH_TABLE_COUNT,
};
/* One vk_device_dispatch_table per layer; the members mirror the non-device
 * entries of enum radv_dispatch_table (annotate, app, rgp, rra, rmv,
 * ctx_roll). Stored in radv_device::layer_dispatch. */
struct radv_layer_dispatch_tables {
struct vk_device_dispatch_table annotate;
struct vk_device_dispatch_table app;
struct vk_device_dispatch_table rgp;
struct vk_device_dispatch_table rra;
struct vk_device_dispatch_table rmv;
struct vk_device_dispatch_table ctx_roll;
};
/* Device-level cache key, stored in radv_device::cache_key. The bit-fields
 * add up to exactly 32 bits (1 + 1 + 1 + 1 + 4 + 24).
 * NOTE(review): presumably this is hashed into radv_device::cache_hash, in
 * which case `reserved` must stay zero — confirm against the implementation. */
struct radv_device_cache_key {
uint32_t keep_shader_info : 1;
uint32_t image_2d_view_of_3d : 1;
uint32_t mesh_shader_queries : 1;
uint32_t primitives_generated_query : 1;
uint32_t trap_excp_flags : 4;
/* Padding that rounds the key up to a full 32-bit word. */
uint32_t reserved : 24;
};
/* Values for radv_device::force_vrs (the RADV_FORCE_VRS setting). The suffix
 * names the forced rate; RADV_FORCE_VRS_1x1 is explicitly 0 and the remaining
 * values follow consecutively. */
enum radv_force_vrs {
RADV_FORCE_VRS_1x1 = 0,
RADV_FORCE_VRS_2x2,
RADV_FORCE_VRS_2x1,
RADV_FORCE_VRS_1x2,
};
/* Notifier state with its own thread handle; embedded in radv_device next to
 * force_vrs under the RADV_FORCE_VRS section.
 * NOTE(review): fd/watch look like an inotify descriptor and watch id, and
 * `quit` like a stop request for `thread` — confirm against the implementation. */
struct radv_notifier {
int fd;
int watch;
bool quit;
thrd_t thread;
};
/* Device-wide state for meta operations; stored in radv_device::meta_state. */
struct radv_meta_state {
VkAllocationCallbacks alloc;
VkPipelineCache cache;
uint32_t initial_cache_entries;
/*
 * For on-demand pipeline creation, makes sure that
 * only one thread tries to build a pipeline at the same time.
 */
mtx_t mtx;
/* State used for acceleration structure builds. */
struct {
struct radix_sort_vk *radix_sort;
struct vk_acceleration_structure_build_ops build_ops;
struct vk_acceleration_structure_build_args build_args;
} accel_struct_build;
/* Note: the ETC2 state is embedded while the ASTC state is held by pointer. */
struct vk_texcompress_etc2_state etc_decode;
struct vk_texcompress_astc_state *astc_decode;
struct vk_meta_device device;
};
/* Memory trace state; stored in radv_device::memory_trace. */
struct radv_memory_trace_data {
/* ID of the PTE update event in ftrace data */
uint16_t ftrace_update_ptes_id;
uint32_t num_cpus;
/* NOTE(review): presumably one descriptor per CPU (num_cpus entries) — confirm. */
int *pipe_fds;
};
/* Buffer bookkeeping for SQTT timestamps of queue events (see
 * radv_device::sqtt_timestamp). Carries a list_head, so instances can be
 * chained.
 * NOTE(review): presumably `map` is the CPU mapping of `bo`, `size` its size
 * in bytes and `offset` the current position inside it — confirm. */
struct radv_sqtt_timestamp {
uint8_t *map;
unsigned offset;
uint64_t size;
struct radeon_winsys_bo *bo;
struct list_head list;
};
/* Number of border color slots tracked per device. */
#define RADV_BORDER_COLOR_COUNT 4096
/* Byte size of a buffer holding one VkClearColorValue for every slot. */
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
/* Per-device border color bookkeeping; stored in
 * radv_device::border_color_data. */
struct radv_device_border_color_data {
/* Occupancy flag for each of the RADV_BORDER_COLOR_COUNT slots. */
bool used[RADV_BORDER_COLOR_COUNT];
struct radeon_winsys_bo *bo;
/* NOTE(review): presumably the CPU-visible mapping of `bo` — confirm. */
VkClearColorValue *colors_gpu_ptr;
/* Mutex is required to guarantee vkCreateSampler thread safety
 * given that we are writing to a buffer and checking color occupation */
mtx_t mutex;
};
/* Hit/miss counters for the PSO cache. radv_device keeps one instance per
 * pipeline type in radv_device::pso_cache_stats. */
struct radv_pso_cache_stats {
uint32_t hits;
uint32_t misses;
};
/*
 * RADV logical device. The common vk_device is embedded as the first member
 * (`vk`); the handle casts for VkDevice go through `vk.base`.
 */
struct radv_device {
struct vk_device vk;
struct radeon_winsys *ws;
struct radv_layer_dispatch_tables layer_dispatch;
/* RADV_NUM_HW_CTX contexts (RADEON_CTX_PRIORITY_REALTIME + 1).
 * NOTE(review): presumably indexed by context priority — confirm. */
struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
/* Extra context for the video encode queue: amdgpu schedules VCN instances
 * per context, so a separate context lets decode run on one instance and
 * encode on the other. */
struct radeon_winsys_ctx *hw_vcn_enc_ctx;
struct radv_meta_state meta_state;
/* NOTE(review): presumably queue_count[f] is the number of entries in
 * queues[f] for queue family f — confirm. */
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
int queue_count[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
uint32_t dispatch_initiator_task;
/* MSAA sample locations.
 * The first index is the sample index.
 * The second index is the coordinate: X, Y. */
float sample_locations_1x[1][2];
float sample_locations_2x[2][2];
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
/* GFX7 and later */
uint32_t gfx_init_size_dw;
struct radeon_winsys_bo *gfx_init;
struct radeon_winsys_bo *trace_bo;
struct radv_trace_data *trace_data;
VkDeviceMemory va_validation_memory;
VkBuffer va_validation_buffer;
BITSET_WORD *valid_vas;
uint64_t valid_vas_addr;
/* Whether to keep shader debug info, for debugging. */
bool keep_shader_info;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct vk_pipeline_cache *mem_cache;
/*
 * Use different counters so MSAA MRTs get consecutive surface indices,
 * even if MASK is allocated in between.
 */
uint32_t image_mrt_offset_counter;
uint32_t fmask_mrt_offset_counter;
/* Shader arena allocator state. */
struct list_head shader_arenas;
struct hash_table_u64 *capture_replay_arena_vas;
unsigned shader_arena_shift;
uint8_t shader_free_list_mask;
struct radv_shader_free_list shader_free_list;
struct radv_shader_free_list capture_replay_free_list;
struct list_head shader_block_obj_pool;
mtx_t shader_arena_mutex;
/* Shader upload state: a dedicated context plus a semaphore/sequence pair. */
mtx_t shader_upload_hw_ctx_mutex;
struct radeon_winsys_ctx *shader_upload_hw_ctx;
VkSemaphore shader_upload_sem;
uint64_t shader_upload_seq;
struct list_head shader_dma_submissions;
mtx_t shader_dma_submission_list_mutex;
cnd_t shader_dma_submission_list_cond;
/* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
bool shader_use_invisible_vram;
/* Whether to inline the compute dispatch size in user sgprs. */
bool load_grid_size_from_user_sgpr;
/* Whether the driver uses a global BO list. */
bool use_global_bo_list;
/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
int force_aniso;
/* Always disable TRUNC_COORD. */
bool disable_trunc_coord;
struct radv_device_border_color_data border_color_data;
/* Thread trace. */
struct ac_sqtt sqtt;
bool sqtt_enabled;
bool sqtt_triggered;
/* SQTT timestamps for queue events. */
simple_mtx_t sqtt_timestamp_mtx;
struct radv_sqtt_timestamp sqtt_timestamp;
/* SQTT timed cmd buffers. */
simple_mtx_t sqtt_command_pool_mtx;
struct vk_command_pool *sqtt_command_pool[2];
/* Memory trace. */
struct radv_memory_trace_data memory_trace;
/* SPM. */
struct ac_spm spm;
/* Radeon Raytracing Analyzer trace. */
struct radv_rra_trace_data rra_trace;
/* NOTE(review): output file for the ctx_roll layer, presumably serialized by
 * ctx_roll_mtx — confirm. */
FILE *ctx_roll_file;
simple_mtx_t ctx_roll_mtx;
/* Trap handler. */
struct radv_shader *trap_handler_shader;
struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
uint32_t *tma_ptr;
/* Overallocation. */
bool overallocation_disallowed;
uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
mtx_t overallocation_mutex;
/* RADV_FORCE_VRS. */
struct radv_notifier notifier;
enum radv_force_vrs force_vrs;
/* Depth image for VRS when not bound by the app. */
struct {
struct radv_image *image;
struct radv_buffer *buffer; /* HTILE */
struct radv_device_memory *mem;
} vrs;
/* Prime blit sdma queue */
struct radv_queue *private_sdma_queue;
struct radv_shader_part_cache vs_prologs;
struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
/* NOTE(review): 816 is an unexplained magic number in this header; its
 * derivation should be documented or given a named constant. */
struct radv_shader_part *instance_rate_vs_prologs[816];
struct radv_shader_part_cache ps_epilogs;
simple_mtx_t trace_mtx;
/* Whether per-vertex VRS is forced. */
bool force_vrs_enabled;
/* NOTE(review): presumably a use count for radv_device_set_pstate(),
 * guarded by pstate_mtx — confirm. */
simple_mtx_t pstate_mtx;
unsigned pstate_cnt;
/* BO to contain some performance counter helpers:
 * - A lock for profiling cmdbuffers.
 * - a temporary fence for the end query synchronization.
 * - the pass to use for profiling. (as an array of bools)
 */
struct radeon_winsys_bo *perf_counter_bo;
/* Interleaved lock/unlock commandbuffers for perfcounter passes. */
struct radv_cmd_stream **perf_counter_lock_cs;
bool uses_shadow_regs;
struct hash_table *rt_handles;
simple_mtx_t rt_handles_mtx;
struct radv_printf_data printf;
struct radv_device_cache_key cache_key;
blake3_hash cache_hash;
/* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
char *gpu_hang_report;
/* PSO cache stats */
simple_mtx_t pso_cache_stats_mtx;
struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];
struct radv_address_binding_tracker *addr_binding_tracker;
};
/* Ties struct radv_device to the VkDevice handle type through its embedded
 * vk.base object, tagged VK_OBJECT_TYPE_DEVICE. The macro is defined outside
 * this file; NOTE(review): presumably it generates the from-handle/to-handle
 * cast helpers — confirm. */
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
/**
 * Return the RADV physical device that a logical device was created from.
 *
 * The pointer is read from the common device state (dev->vk.physical) and
 * cast to the RADV-specific physical device type.
 */
static inline struct radv_physical_device *
radv_device_physical(const struct radv_device *dev)
{
   struct radv_physical_device *pdev = (struct radv_physical_device *)dev->vk.physical;

   return pdev;
}
/**
 * Tell whether any primitives-generated-query feature is enabled on the device.
 *
 * Returns true when at least one of primitivesGeneratedQuery,
 * primitivesGeneratedQueryWithRasterizerDiscard or
 * primitivesGeneratedQueryWithNonZeroStreams is set in the enabled features.
 */
static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{
   if (device->vk.enabled_features.primitivesGeneratedQuery)
      return true;

   if (device->vk.enabled_features.primitivesGeneratedQueryWithRasterizerDiscard)
      return true;

   if (device->vk.enabled_features.primitivesGeneratedQueryWithNonZeroStreams)
      return true;

   return false;
}
/**
 * Tell whether any 32-bit float image atomic feature is enabled on the device.
 *
 * Covers the shader and sparse variants of both the plain atomics
 * (shaderImageFloat32Atomics, sparseImageFloat32Atomics) and the min/max
 * atomics (shaderImageFloat32AtomicMinMax, sparseImageFloat32AtomicMinMax).
 */
static inline bool
radv_uses_image_float32_atomics(const struct radv_device *device)
{
   if (device->vk.enabled_features.shaderImageFloat32Atomics)
      return true;

   if (device->vk.enabled_features.sparseImageFloat32Atomics)
      return true;

   if (device->vk.enabled_features.shaderImageFloat32AtomicMinMax)
      return true;

   if (device->vk.enabled_features.sparseImageFloat32AtomicMinMax)
      return true;

   return false;
}
/* Declared here, defined elsewhere. NOTE(review): presumably creates the
 * resources held in radv_device::vrs — confirm. */
VkResult radv_device_init_vrs_state(struct radv_device *device);
/* NOTE(review): `log_samples` is presumably log2 of the sample count — confirm. */
unsigned radv_get_default_max_sample_dist(int log_samples);
/* Takes the physical device, a command stream to write to and a sample count.
 * NOTE(review): presumably emits the default sample locations stored in
 * radv_device::sample_locations_* — confirm. */
void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
int nr_samples);
/* Color buffer surface state; currently only wraps the common ac_cb_surface.
 * Filled in through radv_initialise_color_surface(). */
struct radv_color_buffer_info {
struct ac_cb_surface ac;
};
/* Depth/stencil surface state: the common ac_ds_surface plus two extra 32-bit
 * values. NOTE(review): the names suggest the DB_RENDER_OVERRIDE2 and
 * DB_RENDER_CONTROL register values — confirm. */
struct radv_ds_buffer_info {
struct ac_ds_surface ac;
uint32_t db_render_override2;
uint32_t db_render_control;
};
/* Surface-state initialisers, defined elsewhere. Each takes the
 * radv_color_buffer_info / radv_ds_buffer_info to operate on as `cb` / `ds`. */

/* Takes an image view as input for the color surface state in `cb`. */
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
struct radv_image_view *iview);
/* Takes an image and an HTILE buffer rather than an image view. */
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
struct radv_ds_buffer_info *ds);
/* Takes an image view as input; `ds_aspects` selects the depth/stencil aspects. */
void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview, VkImageAspectFlags ds_aspects);
/* `db_render_control` is passed by non-const pointer, so it is presumably an
 * in/out value. NOTE(review): confirm whether existing bits are preserved. */
void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
unsigned *db_render_control);
/* Device-level helpers, defined elsewhere. */

/* NOTE(review): presumably the bool result reports success and the call is
 * counted via radv_device::pstate_cnt — confirm. */
bool radv_device_set_pstate(struct radv_device *device, bool enable);
/* Acquire/release pair for performance counters; acquire returns a bool
 * (presumably success — confirm) and release returns nothing. */
bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device);
/* Query only: takes a const device and returns whether VRAM should be cleared. */
bool radv_device_should_clear_vram(const struct radv_device *device);
#endif /* RADV_DEVICE_H */