mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
radv/amdgpu: emulate sparse residency for the SMEM loads with NULL PRT workaround
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38698>
This commit is contained in:
parent
0be39ce4ad
commit
41fa965386
5 changed files with 126 additions and 4 deletions
|
|
@ -53,6 +53,7 @@ enum radeon_bo_flag { /* bitfield */
|
|||
RADEON_FLAG_VM_UPDATE_WAIT = (1 << 14),
|
||||
RADEON_FLAG_VM_PAD_1PAGE = (1 << 15),
|
||||
RADEON_FLAG_ENCRYPTED = (1 << 16),
|
||||
RADEON_FLAG_EMULATE_SPARSE_RESIDENCY = (1 << 17),
|
||||
};
|
||||
|
||||
enum radeon_ctx_priority {
|
||||
|
|
|
|||
|
|
@ -134,18 +134,74 @@ radv_amdgpu_log_va_op(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_b
|
|||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
radv_amdgpu_virtual_bo_get_low_addr(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
|
||||
{
|
||||
return bo->base.va & ~(1ull << ws->info.address_prt_wa_control_bit);
|
||||
}
|
||||
|
||||
static int
|
||||
radv_amdgpu_virtual_bo_bind_low_null_prt(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
|
||||
uint64_t bo_offset, uint64_t bo_size, uint32_t ops)
|
||||
{
|
||||
const uint64_t low_va = radv_amdgpu_virtual_bo_get_low_addr(ws, bo);
|
||||
uint64_t offset = 0;
|
||||
|
||||
assert(util_is_aligned(bo_offset, 4096) && util_is_aligned(bo_size, 4096));
|
||||
|
||||
while (bo_size > 0) {
|
||||
const uint64_t chunk_size = MIN2(bo_size, ws->null_prt_bug.bo->size);
|
||||
int r;
|
||||
|
||||
r = radv_amdgpu_bo_va_op(ws, radv_amdgpu_winsys_bo(ws->null_prt_bug.bo)->bo_handle, 0, chunk_size,
|
||||
low_va + bo_offset + offset, 0, 0, ops);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
offset += chunk_size;
|
||||
bo_size -= chunk_size;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
radv_amdgpu_virtual_bo_init_mapping(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, uint64_t size)
|
||||
{
|
||||
int r;
|
||||
|
||||
return radv_amdgpu_bo_va_op(ws, 0, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
|
||||
r = radv_amdgpu_bo_va_op(ws, 0, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (bo->emulate_sparse_residency) {
|
||||
/* Bind the "LOW" address space to the zero-initialized BO when it's allocated to emulate
|
||||
* residency.
|
||||
*/
|
||||
r = radv_amdgpu_virtual_bo_bind_low_null_prt(ws, bo, 0, size, AMDGPU_VA_OP_MAP);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
radv_amdgpu_virtual_bo_clear_mapping(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
|
||||
{
|
||||
const uint64_t va_size = radv_amdgpu_bo_va_size(bo->base.size, bo->flags);
|
||||
return radv_amdgpu_bo_va_op(ws, 0, 0, va_size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
|
||||
int r;
|
||||
|
||||
r = radv_amdgpu_bo_va_op(ws, 0, 0, va_size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (bo->emulate_sparse_residency) {
|
||||
/* Clear the "LOW" address space mapping when it's released. */
|
||||
const uint64_t low_va = radv_amdgpu_virtual_bo_get_low_addr(ws, bo);
|
||||
|
||||
r = radv_amdgpu_bo_va_op(ws, 0, 0, va_size, low_va, 0, 0, AMDGPU_VA_OP_CLEAR);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -158,8 +214,16 @@ radv_amdgpu_virtual_bo_map(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_win
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
radv_amdgpu_log_va_op(ws, bo, bo_offset, size, parent->base.va + offset);
|
||||
if (parent->emulate_sparse_residency) {
|
||||
/* Bind the "LOW" address space to the same BO. */
|
||||
const uint64_t low_va = radv_amdgpu_virtual_bo_get_low_addr(ws, parent);
|
||||
|
||||
r = radv_amdgpu_bo_va_op(ws, bo->bo_handle, bo_offset, size, low_va + offset, 0, 0, AMDGPU_VA_OP_REPLACE);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
radv_amdgpu_log_va_op(ws, bo, bo_offset, size, parent->base.va + offset);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
@ -173,8 +237,16 @@ radv_amdgpu_virtual_bo_unmap(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_w
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
radv_amdgpu_log_va_op(ws, NULL, 0, size, parent->base.va + offset);
|
||||
if (parent->emulate_sparse_residency) {
|
||||
/* Re-bind the "LOW" address space to the zero-initialized BO when it's unmapped to emulate
|
||||
* residency.
|
||||
*/
|
||||
r = radv_amdgpu_virtual_bo_bind_low_null_prt(ws, parent, offset, size, AMDGPU_VA_OP_REPLACE);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
radv_amdgpu_log_va_op(ws, NULL, 0, size, parent->base.va + offset);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
@ -356,6 +428,9 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
|
|||
|
||||
assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));
|
||||
|
||||
if (flags & RADEON_FLAG_EMULATE_SPARSE_RESIDENCY)
|
||||
replay_address &= ~(1ull << ws->info.address_prt_wa_control_bit);
|
||||
|
||||
const uint64_t va_size = radv_amdgpu_bo_va_size(size, flags);
|
||||
const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
|
||||
(flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
|
||||
|
|
@ -371,8 +446,12 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
|
|||
bo->base.va = va;
|
||||
bo->base.size = size;
|
||||
bo->va_handle = va_handle;
|
||||
bo->emulate_sparse_residency = !!(flags & RADEON_FLAG_EMULATE_SPARSE_RESIDENCY);
|
||||
bo->base.is_virtual = true;
|
||||
|
||||
if (bo->emulate_sparse_residency)
|
||||
bo->base.va |= 1ull << ws->info.address_prt_wa_control_bit;
|
||||
|
||||
/* Reserve a PRT VA region. */
|
||||
r = radv_amdgpu_virtual_bo_init_mapping(ws, bo, va_size);
|
||||
if (r) {
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ struct radv_amdgpu_winsys_bo {
|
|||
uint32_t bo_handle;
|
||||
|
||||
void *cpu_map;
|
||||
|
||||
bool emulate_sparse_residency;
|
||||
};
|
||||
|
||||
static inline struct radv_amdgpu_winsys_bo *
|
||||
|
|
|
|||
|
|
@ -113,6 +113,34 @@ radv_amdgpu_winsys_query_gpuvm_fault(struct radeon_winsys *rws, struct radv_wins
|
|||
static simple_mtx_t winsys_creation_mutex = SIMPLE_MTX_INITIALIZER;
|
||||
static struct hash_table *winsyses = NULL;
|
||||
|
||||
static VkResult
|
||||
radv_amdgpu_null_prt_bug_init(struct radeon_winsys *rws)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
|
||||
|
||||
if (!ws->info.compiler_info.has_smem_with_null_prt_bug)
|
||||
return VK_SUCCESS;
|
||||
|
||||
/* Create a zero-allocated 8MiB BO that will be used to map partially resident sparse buffers at
|
||||
* creation or when explicitly unmapped.
|
||||
*/
|
||||
return ws->base.buffer_create(&ws->base, 8 * 1024 * 1024 /* 8MiB */, 4096, RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_READ_ONLY |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO,
|
||||
RADV_BO_PRIORITY_VIRTUAL, 0, &ws->null_prt_bug.bo);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_null_prt_bug_finish(struct radeon_winsys *rws)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
|
||||
|
||||
if (!ws->info.compiler_info.has_smem_with_null_prt_bug)
|
||||
return;
|
||||
|
||||
ws->base.buffer_destroy(&ws->base, ws->null_prt_bug.bo);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
||||
{
|
||||
|
|
@ -146,6 +174,9 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
|||
fclose(ws->bo_history_logfile);
|
||||
|
||||
u_rwlock_destroy(&ws->log_bo_list_lock);
|
||||
|
||||
radv_amdgpu_null_prt_bug_finish(rws);
|
||||
|
||||
ac_drm_device_deinitialize(ws->dev);
|
||||
FREE(rws);
|
||||
}
|
||||
|
|
@ -346,6 +377,10 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
|
|||
radv_amdgpu_bo_init_functions(ws);
|
||||
radv_amdgpu_cs_init_functions(ws);
|
||||
|
||||
result = radv_amdgpu_null_prt_bug_init(&ws->base);
|
||||
if (result != VK_SUCCESS)
|
||||
goto winsys_fail;
|
||||
|
||||
_mesa_hash_table_insert(winsyses, (void *)ac_drm_device_get_cookie(dev), ws);
|
||||
simple_mtx_unlock(&winsys_creation_mutex);
|
||||
|
||||
|
|
|
|||
|
|
@ -63,6 +63,11 @@ struct radv_amdgpu_winsys {
|
|||
uint64_t vm_timeline_seq_num;
|
||||
|
||||
uint32_t refcount;
|
||||
|
||||
struct {
|
||||
/* A zero-allocated BO used to map the LOW address space of virtual allocations. */
|
||||
struct radeon_winsys_bo *bo;
|
||||
} null_prt_bug;
|
||||
};
|
||||
|
||||
static inline struct radv_amdgpu_winsys *
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue