mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 21:30:09 +01:00
winsys/amdgpu: add RADEON_FLAG_UNCACHED for faster blits over PCIe
Small blits benefit more. Good access pattern is required. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>
This commit is contained in:
parent
cbbc18bc67
commit
36c0124804
2 changed files with 79 additions and 27 deletions
|
|
@ -67,7 +67,8 @@ enum radeon_bo_flag
|
|||
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
|
||||
RADEON_FLAG_READ_ONLY = (1 << 5),
|
||||
RADEON_FLAG_32BIT = (1 << 6),
|
||||
RADEON_FLAG_ENCRYPTED = (1 << 7)
|
||||
RADEON_FLAG_ENCRYPTED = (1 << 7),
|
||||
RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */
|
||||
};
|
||||
|
||||
enum radeon_dependency_flag
|
||||
|
|
@ -712,6 +713,11 @@ enum radeon_heap
|
|||
RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
|
||||
RADEON_HEAP_GTT_WC_32BIT,
|
||||
RADEON_HEAP_GTT,
|
||||
RADEON_HEAP_GTT_UNCACHED_WC,
|
||||
RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY,
|
||||
RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT,
|
||||
RADEON_HEAP_GTT_UNCACHED_WC_32BIT,
|
||||
RADEON_HEAP_GTT_UNCACHED,
|
||||
RADEON_MAX_SLAB_HEAPS,
|
||||
RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
|
||||
};
|
||||
|
|
@ -730,6 +736,11 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
|
|||
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_32BIT:
|
||||
case RADEON_HEAP_GTT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED:
|
||||
return RADEON_DOMAIN_GTT;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
@ -739,36 +750,69 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
|
|||
|
||||
static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
|
||||
{
|
||||
unsigned flags =
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING | (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0);
|
||||
unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
|
||||
|
||||
switch (heap) {
|
||||
case RADEON_HEAP_GTT:
|
||||
case RADEON_HEAP_GTT_UNCACHED:
|
||||
break;
|
||||
default:
|
||||
flags |= RADEON_FLAG_GTT_WC;
|
||||
}
|
||||
|
||||
switch (heap) {
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED:
|
||||
flags |= RADEON_FLAG_UNCACHED;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (heap) {
|
||||
case RADEON_HEAP_VRAM_READ_ONLY:
|
||||
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_READ_ONLY:
|
||||
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
|
||||
flags |= RADEON_FLAG_READ_ONLY;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (heap) {
|
||||
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_VRAM_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
|
||||
flags |= RADEON_FLAG_32BIT;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (heap) {
|
||||
case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
|
||||
return flags | RADEON_FLAG_NO_CPU_ACCESS;
|
||||
|
||||
case RADEON_HEAP_VRAM_READ_ONLY:
|
||||
case RADEON_HEAP_GTT_WC_READ_ONLY:
|
||||
return flags | RADEON_FLAG_READ_ONLY;
|
||||
|
||||
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
|
||||
return flags | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT;
|
||||
|
||||
case RADEON_HEAP_VRAM_32BIT:
|
||||
case RADEON_HEAP_GTT_WC_32BIT:
|
||||
return flags | RADEON_FLAG_32BIT;
|
||||
|
||||
case RADEON_HEAP_VRAM:
|
||||
case RADEON_HEAP_GTT_WC:
|
||||
case RADEON_HEAP_GTT:
|
||||
flags |= RADEON_FLAG_NO_CPU_ACCESS;
|
||||
break;
|
||||
default:
|
||||
return flags;
|
||||
break;
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/* Return the heap index for winsys allocators, or -1 on failure. */
|
||||
static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
|
||||
{
|
||||
bool uncached;
|
||||
|
||||
/* VRAM implies WC (write combining) */
|
||||
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
|
||||
/* NO_CPU_ACCESS implies VRAM only. */
|
||||
|
|
@ -779,7 +823,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
|
|||
return -1;
|
||||
|
||||
/* Unsupported flags: NO_SUBALLOC, SPARSE. */
|
||||
if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS |
|
||||
if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT))
|
||||
return -1;
|
||||
|
||||
|
|
@ -806,15 +850,20 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
|
|||
}
|
||||
break;
|
||||
case RADEON_DOMAIN_GTT:
|
||||
uncached = flags & RADEON_FLAG_UNCACHED;
|
||||
|
||||
switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) {
|
||||
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
|
||||
return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
|
||||
return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT
|
||||
: RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
|
||||
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
|
||||
return RADEON_HEAP_GTT_WC_READ_ONLY;
|
||||
return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY
|
||||
: RADEON_HEAP_GTT_WC_READ_ONLY;
|
||||
case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT:
|
||||
return RADEON_HEAP_GTT_WC_32BIT;
|
||||
return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_32BIT
|
||||
: RADEON_HEAP_GTT_WC_32BIT;
|
||||
case RADEON_FLAG_GTT_WC:
|
||||
return RADEON_HEAP_GTT_WC;
|
||||
return uncached ? RADEON_HEAP_GTT_UNCACHED_WC : RADEON_HEAP_GTT_WC;
|
||||
case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
|
||||
case RADEON_FLAG_READ_ONLY:
|
||||
assert(!"READ_ONLY without WC is disallowed");
|
||||
|
|
@ -823,7 +872,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
|
|||
assert(!"32BIT without WC is disallowed");
|
||||
return -1;
|
||||
case 0:
|
||||
return RADEON_HEAP_GTT;
|
||||
return uncached ? RADEON_HEAP_GTT_UNCACHED : RADEON_HEAP_GTT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -554,6 +554,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
|
|||
if (!(flags & RADEON_FLAG_READ_ONLY))
|
||||
vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
|
||||
|
||||
if (flags & RADEON_FLAG_UNCACHED)
|
||||
vm_flags |= AMDGPU_VM_MTYPE_UC;
|
||||
|
||||
r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
|
||||
AMDGPU_VA_OP_MAP);
|
||||
if (r)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue