radeonsi: use the new flag AMDGPU_GEM_CREATE_DISCARDABLE

The flag forces the best placement (usually VRAM), and on eviction the buffer
contents are discarded instead of being copied.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466>
This commit is contained in:
Marek Olšák 2022-05-10 16:57:08 -04:00
parent 8a04a0c95b
commit e9e9086b66
9 changed files with 38 additions and 11 deletions

View file

@ -147,6 +147,10 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
res->b.b.flags & SI_RESOURCE_FLAG_GL2_BYPASS)
res->flags |= RADEON_FLAG_GL2_BYPASS;
if (res->b.b.flags & SI_RESOURCE_FLAG_DISCARDABLE &&
sscreen->info.drm_major == 3 && sscreen->info.drm_minor >= 47)
res->flags |= RADEON_FLAG_DISCARDABLE;
/* Set expected VRAM and GART usage for the buffer. */
res->memory_usage_kb = MAX2(1, size / 1024);

View file

@ -481,7 +481,8 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
sctx->compute_scratch_buffer =
si_aligned_buffer_create(&sctx->screen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
scratch_needed, sctx->screen->info.pte_fragment_size);

View file

@ -260,7 +260,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns
if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) {
si_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT, scratch_size, 256);
if (!sctx->scratch_buffer)
return;

View file

@ -1356,7 +1356,8 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
/* TODO: remove the overallocation */
attr_ring_size * 16, 2 * 1024 * 1024);

View file

@ -146,6 +146,8 @@ extern "C" {
/* Extract the 2-bit micro tile mode field from the resource flags x. */
#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \
(((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)
/* Driver-private request for RADEON_FLAG_GL2_BYPASS; si_init_resource_fields
 * sets the winsys flag only when its other conditions also hold.
 */
#define SI_RESOURCE_FLAG_GL2_BYPASS (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
/* Discard the buffer contents on eviction instead of moving the buffer.
 * si_init_resource_fields translates this to RADEON_FLAG_DISCARDABLE only
 * when the DRM version is major 3, minor >= 47.
 */
#define SI_RESOURCE_FLAG_DISCARDABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)
enum si_has_gs {
GS_OFF,

View file

@ -3754,7 +3754,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
pipe_resource_reference(&sctx->esgs_ring, NULL);
sctx->esgs_ring =
pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
esgs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->esgs_ring)
@ -3765,7 +3766,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
pipe_resource_reference(&sctx->gsvs_ring, NULL);
sctx->gsvs_ring =
pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
gsvs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->gsvs_ring)
@ -3987,7 +3989,8 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
sctx->scratch_buffer = si_aligned_buffer_create(
&sctx->screen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT, scratch_needed_size,
sctx->screen->info.pte_fragment_size);
if (!sctx->scratch_buffer)
@ -4017,7 +4020,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,
@ -4030,7 +4034,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
PIPE_RESOURCE_FLAG_UNMAPPABLE |
PIPE_RESOURCE_FLAG_ENCRYPTED |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,

View file

@ -76,6 +76,10 @@ enum radeon_bo_flag
RADEON_FLAG_ENCRYPTED = (1 << 7),
RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
/* Discard on eviction (instead of moving the buffer to GTT).
* This guarantees that this buffer will never be moved to GTT.
*/
RADEON_FLAG_DISCARDABLE = (1 << 10),
};
enum radeon_map_flags
@ -811,7 +815,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
/* These are unsupported flags. */
/* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE))
if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
RADEON_FLAG_DISCARDABLE))
return -1;
int heap = 0;

View file

@ -527,9 +527,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
if (flags & RADEON_FLAG_DISCARDABLE &&
ws->info.drm_minor >= 47)
request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
if (ws->zero_all_vram_allocs &&
(request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
if ((flags & RADEON_FLAG_ENCRYPTED) &&
ws->info.has_tmz_support) {
request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
@ -1405,7 +1411,8 @@ no_slab:
alignment = align(alignment, ws->info.gart_page_size);
}
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
!(flags & RADEON_FLAG_DISCARDABLE);
if (use_reusable_pool) {
/* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */

View file

@ -1053,7 +1053,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
size = align(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
!(flags & RADEON_FLAG_DISCARDABLE);
/* Shared resources don't use cached heaps. */
if (use_reusable_pool) {