ac,radeonsi: limit Smart Access Memory to Zen 3 and GFX10.3 due to perf issues

Many people experience performance degradation on some systems.
There will be a driconf option to enable SAM on other chips as well as
to disable it on systems where it is enabled by default.

Fixes: d3d6d38145 - ac: add radeon_info::all_vram_visible for Smart Access Memory
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3982

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8225>
This commit is contained in:
Marek Olšák 2020-12-24 06:14:11 -05:00 committed by Marge Bot
parent e4fa7c440d
commit b94626d3ee
6 changed files with 17 additions and 7 deletions

View file

@ -29,6 +29,7 @@
#include "drm-uapi/amdgpu_drm.h"
#include "sid.h"
#include "util/macros.h"
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include <stdio.h>
@ -506,6 +507,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
/* Add some margin of error, though this shouldn't be needed in theory. */
info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size;
util_cpu_detect();
info->smart_access_memory = info->all_vram_visible &&
info->chip_class >= GFX10_3 &&
util_cpu_caps.family >= CPU_AMD_ZEN3 &&
util_cpu_caps.family < CPU_AMD_LAST;
/* Set chip identification. */
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
info->pci_rev_id = amdinfo->pci_rev_id;
@ -1048,6 +1055,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " address32_hi = %u\n", info->address32_hi);
fprintf(f, " has_dedicated_vram = %u\n", info->has_dedicated_vram);
fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible);
fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory);
fprintf(f, " num_sdp_interfaces = %u\n", info->num_sdp_interfaces);
fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks);
fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size);

View file

@ -99,6 +99,7 @@ struct radeon_info {
uint32_t address32_hi;
bool has_dedicated_vram;
bool all_vram_visible;
bool smart_access_memory;
bool has_l2_uncached;
bool r600_has_virtual_memory;
uint32_t num_sdp_interfaces;

View file

@ -56,7 +56,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
res->flags |= RADEON_FLAG_GTT_WC;
if (sscreen->info.all_vram_visible)
if (sscreen->info.smart_access_memory)
res->domains = RADEON_DOMAIN_VRAM;
else
res->domains = RADEON_DOMAIN_GTT;
@ -153,7 +153,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;
if (!sscreen->info.all_vram_visible) {
if (!sscreen->info.smart_access_memory) {
/* We don't want to evict buffers from VRAM by mapping them for CPU access,
* because they might never be moved back again. If a buffer is large enough,
* upload data by copying from a temporary GTT buffer. 8K might not seem much,

View file

@ -493,15 +493,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
goto fail;
/* Initialize public allocators. */
bool all_vram_visible = sscreen->info.all_vram_visible;
bool smart_access_memory = sscreen->info.smart_access_memory;
sctx->b.stream_uploader =
u_upload_create(&sctx->b, 1024 * 1024, 0,
all_vram_visible ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
smart_access_memory ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */
if (!sctx->b.stream_uploader)
goto fail;
if (all_vram_visible) {
if (smart_access_memory) {
sctx->b.const_uploader = sctx->b.stream_uploader;
} else {
sctx->b.const_uploader =

View file

@ -1810,7 +1810,8 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
* is busy.
*/
if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
(tex->buffer.domains & RADEON_DOMAIN_VRAM && !sctx->screen->info.all_vram_visible))
(tex->buffer.domains & RADEON_DOMAIN_VRAM &&
!sctx->screen->info.smart_access_memory))
use_staging_texture = true;
else if (usage & PIPE_MAP_READ)
use_staging_texture =

View file

@ -711,7 +711,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws,
if (cs->ring_type == RING_GFX ||
cs->ring_type == RING_COMPUTE ||
cs->ring_type == RING_DMA) {
domain = ws->info.all_vram_visible ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC;
} else {
/* UVD/VCE */