mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 20:08:06 +02:00
ac,radeonsi: limit Smart Access Memory to Zen 3 and GFX10.3 due to perf issues
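Many users report performance degradation when Smart Access Memory (SAM) is
enabled on some systems, so SAM is now enabled by default only when all VRAM
is CPU-visible, the GPU is GFX10.3 or newer, and the CPU is AMD Zen 3 or newer.
A driconf option will be added to enable SAM on other chips, as well as to
disable it on systems where it is enabled by default.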
Fixes: d3d6d38145 - ac: add radeon_info::all_vram_visible for Smart Access Memory
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3982
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8225>
This commit is contained in:
parent
e4fa7c440d
commit
b94626d3ee
6 changed files with 17 additions and 7 deletions
|
|
@ -29,6 +29,7 @@
|
|||
#include "drm-uapi/amdgpu_drm.h"
|
||||
#include "sid.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
@ -506,6 +507,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
/* Add some margin of error, though this shouldn't be needed in theory. */
|
||||
info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size;
|
||||
|
||||
util_cpu_detect();
|
||||
info->smart_access_memory = info->all_vram_visible &&
|
||||
info->chip_class >= GFX10_3 &&
|
||||
util_cpu_caps.family >= CPU_AMD_ZEN3 &&
|
||||
util_cpu_caps.family < CPU_AMD_LAST;
|
||||
|
||||
/* Set chip identification. */
|
||||
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
|
||||
info->pci_rev_id = amdinfo->pci_rev_id;
|
||||
|
|
@ -1048,6 +1055,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " address32_hi = %u\n", info->address32_hi);
|
||||
fprintf(f, " has_dedicated_vram = %u\n", info->has_dedicated_vram);
|
||||
fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible);
|
||||
fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory);
|
||||
fprintf(f, " num_sdp_interfaces = %u\n", info->num_sdp_interfaces);
|
||||
fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks);
|
||||
fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ struct radeon_info {
|
|||
uint32_t address32_hi;
|
||||
bool has_dedicated_vram;
|
||||
bool all_vram_visible;
|
||||
bool smart_access_memory;
|
||||
bool has_l2_uncached;
|
||||
bool r600_has_virtual_memory;
|
||||
uint32_t num_sdp_interfaces;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
|
|||
switch (res->b.b.usage) {
|
||||
case PIPE_USAGE_STREAM:
|
||||
res->flags |= RADEON_FLAG_GTT_WC;
|
||||
if (sscreen->info.all_vram_visible)
|
||||
if (sscreen->info.smart_access_memory)
|
||||
res->domains = RADEON_DOMAIN_VRAM;
|
||||
else
|
||||
res->domains = RADEON_DOMAIN_GTT;
|
||||
|
|
@ -153,7 +153,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
|
|||
if (res->domains & RADEON_DOMAIN_VRAM) {
|
||||
res->vram_usage = size;
|
||||
|
||||
if (!sscreen->info.all_vram_visible) {
|
||||
if (!sscreen->info.smart_access_memory) {
|
||||
/* We don't want to evict buffers from VRAM by mapping them for CPU access,
|
||||
* because they might never be moved back again. If a buffer is large enough,
|
||||
* upload data by copying from a temporary GTT buffer. 8K might not seem much,
|
||||
|
|
|
|||
|
|
@ -493,15 +493,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
goto fail;
|
||||
|
||||
/* Initialize public allocators. */
|
||||
bool all_vram_visible = sscreen->info.all_vram_visible;
|
||||
bool smart_access_memory = sscreen->info.smart_access_memory;
|
||||
sctx->b.stream_uploader =
|
||||
u_upload_create(&sctx->b, 1024 * 1024, 0,
|
||||
all_vram_visible ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
|
||||
smart_access_memory ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
|
||||
SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */
|
||||
if (!sctx->b.stream_uploader)
|
||||
goto fail;
|
||||
|
||||
if (all_vram_visible) {
|
||||
if (smart_access_memory) {
|
||||
sctx->b.const_uploader = sctx->b.stream_uploader;
|
||||
} else {
|
||||
sctx->b.const_uploader =
|
||||
|
|
|
|||
|
|
@ -1810,7 +1810,8 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
|
|||
* is busy.
|
||||
*/
|
||||
if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
|
||||
(tex->buffer.domains & RADEON_DOMAIN_VRAM && !sctx->screen->info.all_vram_visible))
|
||||
(tex->buffer.domains & RADEON_DOMAIN_VRAM &&
|
||||
!sctx->screen->info.smart_access_memory))
|
||||
use_staging_texture = true;
|
||||
else if (usage & PIPE_MAP_READ)
|
||||
use_staging_texture =
|
||||
|
|
|
|||
|
|
@ -711,7 +711,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws,
|
|||
if (cs->ring_type == RING_GFX ||
|
||||
cs->ring_type == RING_COMPUTE ||
|
||||
cs->ring_type == RING_DMA) {
|
||||
domain = ws->info.all_vram_visible ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
|
||||
domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
|
||||
flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC;
|
||||
} else {
|
||||
/* UVD/VCE */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue