diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index c958ab8acd5..98925a33c6a 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -145,12 +145,9 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, res->flags |= RADEON_FLAG_UNCACHED; /* Set expected VRAM and GART usage for the buffer. */ - res->vram_usage_kb = 0; - res->gart_usage_kb = 0; + res->memory_usage_kb = MAX2(1, size / 1024); if (res->domains & RADEON_DOMAIN_VRAM) { - res->vram_usage_kb = MAX2(1, size / 1024); - /* We don't want to evict buffers from VRAM by mapping them for CPU access, * because they might never be moved back again. If a buffer is large enough, * upload data by copying from a temporary GTT buffer. 8K might not seem much, @@ -162,8 +159,6 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, sscreen->info.has_dedicated_vram && size >= 8196) res->b.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY; - } else if (res->domains & RADEON_DOMAIN_GTT) { - res->gart_usage_kb = MAX2(1, size / 1024); } } @@ -292,8 +287,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *d sdst->b.b.bind = ssrc->b.b.bind; sdst->flags = ssrc->flags; - assert(sdst->vram_usage_kb == ssrc->vram_usage_kb); - assert(sdst->gart_usage_kb == ssrc->gart_usage_kb); + assert(sdst->memory_usage_kb == ssrc->memory_usage_kb); assert(sdst->bo_size == ssrc->bo_size); assert(sdst->bo_alignment_log2 == ssrc->bo_alignment_log2); assert(sdst->domains == ssrc->domains); @@ -655,8 +649,7 @@ static struct pipe_resource *si_buffer_from_user_memory(struct pipe_screen *scre } buf->gpu_address = ws->buffer_get_virtual_address(buf->buf); - buf->vram_usage_kb = 0; - buf->gart_usage_kb = templ->width0 / 1024; + buf->memory_usage_kb = templ->width0 / 1024; buf->b.buffer_id_unique = util_idalloc_mt_alloc(&sscreen->buffer_ids); return &buf->b.b; } @@ -678,10 +671,7 @@ struct pipe_resource *si_buffer_from_winsys_buffer(struct pipe_screen *screen, res->bo_alignment_log2 = imported_buf->alignment_log2; res->domains = sscreen->ws->buffer_get_initial_domain(res->buf); - if (res->domains & RADEON_DOMAIN_VRAM) - res->vram_usage_kb = MAX2(1, res->bo_size / 1024); - else if (res->domains & RADEON_DOMAIN_GTT) - res->gart_usage_kb = MAX2(1, res->bo_size / 1024); + res->memory_usage_kb = MAX2(1, res->bo_size / 1024); if (sscreen->ws->buffer_get_flags) res->flags = sscreen->ws->buffer_get_flags(res->buf); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 1e001989c02..f91a6445474 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -37,17 +37,15 @@ void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) struct radeon_cmdbuf *cs = &ctx->gfx_cs; /* There are two memory usage counters in the winsys for all buffers - * that have been added (cs_add_buffer) and two counters in the pipe + * that have been added (cs_add_buffer) and one counter in the pipe * driver for those that haven't been added yet. */ - if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram_kb, ctx->gtt_kb))) { - ctx->gtt_kb = 0; - ctx->vram_kb = 0; + if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->memory_usage_kb))) { + ctx->memory_usage_kb = 0; si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return; } - ctx->gtt_kb = 0; - ctx->vram_kb = 0; + ctx->memory_usage_kb = 0; unsigned need_dwords = si_get_minimum_num_gfx_cs_dwords(ctx, num_draws); if (!ctx->ws->cs_check_space(cs, need_dwords, false)) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 14930725931..24c4ff09dc8 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1164,6 +1164,8 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); + sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3; + unsigned prim_discard_vertex_count_threshold, tmp; si_initialize_prim_discard_tunables(sscreen, false, &prim_discard_vertex_count_threshold, &tmp); /* Compute-shader-based culling doesn't support VBOs in user SGPRs. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c7d8de417f6..49634e9e723 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -292,8 +292,7 @@ struct si_resource { struct pb_buffer *buf; uint64_t gpu_address; /* Memory usage if the buffer placement is optimal. */ - uint32_t vram_usage_kb; - uint32_t gart_usage_kb; + uint32_t memory_usage_kb; /* Resource properties. */ uint64_t bo_size; @@ -527,6 +526,7 @@ struct si_screen { uint32_t *fmask_state); unsigned num_vbos_in_user_sgprs; + unsigned max_memory_usage_kb; unsigned pa_sc_raster_config; unsigned pa_sc_raster_config_1; unsigned se_tile_repeat; @@ -988,8 +988,7 @@ struct si_context { unsigned last_num_draw_calls; unsigned flags; /* flush flags */ /* Current unaccounted memory usage. */ - uint32_t vram_kb; - uint32_t gtt_kb; + uint32_t memory_usage_kb; /* NGG streamout. */ struct pb_buffer *gds; @@ -1696,8 +1695,7 @@ static inline void si_context_add_resource_size(struct si_context *sctx, struct { if (r) { /* Add memory usage for need_gfx_cs_space */ - sctx->vram_kb += si_resource(r)->vram_usage_kb; - sctx->gtt_kb += si_resource(r)->gart_usage_kb; + sctx->memory_usage_kb += si_resource(r)->memory_usage_kb; } } @@ -1926,17 +1924,9 @@ static inline bool util_rast_prim_is_triangles(unsigned prim) * \param gtt GTT memory size not added to the buffer list yet */ static inline bool radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_cmdbuf *cs, - uint32_t vram_kb, uint32_t gtt_kb) + uint32_t kb) { - vram_kb += cs->used_vram_kb; - gtt_kb += cs->used_gart_kb; - - /* Anything that goes above the VRAM size should go to GTT. */ - if (vram_kb > screen->info.vram_size_kb) - gtt_kb += vram_kb - screen->info.vram_size_kb; - - /* Now we just need to check if we have enough GTT (the limit is 75% of max). */ - return gtt_kb < screen->info.gart_size_kb / 4 * 3; + return kb + cs->used_vram_kb + cs->used_gart_kb < screen->max_memory_usage_kb; } /** @@ -1980,8 +1970,7 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc bool check_mem) { if (check_mem && - !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram_kb + bo->vram_usage_kb, - sctx->gtt_kb + bo->gart_usage_kb)) + !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->memory_usage_kb + bo->memory_usage_kb)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage, priority); diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index cf41f4842eb..054d5056d0e 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -447,8 +447,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex tex->buffer.b.b.bind = templ.bind; radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf); tex->buffer.gpu_address = new_tex->buffer.gpu_address; - tex->buffer.vram_usage_kb = new_tex->buffer.vram_usage_kb; - tex->buffer.gart_usage_kb = new_tex->buffer.gart_usage_kb; + tex->buffer.memory_usage_kb = new_tex->buffer.memory_usage_kb; tex->buffer.bo_size = new_tex->buffer.bo_size; tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2; tex->buffer.domains = new_tex->buffer.domains; @@ -984,8 +983,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2; resource->flags = plane0->buffer.flags; resource->domains = plane0->buffer.domains; - resource->vram_usage_kb = plane0->buffer.vram_usage_kb; - resource->gart_usage_kb = plane0->buffer.gart_usage_kb; + resource->memory_usage_kb = plane0->buffer.memory_usage_kb; radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf); resource->gpu_address = plane0->buffer.gpu_address; @@ -1001,10 +999,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_size = imported_buf->size; resource->bo_alignment_log2 = imported_buf->alignment_log2; resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf); - if (resource->domains & RADEON_DOMAIN_VRAM) - resource->vram_usage_kb = MAX2(1, resource->bo_size / 1024); - else if (resource->domains & RADEON_DOMAIN_GTT) - resource->gart_usage_kb = MAX2(1, resource->bo_size / 1024); + resource->memory_usage_kb = MAX2(1, resource->bo_size / 1024); if (sscreen->ws->buffer_get_flags) resource->flags = sscreen->ws->buffer_get_flags(resource->buf); }