mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
gallium/u_threaded: don't map big VRAM buffers for the first upload directly
This improves Paraview "many spheres" performance 4x along with the radeonsi commit.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
a5d3999c31
commit
4b0dc098b2
3 changed files with 28 additions and 2 deletions
|
|
@ -1284,6 +1284,20 @@ tc_improve_map_buffer_flags(struct threaded_context *tc,
|
|||
if (usage & tc_flags)
|
||||
return usage;
|
||||
|
||||
/* Use the staging upload if it's preferred. */
|
||||
if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
|
||||
PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
|
||||
!(usage & PIPE_TRANSFER_PERSISTENT) &&
|
||||
/* Try not to decrement the counter if it's not positive. Still racy,
|
||||
* but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
|
||||
tres->max_forced_staging_uploads > 0 &&
|
||||
p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
|
||||
usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
|
||||
return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE;
|
||||
}
|
||||
|
||||
/* Sparse buffers can't be mapped directly and can't be reallocated
|
||||
* (fully invalidated). That may just be a radeonsi limitation, but
|
||||
* the threaded context must obey it with radeonsi.
|
||||
|
|
|
|||
|
|
@ -241,6 +241,12 @@ struct threaded_resource {
|
|||
* pointers. */
|
||||
bool is_shared;
|
||||
bool is_user_ptr;
|
||||
|
||||
/* If positive, prefer DISCARD_RANGE with a staging buffer over any other
|
||||
* method of CPU access when map flags allow it. Useful for buffers that
|
||||
* are too large for the visible VRAM window.
|
||||
*/
|
||||
int max_forced_staging_uploads;
|
||||
};
|
||||
|
||||
struct threaded_transfer {
|
||||
|
|
|
|||
|
|
@ -191,10 +191,15 @@ void si_init_resource_fields(struct r600_common_screen *rscreen,
|
|||
res->vram_usage = 0;
|
||||
res->gart_usage = 0;
|
||||
|
||||
if (res->domains & RADEON_DOMAIN_VRAM)
|
||||
if (res->domains & RADEON_DOMAIN_VRAM) {
|
||||
res->vram_usage = size;
|
||||
else if (res->domains & RADEON_DOMAIN_GTT)
|
||||
|
||||
res->b.max_forced_staging_uploads =
|
||||
rscreen->info.has_dedicated_vram &&
|
||||
size >= rscreen->info.vram_vis_size / 4 ? 1 : 0;
|
||||
} else if (res->domains & RADEON_DOMAIN_GTT) {
|
||||
res->gart_usage = size;
|
||||
}
|
||||
}
|
||||
|
||||
bool si_alloc_resource(struct r600_common_screen *rscreen,
|
||||
|
|
@ -289,6 +294,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx,
|
|||
pb_reference(&rdst->buf, rsrc->buf);
|
||||
rdst->gpu_address = rsrc->gpu_address;
|
||||
rdst->b.b.bind = rsrc->b.b.bind;
|
||||
rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads;
|
||||
rdst->flags = rsrc->flags;
|
||||
|
||||
assert(rdst->vram_usage == rsrc->vram_usage);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue