mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
radeonsi: upload constants into VRAM instead of GTT
This lowers lgkm wait cycles by 30% on VI under normal conditions. There might be a measurable improvement when CE is disabled (radeon) or under L2 thrashing. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
a550fbb510
commit
ac6007460a
4 changed files with 18 additions and 10 deletions
|
|
@ -607,7 +607,11 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
|||
0, PIPE_USAGE_STREAM);
|
||||
if (!rctx->b.stream_uploader)
|
||||
return false;
|
||||
rctx->b.const_uploader = rctx->b.stream_uploader;
|
||||
|
||||
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
|
||||
0, PIPE_USAGE_DEFAULT);
|
||||
if (!rctx->b.const_uploader)
|
||||
return false;
|
||||
|
||||
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
|
||||
if (!rctx->ctx)
|
||||
|
|
@ -649,9 +653,10 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
|
|||
if (rctx->ctx)
|
||||
rctx->ws->ctx_destroy(rctx->ctx);
|
||||
|
||||
if (rctx->b.stream_uploader) {
|
||||
if (rctx->b.stream_uploader)
|
||||
u_upload_destroy(rctx->b.stream_uploader);
|
||||
}
|
||||
if (rctx->b.const_uploader)
|
||||
u_upload_destroy(rctx->b.const_uploader);
|
||||
|
||||
slab_destroy_child(&rctx->pool_transfers);
|
||||
|
||||
|
|
|
|||
|
|
@ -503,7 +503,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
|
|||
|
||||
dispatch.kernarg_address = kernel_args_va;
|
||||
|
||||
u_upload_data(sctx->b.b.stream_uploader, 0, sizeof(dispatch),
|
||||
u_upload_data(sctx->b.b.const_uploader, 0, sizeof(dispatch),
|
||||
256, &dispatch, &dispatch_offset,
|
||||
(struct pipe_resource**)&dispatch_buf);
|
||||
|
||||
|
|
@ -565,7 +565,7 @@ static void si_upload_compute_input(struct si_context *sctx,
|
|||
/* The extra num_work_size_bytes are for work group / work item size information */
|
||||
kernel_args_size = program->input_size + num_work_size_bytes;
|
||||
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0, kernel_args_size,
|
||||
u_upload_alloc(sctx->b.b.const_uploader, 0, kernel_args_size,
|
||||
sctx->screen->b.info.tcc_cache_line_size,
|
||||
&kernel_args_offset,
|
||||
(struct pipe_resource**)&input_buffer, &kernel_args_ptr);
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
|
|||
} else {
|
||||
void *ptr;
|
||||
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0, list_size,
|
||||
u_upload_alloc(sctx->b.b.const_uploader, 0, list_size,
|
||||
sctx->screen->b.info.tcc_cache_line_size,
|
||||
&desc->buffer_offset,
|
||||
(struct pipe_resource**)&desc->buffer, &ptr);
|
||||
|
|
@ -963,7 +963,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
|||
* directly through a staging buffer and don't go through
|
||||
* the fine-grained upload path.
|
||||
*/
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0,
|
||||
u_upload_alloc(sctx->b.b.const_uploader, 0,
|
||||
desc_list_byte_size,
|
||||
si_optimal_tcc_alignment(sctx, desc_list_byte_size),
|
||||
&desc->buffer_offset,
|
||||
|
|
@ -1051,7 +1051,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
|
|||
{
|
||||
void *tmp;
|
||||
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0, size,
|
||||
u_upload_alloc(sctx->b.b.const_uploader, 0, size,
|
||||
si_optimal_tcc_alignment(sctx, size),
|
||||
const_offset,
|
||||
(struct pipe_resource**)rbuffer, &tmp);
|
||||
|
|
|
|||
|
|
@ -3526,8 +3526,11 @@ static void si_set_vertex_buffers(struct pipe_context *ctx,
|
|||
assert(src->stride == 0);
|
||||
|
||||
/* Assume the attrib has 4 dwords like the vbo
|
||||
* module. This is also a good upper bound. */
|
||||
u_upload_data(sctx->b.b.stream_uploader, 0, 16, 16,
|
||||
* module. This is also a good upper bound.
|
||||
*
|
||||
* Use const_uploader to upload into VRAM directly.
|
||||
*/
|
||||
u_upload_data(sctx->b.b.const_uploader, 0, 16, 16,
|
||||
src->user_buffer,
|
||||
&dsti->buffer_offset,
|
||||
&dsti->buffer);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue