gallium/radeon: don't use the DMA ring for pipelined buffer uploads

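Submitting a DMA IB flushes the GFX IB and all GPU caches, which is
expensive for pipelined buffer uploads. Do the staging-buffer copies with
the context's resource_copy_region instead of dma_copy.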

Vedran Miletić said:
  "On Tonga 380X, this improves The Talos Principle from 8.3 fps to 28.3 fps
   (all graphics settings Ultra, 4xAA, 1080p resolution with downsampling
   from 1200p)."

Some anonymous dude said:
   R9 390 results:
      Tomb Raider (normal settings): 80 -> 88 FPS
      Talos Principle (custom settings): 23 -> 56 FPS
      Metro Last Light Redux (default benchmark settings): 39 -> 40 FPS

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Vedran Miletić <vedran@miletic.net>
Tested-by: Grazvydas Ignotas <notasas@gmail.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
This commit is contained in:
Marek Olšák 2016-05-26 18:20:42 +02:00
parent 9c35ec2042
commit d5491a81ff

View file

@@ -368,9 +368,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
 if (staging) {
 /* Copy the VRAM buffer to the staging buffer. */
-rctx->dma_copy(ctx, &staging->b.b, 0,
-box->x % R600_MAP_BUFFER_ALIGNMENT,
-0, 0, resource, level, box);
+ctx->resource_copy_region(ctx, &staging->b.b, 0,
+box->x % R600_MAP_BUFFER_ALIGNMENT,
+0, 0, resource, level, box);
 data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
 if (!data) {
@@ -398,7 +398,6 @@ static void r600_buffer_do_flush_region(struct pipe_context *ctx,
 struct pipe_transfer *transfer,
 const struct pipe_box *box)
 {
-struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 struct r600_resource *rbuffer = r600_resource(transfer->resource);
@@ -414,7 +413,7 @@ static void r600_buffer_do_flush_region(struct pipe_context *ctx,
 u_box_1d(soffset, box->width, &dma_box);
 /* Copy the staging buffer into the original one. */
-rctx->dma_copy(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
+ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
 }
 util_range_add(&rbuffer->valid_buffer_range, box->x,