radv/gfx10: use L2 for DMA copy/fill operations

It's coherent and faster. GFX7-GFX9 should also support this but
for now only uses L2 for GFX10 because it's untested on previous gens.

This fixes dEQP-VK.memory.pipeline_barrier.transfer_*

This also fixes some missing geometry in Dawn Of War III because
VBOs weren't updated correctly.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Samuel Pitoiset 2019-07-25 15:38:51 +02:00
parent 9ce75826cb
commit 6a504ab473

View file

@ -1501,6 +1501,14 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
unsigned dma_flags = 0;
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
/* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX9 should also support this but it
* requires tests/benchmarks.
*/
dma_flags |= CP_DMA_USE_L2;
}
si_cp_dma_prepare(cmd_buffer, byte_count,
size + skipped_size + realign_size,
&dma_flags);
@ -1545,6 +1553,14 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
unsigned dma_flags = CP_DMA_CLEAR;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
/* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX9 should also support this but it
* requires tests/benchmarks.
*/
dma_flags |= CP_DMA_USE_L2;
}
si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
/* Emit the clear packet. */