radeonsi: tweak si_test_dma_perf for better experience

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31168>
This commit is contained in:
Marek Olšák 2024-08-22 06:38:01 -04:00
parent 05353cfd4f
commit 434eddd422

View file

@@ -187,7 +187,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
/* Don't test large sizes with GTT because it's slow. */
if ((dst_usage == PIPE_USAGE_STREAM || src_usage == PIPE_USAGE_STREAM) &&
- size > 32 * 1024 * 1024) {
+ size > 16 * 1024 * 1024) {
printf("%8s,", "n/a");
continue;
}
@@ -222,18 +222,27 @@ void si_test_dma_perf(struct si_screen *sscreen)
}
if (is_copy) {
+ /* CP DMA copies are about as slow as PCIe on GFX6-8. */
+ if (sctx->gfx_level <= GFX8 && size > 16 * 1024 * 1024) {
+ success = false;
+ continue;
+ }
si_cp_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, size,
- SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, L2_LRU);
+ SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER,
+ sctx->gfx_level >= GFX7 ? L2_LRU : L2_BYPASS);
} else {
/* CP DMA clears must be aligned to 4 bytes. */
- if (dst_offset % 4 || size % 4) {
+ if (dst_offset % 4 || size % 4 ||
+ /* CP DMA clears are so slow on GFX6-8 that we risk getting a GPU timeout. */
+ (sctx->gfx_level <= GFX8 && size > 512 * 1024)) {
success = false;
continue;
}
assert(clear_value_size == 4);
si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, dst_offset, size,
clear_value[0], SI_OP_SYNC_BEFORE_AFTER,
- SI_COHERENCY_SHADER, L2_LRU);
+ SI_COHERENCY_SHADER,
+ sctx->gfx_level >= GFX7 ? L2_LRU : L2_BYPASS);
}
} else {
/* Compute */
@@ -258,7 +267,10 @@ void si_test_dma_perf(struct si_screen *sscreen)
ctx->get_query_result(ctx, q, true, &result);
ctx->destroy_query(ctx, q);
- if (success) {
+ /* Navi10 and Vega10 sometimes incorrectly return elapsed time of 0 nanoseconds
+ * for very small ops.
+ */
+ if (success && result.u64) {
double GB = 1024.0 * 1024.0 * 1024.0;
double seconds = result.u64 / (double)NUM_RUNS / (1000.0 * 1000.0 * 1000.0);
double GBps = (size / GB) / seconds * (test_flavor == TEST_COPY_VRAM_VRAM ? 2 : 1);