diff --git a/.pick_status.json b/.pick_status.json index b6b4403115f..86b8dce9f2c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -616,7 +616,7 @@ "description": "winsys/radeon: fix the scratch buffer on gfx6-7", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "1bf39b1f9d115d69aa7b192beb7cde5eea31dffe" }, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index ce099dc841e..d748d2a6c7b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -586,6 +586,13 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) ws->info.spi_cu_en = 0xffff; ws->info.never_stop_sq_perf_counters = false; + /* The maximum number of scratch waves. The number is only a function of the number of CUs. + * It should be large enough to hold at least 1 threadgroup. Use the minimum per-SA CU count. + */ + const unsigned max_waves_per_tg = 1024 / 64; /* LLVM only supports 1024 threads per block */ + ws->info.max_scratch_waves = MAX2(32 * ws->info.min_good_cu_per_sa * ws->info.max_sa_per_se * + ws->info.num_se, max_waves_per_tg); + ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);