diff --git a/.pick_status.json b/.pick_status.json index e1d7f7a4239..54cd1033b41 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -924,7 +924,7 @@ "description": "rusticl/kernel: properly respect device thread limits per dimension", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index 1ddaee62dc5..5e50a63af77 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -898,7 +898,7 @@ impl Kernel { let total_threads = block.iter().take(work_dim).product::<usize>(); if threads != 1 && total_threads < subgroups { for i in 0..work_dim { - if grid[i] * total_threads < threads { + if grid[i] * total_threads < threads && grid[i] * block[i] <= dim_threads[i] { block[i] *= grid[i]; grid[i] = 1; // can only do it once as nothing is cleanly divisible