mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 11:30:21 +01:00
radv: add a missing async compute workaround for Tonga/Iceland
After digging into PAL code again, I figured that Tonga/Iceland are both affected by a hw bug related to async compute dispatches.

The solution is to change the "threadgroup" dimension mode to the "thread" dimension mode unconditionally.

This should fix a bunch of issues related to RADV_DEBUG=nocompute on these GPUs.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7551
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6334
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4679
Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26207>
This commit is contained in:
parent
f695a9fed2
commit
ec82b42944
3 changed files with 24 additions and 1 deletions
|
|
@ -1245,6 +1245,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->family == CHIP_BONAIRE ||
|
||||
info->family == CHIP_KABINI;
|
||||
|
||||
/* HW bug workaround with async compute dispatches when threadgroup > 4096.
|
||||
* The workaround is to change the "threadgroup" dimension mode to "thread"
|
||||
* dimension mode.
|
||||
*/
|
||||
info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
|
||||
info->family == CHIP_TONGA;
|
||||
|
||||
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
|
||||
* feature version wasn't bumped.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ struct radeon_info {
|
|||
bool has_two_planes_iterate256_bug;
|
||||
bool has_vgt_flush_ngg_legacy_bug;
|
||||
bool has_cs_regalloc_hang_bug;
|
||||
bool has_async_compute_threadgroup_bug;
|
||||
bool has_32bit_predication;
|
||||
bool has_3d_cube_border_color_mipmap;
|
||||
bool has_image_opcodes;
|
||||
|
|
|
|||
|
|
@ -9550,11 +9550,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
|
|||
radeon_emit(cs, dispatch_initiator);
|
||||
}
|
||||
} else {
|
||||
const unsigned *cs_block_size = compute_shader->info.cs.block_size;
|
||||
unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
|
||||
unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
|
||||
|
||||
if (info->unaligned) {
|
||||
const unsigned *cs_block_size = compute_shader->info.cs.block_size;
|
||||
unsigned remainder[3];
|
||||
|
||||
/* If aligned, these should be an entire block size,
|
||||
|
|
@ -9619,6 +9619,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
|
|||
predicating = false;
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.has_async_compute_threadgroup_bug &&
|
||||
cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (info->unaligned) {
|
||||
/* info->blocks is already in thread dimensions for unaligned dispatches. */
|
||||
blocks[i] = info->blocks[i];
|
||||
} else {
|
||||
/* Force the async compute dispatch to be in "thread" dim mode to work around a hw bug. */
|
||||
blocks[i] *= cs_block_size[i];
|
||||
}
|
||||
|
||||
dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
|
||||
}
|
||||
}
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
|
||||
radeon_emit(cs, blocks[0]);
|
||||
radeon_emit(cs, blocks[1]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue