mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 17:58:26 +02:00
ac/info: add ac_fill_bug_info
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40656>
This commit is contained in:
parent
a26ba344a3
commit
84dcc8b940
2 changed files with 140 additions and 132 deletions
|
|
@ -840,6 +840,143 @@ ac_identify_chip(struct radeon_info *info, const struct drm_amdgpu_info_device *
|
|||
return true;
|
||||
}
|
||||
|
||||
void ac_fill_bug_info(struct radeon_info *info)
|
||||
{
|
||||
info->has_sqtt_rb_harvest_bug = (info->family == CHIP_NAVI23 ||
|
||||
info->family == CHIP_NAVI24 ||
|
||||
info->family == CHIP_REMBRANDT ||
|
||||
info->family == CHIP_VANGOGH) &&
|
||||
util_bitcount64(info->enabled_rb_mask) !=
|
||||
info->max_render_backends;
|
||||
|
||||
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
||||
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
||||
|
||||
info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
|
||||
info->has_htile_stencil_mipmap_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* When drawing, if all samples covered in a cleared tile in HTILE are discarded (by the fragment
|
||||
* shader, alpha to coverage, etc.), the tile stays cleared, but on the chips with this bug, the
|
||||
* Z range in the tile still gets expanded by the depth test, and that may flip the upper bit of
|
||||
* the HTILE encoding (of the maximum Z without stencil, or the base Z with stencil), inverting
|
||||
* the clear value that texture reads will use for the tile.
|
||||
*
|
||||
* has_htile_tc_z_clear_bug_without/with_stencil indicate whether the TILE_STENCIL_DISABLE =
|
||||
* 1 and 0 HTILE encodings respectively are subject to this bug.
|
||||
*
|
||||
* One possible workaround is to use the depth/stencil HTILE that encodes the Z range as base and
|
||||
* delta, setting ZRANGE_PRECISION to 0 (base Z is min Z) when the depth is cleared to 0, and to
|
||||
* 1 (base Z is max Z) when it's cleared to 1, so the Z delta gets expanded, but the base Z,
|
||||
* which contains the TC clear value bit, stays the same.
|
||||
* See DepthStencilView::UpdateZRangePrecision in PAL.
|
||||
*
|
||||
* Affects dEQP-VK.dynamic_state.*.discard.depth on has_htile_tc_z_clear_bug_without_stencil = 1
|
||||
* chips as of the CTS commit 698abf5f6b7073562cc951617a58e5803c7ead3f (clearing a depth-only
|
||||
* image to 0, drawing geometry with Z = 1 to it discarding all fragments in the shader, then
|
||||
* reading it in vkCmdCopyImageToBuffer fetching 1 where 0 is supposed to be).
|
||||
*/
|
||||
info->has_htile_tc_z_clear_bug_without_stencil = info->gfx_level == GFX8 ||
|
||||
info->family == CHIP_GFX1013;
|
||||
info->has_htile_tc_z_clear_bug_with_stencil = info->has_htile_tc_z_clear_bug_without_stencil ||
|
||||
info->gfx_level == GFX9;
|
||||
|
||||
info->has_small_prim_filter_sample_loc_bug =
|
||||
(info->family >= CHIP_POLARIS10 && info->family <= CHIP_POLARIS12) ||
|
||||
info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* DB_DFSM_CONTROL.POPS_DRAIN_PS_ON_OVERLAP must be enabled for 8 or more coverage or
|
||||
* depth/stencil samples with POPS (PAL waMiscPopsMissedOverlap).
|
||||
*/
|
||||
info->has_pops_missed_overlap_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* Whether FORCE_STENCIL_VALID must be forced to 1 when a MSAA
|
||||
* depth/stencil image is bound and that ZPASS/ZFAIL differs.
|
||||
*/
|
||||
info->has_db_force_stencil_valid_bug = info->gfx_level == GFX12;
|
||||
|
||||
/* GFX6 hw bug when the IBO addr is 0 which causes invalid clamping (underflow).
|
||||
* Setting the IB addr to 2 or higher solves this issue.
|
||||
* See waMiscNullIb in PAL.
|
||||
*
|
||||
* Drawing from 0-sized index buffers causes hangs on gfx10.
|
||||
*/
|
||||
info->has_zero_index_buffer_bug = info->gfx_level == GFX6 || info->gfx_level == GFX10;
|
||||
|
||||
/* DB has a bug when ITERATE_256 is set to 1 that can cause a hang. The
|
||||
* workaround is to set DECOMPRESS_ON_Z_PLANES to 2 for 4X MSAA D/S images.
|
||||
*/
|
||||
info->has_two_planes_iterate256_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* GFX10+Navi21: NGG->legacy transitions require VGT_FLUSH. */
|
||||
info->has_vgt_flush_ngg_legacy_bug = info->gfx_level == GFX10 ||
|
||||
info->family == CHIP_NAVI21;
|
||||
|
||||
/* GFX10-GFX10.3 (tested on NAVI10, NAVI21 and NAVI24 but likely all) are
|
||||
* affected by a hw bug when primitive restart is updated and no context
|
||||
* registers are written between draws. One workaround is to emit
|
||||
* SQ_NON_EVENT(0) which is a NOP packet that adds a small delay and seems
|
||||
* to fix it reliably.
|
||||
*/
|
||||
info->has_prim_restart_sync_bug = info->gfx_level == GFX10 ||
|
||||
info->gfx_level == GFX10_3;
|
||||
|
||||
/* First Navi2x chips have a hw bug that doesn't allow to write
|
||||
* depth/stencil from a FS for multi-pixel fragments.
|
||||
*/
|
||||
info->has_vrs_ds_export_bug = info->family == CHIP_NAVI21 ||
|
||||
info->family == CHIP_NAVI22 ||
|
||||
info->family == CHIP_VANGOGH;
|
||||
|
||||
/* GFX12 is affected by random GPU hangs when VRS rates are exported by the
|
||||
* last VGT stage under some conditions that are unclear. One possible
|
||||
* workaround is to emit BOP events after every draw that exports VRS
|
||||
* rates.
|
||||
*/
|
||||
info->has_vrs_export_bug = info->gfx_level == GFX12;
|
||||
|
||||
/* HW bug workaround when CS threadgroups > 256 threads and async compute
|
||||
* isn't used, i.e. only one compute job can run at a time. If async
|
||||
* compute is possible, the threadgroup size must be limited to 256 threads
|
||||
* on all queues to avoid the bug.
|
||||
* Only GFX6 and certain GFX7 chips are affected.
|
||||
*/
|
||||
info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
|
||||
info->family == CHIP_BONAIRE ||
|
||||
info->family == CHIP_KABINI;
|
||||
|
||||
/* HW bug workaround with async compute dispatches when threadgroup > 4096.
|
||||
* The workaround is to change the "threadgroup" dimension mode to "thread"
|
||||
* dimension mode.
|
||||
*/
|
||||
info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
|
||||
info->family == CHIP_TONGA;
|
||||
|
||||
/* GFX7 CP requires 32 bytes alignment for the indirect buffer arguments on
|
||||
* the compute queue.
|
||||
*/
|
||||
info->has_async_compute_align32_bug = info->gfx_level == GFX7;
|
||||
|
||||
/* Firmware bug with DISPATCH_TASKMESH_INDIRECT_MULTI_ACE packets.
|
||||
* On old MEC FW versions, it hangs the GPU when indirect count is zero.
|
||||
*/
|
||||
info->has_taskmesh_indirect0_bug = info->gfx_level == GFX10_3 &&
|
||||
info->mec_fw_version < 100;
|
||||
|
||||
info->has_export_conflict_bug = info->gfx_level == GFX11;
|
||||
|
||||
/* When LLVM is fixed to handle multiparts shaders, this value will depend
|
||||
* on the known good versions of LLVM. Until then, enable the equivalent WA
|
||||
* in the nir -> llvm backend.
|
||||
*/
|
||||
info->needs_llvm_wait_wa = info->gfx_level == GFX11;
|
||||
|
||||
info->never_stop_sq_perf_counters = info->gfx_level == GFX10 ||
|
||||
info->gfx_level == GFX10_3;
|
||||
info->never_send_perfcounter_stop = info->gfx_level == GFX11;
|
||||
}
|
||||
|
||||
enum ac_query_gpu_info_result
|
||||
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
bool require_pci_bus_info)
|
||||
|
|
@ -1119,112 +1256,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->cpdma_prefetch_writes_memory = info->gfx_level <= GFX8;
|
||||
|
||||
info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
|
||||
info->has_htile_stencil_mipmap_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* When drawing, if all samples covered in a cleared tile in HTILE are discarded (by the fragment
|
||||
* shader, alpha to coverage, etc.), the tile stays cleared, but on the chips with this bug, the
|
||||
* Z range in the tile still gets expanded by the depth test, and that may flip the upper bit of
|
||||
* the HTILE encoding (of the maximum Z without stencil, or the base Z with stencil), inverting
|
||||
* the clear value that texture reads will use for the tile.
|
||||
*
|
||||
* has_htile_tc_z_clear_bug_without/with_stencil indicate whether the TILE_STENCIL_DISABLE =
|
||||
* 1 and 0 HTILE encodings respectively are subject to this bug.
|
||||
*
|
||||
* One possible workaround is to use the depth/stencil HTILE that encodes the Z range as base and
|
||||
* delta, setting ZRANGE_PRECISION to 0 (base Z is min Z) when the depth is cleared to 0, and to
|
||||
* 1 (base Z is max Z) when it's cleared to 1, so the Z delta gets expanded, but the base Z,
|
||||
* which contains the TC clear value bit, stays the same.
|
||||
* See DepthStencilView::UpdateZRangePrecision in PAL.
|
||||
*
|
||||
* Affects dEQP-VK.dynamic_state.*.discard.depth on has_htile_tc_z_clear_bug_without_stencil = 1
|
||||
* chips as of the CTS commit 698abf5f6b7073562cc951617a58e5803c7ead3f (clearing a depth-only
|
||||
* image to 0, drawing geometry with Z = 1 to it discarding all fragments in the shader, then
|
||||
* reading it in vkCmdCopyImageToBuffer fetching 1 where 0 is supposed to be).
|
||||
*/
|
||||
info->has_htile_tc_z_clear_bug_without_stencil = info->gfx_level == GFX8 ||
|
||||
info->family == CHIP_GFX1013;
|
||||
info->has_htile_tc_z_clear_bug_with_stencil = info->has_htile_tc_z_clear_bug_without_stencil ||
|
||||
info->gfx_level == GFX9;
|
||||
|
||||
info->has_small_prim_filter_sample_loc_bug =
|
||||
(info->family >= CHIP_POLARIS10 && info->family <= CHIP_POLARIS12) ||
|
||||
info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* DB_DFSM_CONTROL.POPS_DRAIN_PS_ON_OVERLAP must be enabled for 8 or more coverage or
|
||||
* depth/stencil samples with POPS (PAL waMiscPopsMissedOverlap).
|
||||
*/
|
||||
info->has_pops_missed_overlap_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* Whether FORCE_STENCIL_VALID must be forced to 1 when a MSAA
|
||||
* depth/stencil image is bound and that ZPASS/ZFAIL differs.
|
||||
*/
|
||||
info->has_db_force_stencil_valid_bug = info->gfx_level == GFX12;
|
||||
|
||||
/* GFX6 hw bug when the IBO addr is 0 which causes invalid clamping (underflow).
|
||||
* Setting the IB addr to 2 or higher solves this issue.
|
||||
* See waMiscNullIb in PAL.
|
||||
*
|
||||
* Drawing from 0-sized index buffers causes hangs on gfx10.
|
||||
*/
|
||||
info->has_zero_index_buffer_bug = info->gfx_level == GFX6 || info->gfx_level == GFX10;
|
||||
|
||||
/* DB has a bug when ITERATE_256 is set to 1 that can cause a hang. The
|
||||
* workaround is to set DECOMPRESS_ON_Z_PLANES to 2 for 4X MSAA D/S images.
|
||||
*/
|
||||
info->has_two_planes_iterate256_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* GFX10+Navi21: NGG->legacy transitions require VGT_FLUSH. */
|
||||
info->has_vgt_flush_ngg_legacy_bug = info->gfx_level == GFX10 ||
|
||||
info->family == CHIP_NAVI21;
|
||||
|
||||
/* GFX10-GFX10.3 (tested on NAVI10, NAVI21 and NAVI24 but likely all) are
|
||||
* affected by a hw bug when primitive restart is updated and no context
|
||||
* registers are written between draws. One workaround is to emit
|
||||
* SQ_NON_EVENT(0) which is a NOP packet that adds a small delay and seems
|
||||
* to fix it reliably.
|
||||
*/
|
||||
info->has_prim_restart_sync_bug = info->gfx_level == GFX10 ||
|
||||
info->gfx_level == GFX10_3;
|
||||
|
||||
/* First Navi2x chips have a hw bug that doesn't allow to write
|
||||
* depth/stencil from a FS for multi-pixel fragments.
|
||||
*/
|
||||
info->has_vrs_ds_export_bug = info->family == CHIP_NAVI21 ||
|
||||
info->family == CHIP_NAVI22 ||
|
||||
info->family == CHIP_VANGOGH;
|
||||
|
||||
/* GFX12 is affected by random GPU hangs when VRS rates are exported by the
|
||||
* last VGT stage under some conditions that are unclear. One possible
|
||||
* workaround is to emit BOP events after every draw that exports VRS
|
||||
* rates.
|
||||
*/
|
||||
info->has_vrs_export_bug = info->gfx_level == GFX12;
|
||||
|
||||
/* HW bug workaround when CS threadgroups > 256 threads and async compute
|
||||
* isn't used, i.e. only one compute job can run at a time. If async
|
||||
* compute is possible, the threadgroup size must be limited to 256 threads
|
||||
* on all queues to avoid the bug.
|
||||
* Only GFX6 and certain GFX7 chips are affected.
|
||||
*/
|
||||
info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
|
||||
info->family == CHIP_BONAIRE ||
|
||||
info->family == CHIP_KABINI;
|
||||
|
||||
/* HW bug workaround with async compute dispatches when threadgroup > 4096.
|
||||
* The workaround is to change the "threadgroup" dimension mode to "thread"
|
||||
* dimension mode.
|
||||
*/
|
||||
info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
|
||||
info->family == CHIP_TONGA;
|
||||
|
||||
/* GFX7 CP requires 32 bytes alignment for the indirect buffer arguments on
|
||||
* the compute queue.
|
||||
*/
|
||||
info->has_async_compute_align32_bug = info->gfx_level == GFX7;
|
||||
|
||||
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
|
||||
* feature version wasn't bumped.
|
||||
*/
|
||||
|
|
@ -1234,25 +1265,11 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
(info->gfx_level == GFX9 &&
|
||||
info->me_fw_feature >= 52);
|
||||
|
||||
/* Firmware bug with DISPATCH_TASKMESH_INDIRECT_MULTI_ACE packets.
|
||||
* On old MEC FW versions, it hangs the GPU when indirect count is zero.
|
||||
*/
|
||||
info->has_taskmesh_indirect0_bug = info->gfx_level == GFX10_3 &&
|
||||
info->mec_fw_version < 100;
|
||||
|
||||
info->has_export_conflict_bug = info->gfx_level == GFX11;
|
||||
|
||||
/* On GFX8-9, CP DMA doesn't support NULL PRT pages:
|
||||
* it doesn't read 0 and doesn't discard writes, causing GPU hangs.
|
||||
*/
|
||||
info->cp_dma_supports_sparse = info->gfx_level >= GFX10;
|
||||
|
||||
/* When LLVM is fixed to handle multiparts shaders, this value will depend
|
||||
* on the known good versions of LLVM. Until then, enable the equivalent WA
|
||||
* in the nir -> llvm backend.
|
||||
*/
|
||||
info->needs_llvm_wait_wa = info->gfx_level == GFX11;
|
||||
|
||||
/* SDMA v1.0-3.x (GFX6-8) can't ignore page faults on unmapped sparse resources. */
|
||||
info->sdma_supports_sparse = info->sdma_ip_version >= SDMA_4_0;
|
||||
|
||||
|
|
@ -1287,6 +1304,8 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
}
|
||||
}
|
||||
|
||||
ac_fill_bug_info(info);
|
||||
|
||||
if (info->gfx_level >= GFX10_3 && info->max_se > 1) {
|
||||
uint32_t enabled_se_mask = 0;
|
||||
|
||||
|
|
@ -1419,18 +1438,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
|
||||
info->has_graphics || info->family < CHIP_GFX940);
|
||||
info->never_stop_sq_perf_counters = info->gfx_level == GFX10 ||
|
||||
info->gfx_level == GFX10_3;
|
||||
info->never_send_perfcounter_stop = info->gfx_level == GFX11;
|
||||
info->has_sqtt_rb_harvest_bug = (info->family == CHIP_NAVI23 ||
|
||||
info->family == CHIP_NAVI24 ||
|
||||
info->family == CHIP_REMBRANDT ||
|
||||
info->family == CHIP_VANGOGH) &&
|
||||
util_bitcount64(info->enabled_rb_mask) !=
|
||||
info->max_render_backends;
|
||||
|
||||
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
||||
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
||||
|
||||
info->mesh_fast_launch_2 = info->gfx_level >= GFX11;
|
||||
|
||||
|
|
|
|||
|
|
@ -496,6 +496,7 @@ ac_fill_hw_ip_info(struct radeon_info *info, const struct drm_amdgpu_info_device
|
|||
unsigned ip_type, const struct drm_amdgpu_info_hw_ip *ip_info);
|
||||
bool
|
||||
ac_identify_chip(struct radeon_info *info, const struct drm_amdgpu_info_device *device_info);
|
||||
/* Fill in the hardware/firmware bug and workaround flags of "info"
 * (has_*_bug, needs_llvm_wait_wa, never_stop_sq_perf_counters,
 * never_send_perfcounter_stop).
 *
 * The flags are derived from info->family, info->gfx_level,
 * info->mec_fw_version, info->enabled_rb_mask and info->max_render_backends,
 * so those fields must be set before calling this.
 */
void ac_fill_bug_info(struct radeon_info *info);
|
||||
|
||||
void ac_compute_driver_uuid(char *uuid, size_t size);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue