From 630d6d1f2ec7bc76216956e5353839813d2981bc Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 28 Aug 2024 10:02:15 -0400 Subject: [PATCH] tu: Add a750 flush workaround and re-enable UBWC for storage images This is closer to what the blob does. Part-of: --- src/freedreno/common/freedreno_dev_info.h | 6 +++++ src/freedreno/common/freedreno_devices.py | 7 ++---- src/freedreno/vulkan/tu_cmd_buffer.cc | 30 +++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 0e21627e7c9..97e0ef802af 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -270,6 +270,12 @@ struct fd_dev_info { /* Whether a single clear blit could be used for both sysmem and gmem.*/ bool has_generic_clear; + + /* a750 has a bug where writing and then reading a UBWC-compressed IBO + * requires flushing UCHE. This is reproducible in many CTS tests, for + * example dEQP-VK.image.load_store.with_format.2d.*. + */ + bool ubwc_coherency_quirk; } a7xx; }; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 3fb8863ccdd..64cf3d6825b 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -886,16 +886,13 @@ a7xx_750 = A7XXProps( sysmem_vpc_attr_buf_size = 0x20000, gmem_vpc_attr_buf_size = 0xc000, ubwc_unorm_snorm_int_compatible = True, - # a750 has a bug where writing and then reading a UBWC-compressed IBO - # requires flushing UCHE. This is reproducible in many CTS tests, for - # example dEQP-VK.image.load_store.with_format.2d.*. Disable this for - # now. 
- #supports_ibo_ubwc = True, + supports_ibo_ubwc = True, has_generic_clear = True, gs_vpc_adjacency_quirk = True, storage_8bit = True, ubwc_all_formats_compatible = True, has_compliant_dp4acc = True, + ubwc_coherency_quirk = True, ) a730_magic_regs = dict( diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index e047e8f8b40..aae12800ff0 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -6637,6 +6637,36 @@ tu_barrier(struct tu_cmd_buffer *cmd, struct tu_cache_state *cache = cmd->state.pass ? &cmd->state.renderpass_cache : &cmd->state.cache; + + /* a750 has a HW bug where writing a UBWC-compressed image with a compute + * shader followed by reading it as a texture (or readonly image) requires + * a CACHE_CLEAN event. Some notes about this bug: + * - It only happens after a blit happens. + * - It's fast-clear related: it happens when the image is fast cleared + * before the write and the value read is (incorrectly) the fast clear + * color. + * - CACHE_FLUSH is supposed to be the same as CACHE_CLEAN + + * CACHE_INVALIDATE, but it doesn't work whereas CACHE_CLEAN + + * CACHE_INVALIDATE does. + * + * The srcAccess can be replaced by an OpMemoryBarrier(MakeAvailable), so + * we can't use that to insert the flush. Instead we use the shader source + * stage. 
+ */ + if (cmd->device->physical_device->info->a7xx.ubwc_coherency_quirk && + (srcStage & + (VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))) { + cache->flush_bits |= TU_CMD_FLAG_CACHE_CLEAN; + cache->pending_flush_bits &= ~TU_CMD_FLAG_CACHE_CLEAN; + } + tu_flush_for_access(cache, src_flags, dst_flags); enum tu_stage src_stage = vk2tu_src_stage(srcStage);