tu: Add a750 flush workaround and re-enable UBWC for storage images

This is closer to what the blob does.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30896>
This commit is contained in:
Connor Abbott 2024-08-28 10:02:15 -04:00 committed by Marge Bot
parent 4442d61b16
commit 630d6d1f2e
3 changed files with 38 additions and 5 deletions

View file

@ -270,6 +270,12 @@ struct fd_dev_info {
/* Whether a single clear blit could be used for both sysmem and gmem. */
bool has_generic_clear;
/* a750 has a bug where writing and then reading a UBWC-compressed IBO
* requires flushing UCHE. This is reproducible in many CTS tests, for
* example dEQP-VK.image.load_store.with_format.2d.*.
*/
bool ubwc_coherency_quirk;
} a7xx;
};

View file

@ -886,16 +886,13 @@ a7xx_750 = A7XXProps(
sysmem_vpc_attr_buf_size = 0x20000,
gmem_vpc_attr_buf_size = 0xc000,
ubwc_unorm_snorm_int_compatible = True,
# a750 has a bug where writing and then reading a UBWC-compressed IBO
# requires flushing UCHE. This is reproducible in many CTS tests, for
# example dEQP-VK.image.load_store.with_format.2d.*. Disable this for
# now.
#supports_ibo_ubwc = True,
supports_ibo_ubwc = True,
has_generic_clear = True,
gs_vpc_adjacency_quirk = True,
storage_8bit = True,
ubwc_all_formats_compatible = True,
has_compliant_dp4acc = True,
ubwc_coherency_quirk = True,
)
a730_magic_regs = dict(

View file

@ -6637,6 +6637,36 @@ tu_barrier(struct tu_cmd_buffer *cmd,
struct tu_cache_state *cache =
cmd->state.pass ? &cmd->state.renderpass_cache : &cmd->state.cache;
/* a750 has a HW bug where writing a UBWC compressed image with a compute
* shader followed by reading it as a texture (or readonly image) requires
* a CACHE_CLEAN event. Some notes about this bug:
* - It only happens after a blit has been performed.
* - It's fast-clear related, it happens when the image is fast cleared
* before the write and the value read is (incorrectly) the fast clear
* color.
* - CACHE_FLUSH is supposed to be the same as CACHE_CLEAN +
* CACHE_INVALIDATE, but it doesn't work whereas CACHE_CLEAN +
* CACHE_INVALIDATE does.
*
* The srcAccess can be replaced by an OpMemoryBarrier(MakeAvailable), so
* we can't rely on it to decide when to insert the flush. Instead we use
* the shader source stage.
*/
if (cmd->device->physical_device->info->a7xx.ubwc_coherency_quirk &&
(srcStage &
(VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))) {
cache->flush_bits |= TU_CMD_FLAG_CACHE_CLEAN;
cache->pending_flush_bits &= ~TU_CMD_FLAG_CACHE_CLEAN;
}
tu_flush_for_access(cache, src_flags, dst_flags);
enum tu_stage src_stage = vk2tu_src_stage(srcStage);