From 39b3a83ebfa84e894338acfd7c8a2b46c50362b8 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 18 Sep 2024 17:41:35 -0700 Subject: [PATCH] iris/gfx12.5+: Keep HIZ_CCS aux usage while sampling from resolved depth surfaces. This works around graphics corruption seen on MTL and DG2 platforms when sampling from a HIZ-CCS depth surface that was previously fast-cleared and resolved for sampling. Apparently, full resolves no longer guarantee that the CCS surface ends up in a pass-through state due to the behavior of the L3 cache in the presence of compressible data. To work around the problem, this change makes sure that we use a CCS-enabled AUX mode for depth textures if the base surface has a CCS control surface, even if we are instructed to use ISL_AUX_USAGE_NONE. This appears to fix the corruption without the need to add extra L3 flushes after resolves (as was done in the Vulkan driver, see 5178ad761c9e8e). v2: Use ISL_AUX_USAGE_HIZ_CCS_WT instead of ISL_AUX_USAGE_HIZ_CCS usage to represent the requirements of sampling from a depth surface (Nanley). v3: Add some comments, remove redundant check, disallow creation of ISL_AUX_USAGE_NONE surface state for depth sampler views since the hardware is buggy (Nanley). v4: Preserve use of ISL_AUX_STATE_CLEAR when fast-clearing a surface (Nanley). v5: Set ISL_AUX_STATE_COMPRESSED_NO_CLEAR state after clearing a HiZ CCS WT resource on xe2+ (Nanley). 
Reviewed-by: Nanley Chery Part-of: --- src/gallium/drivers/iris/iris_clear.c | 17 +++++++++++++---- src/gallium/drivers/iris/iris_resolve.c | 20 ++++++++++++++++---- src/gallium/drivers/iris/iris_state.c | 13 ++++++++++++- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index c3b38f9a022..1d138fa3585 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -645,7 +645,8 @@ fast_clear_depth(struct iris_context *ice, iris_resource_get_aux_state(res, res_level, layer); if (aux_state != ISL_AUX_STATE_CLEAR && - aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) { + aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR && + aux_state != ISL_AUX_STATE_COMPRESSED_HIER_DEPTH) { /* This slice doesn't have any fast-cleared bits. */ continue; } @@ -705,9 +706,17 @@ fast_clear_depth(struct iris_context *ice, } } - iris_resource_set_aux_state(ice, res, level, box->z, box->depth, - devinfo->ver < 20 ? ISL_AUX_STATE_CLEAR : - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); + if (res->aux.usage == ISL_AUX_USAGE_HIZ_CCS_WT) + iris_resource_set_aux_state( + ice, res, level, box->z, box->depth, + (devinfo->ver >= 20 ? ISL_AUX_STATE_COMPRESSED_NO_CLEAR : + ISL_AUX_STATE_COMPRESSED_CLEAR)); + else + iris_resource_set_aux_state( + ice, res, level, box->z, box->depth, + (devinfo->ver >= 20 ? ISL_AUX_STATE_COMPRESSED_HIER_DEPTH : + ISL_AUX_STATE_CLEAR)); + ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER; ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index f7ea7bc8870..344ed8f2036 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -425,8 +425,9 @@ flush_previous_aux_mode(struct iris_batch *batch, * to avoid extra cache flushing. */ void *v_aux_usage = (void *) (uintptr_t) - (aux_usage == ISL_AUX_USAGE_FCV_CCS_E ? 
- ISL_AUX_USAGE_CCS_E : aux_usage); + (aux_usage == ISL_AUX_USAGE_FCV_CCS_E ? ISL_AUX_USAGE_CCS_E : + aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT ? ISL_AUX_USAGE_HIZ_CCS : + aux_usage); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(batch->bo_aux_modes, bo->hash, bo); @@ -977,8 +978,19 @@ iris_resource_texture_aux_usage(struct iris_context *ice, case ISL_AUX_USAGE_HIZ_CCS: case ISL_AUX_USAGE_HIZ_CCS_WT: assert(res->surf.format == view_format); - return iris_sample_with_depth_aux(devinfo, res) ? - res->aux.usage : ISL_AUX_USAGE_NONE; + /* Even if iris_sample_with_depth_aux() tells us we can't keep + * HiZ enabled for sampling it is possible to perform a partial + * resolve (supported on Gfx12.5+) which makes the CCS surface + * consistent with the contents of the HiZ surface, allowing us + * to keep CCS enabled while sampling from it. This avoids the + * overhead of a full resolve, is beneficial for bandwidth + * consumption and avoids triggering the hardware bugs of full + * resolves on DG2/MTL. + */ + return (iris_sample_with_depth_aux(devinfo, res) ? res->aux.usage : + devinfo->verx10 >= 125 && res->aux.usage == ISL_AUX_USAGE_HIZ_CCS ? + ISL_AUX_USAGE_HIZ_CCS_WT : + ISL_AUX_USAGE_NONE); case ISL_AUX_USAGE_MCS: case ISL_AUX_USAGE_MCS_CCS: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 1be8be0f72c..fba71b56a45 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3116,7 +3116,18 @@ iris_create_sampler_view(struct pipe_context *ctx, aux_usages = 1 << ISL_AUX_USAGE_NONE; } else if (isl_aux_usage_has_hiz(isv->res->aux.usage) && !iris_sample_with_depth_aux(devinfo, isv->res)) { - aux_usages = 1 << ISL_AUX_USAGE_NONE; + if (isv->res->aux.usage == ISL_AUX_USAGE_HIZ_CCS && + devinfo->verx10 >= 125) { + /* On Gfx12.5+ we can use partial resolves to maintain a + * depth surface CCS-compressed while sampling. 
We don't + * allow NONE though since the full resolves required to + * bring the surface to that state appear to be buggy on at + * least DG2 and MTL. + */ + aux_usages = 1 << ISL_AUX_USAGE_HIZ_CCS_WT; + } else { + aux_usages = 1 << ISL_AUX_USAGE_NONE; + } } else { aux_usages = 1 << ISL_AUX_USAGE_NONE | 1 << isv->res->aux.usage;