iris/gfx12.5+: Keep HIZ_CCS aux usage while sampling from resolved depth surfaces.

This works around graphics corruption seen on MTL and DG2 platforms
when sampling from a HIZ-CCS depth surface that was previously
fast-cleared and resolved for sampling.  Apparently full resolves no
longer guarantee that the CCS surface ends up in a pass-through state,
due to the behavior of the L3 cache in the presence of compressible
data.  In order to work around the problem, this makes sure that we use
a CCS-enabled AUX mode for depth textures if the base surface has a CCS
control surface, even if we are instructed to use ISL_AUX_USAGE_NONE.

This appears to fix the corruption without the need to add extra L3
flushes after resolves (as was done in the Vulkan driver, see
5178ad761c).

v2: Use ISL_AUX_USAGE_HIZ_CCS_WT instead of ISL_AUX_USAGE_HIZ_CCS to
    represent the requirements of sampling from a depth surface
    (Nanley).
v3: Add some comments, remove redundant check, disallow creation of
    ISL_AUX_USAGE_NONE surface state for depth sampler views since the
    hardware is buggy (Nanley).
v4: Preserve use of ISL_AUX_STATE_CLEAR when fast-clearing a surface
    (Nanley).
v5: Set the ISL_AUX_STATE_COMPRESSED_NO_CLEAR state after clearing a
    HiZ CCS WT resource on Xe2+ (Nanley).

Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31139>
This commit is contained in:
Francisco Jerez 2024-09-18 17:41:35 -07:00 committed by Marge Bot
parent 06e48229e6
commit 39b3a83ebf
3 changed files with 41 additions and 9 deletions

View file

@ -645,7 +645,8 @@ fast_clear_depth(struct iris_context *ice,
iris_resource_get_aux_state(res, res_level, layer); iris_resource_get_aux_state(res, res_level, layer);
if (aux_state != ISL_AUX_STATE_CLEAR && if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) { aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_HIER_DEPTH) {
/* This slice doesn't have any fast-cleared bits. */ /* This slice doesn't have any fast-cleared bits. */
continue; continue;
} }
@ -705,9 +706,17 @@ fast_clear_depth(struct iris_context *ice,
} }
} }
iris_resource_set_aux_state(ice, res, level, box->z, box->depth, if (res->aux.usage == ISL_AUX_USAGE_HIZ_CCS_WT)
devinfo->ver < 20 ? ISL_AUX_STATE_CLEAR : iris_resource_set_aux_state(
ISL_AUX_STATE_COMPRESSED_NO_CLEAR); ice, res, level, box->z, box->depth,
(devinfo->ver >= 20 ? ISL_AUX_STATE_COMPRESSED_NO_CLEAR :
ISL_AUX_STATE_COMPRESSED_CLEAR));
else
iris_resource_set_aux_state(
ice, res, level, box->z, box->depth,
(devinfo->ver >= 20 ? ISL_AUX_STATE_COMPRESSED_HIER_DEPTH :
ISL_AUX_STATE_CLEAR));
ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER; ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
} }

View file

@ -425,8 +425,9 @@ flush_previous_aux_mode(struct iris_batch *batch,
* to avoid extra cache flushing. * to avoid extra cache flushing.
*/ */
void *v_aux_usage = (void *) (uintptr_t) void *v_aux_usage = (void *) (uintptr_t)
(aux_usage == ISL_AUX_USAGE_FCV_CCS_E ? (aux_usage == ISL_AUX_USAGE_FCV_CCS_E ? ISL_AUX_USAGE_CCS_E :
ISL_AUX_USAGE_CCS_E : aux_usage); aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT ? ISL_AUX_USAGE_HIZ_CCS :
aux_usage);
struct hash_entry *entry = struct hash_entry *entry =
_mesa_hash_table_search_pre_hashed(batch->bo_aux_modes, bo->hash, bo); _mesa_hash_table_search_pre_hashed(batch->bo_aux_modes, bo->hash, bo);
@ -977,8 +978,19 @@ iris_resource_texture_aux_usage(struct iris_context *ice,
case ISL_AUX_USAGE_HIZ_CCS: case ISL_AUX_USAGE_HIZ_CCS:
case ISL_AUX_USAGE_HIZ_CCS_WT: case ISL_AUX_USAGE_HIZ_CCS_WT:
assert(res->surf.format == view_format); assert(res->surf.format == view_format);
return iris_sample_with_depth_aux(devinfo, res) ? /* Even if iris_sample_with_depth_aux() tells us we can't keep
res->aux.usage : ISL_AUX_USAGE_NONE; * HiZ enabled for sampling it is possible to perform a partial
* resolve (supported on Gfx12.5+) which makes the CCS surface
* consistent with the contents of the HiZ surface, allowing us
* to keep CCS enabled while sampling from it. This avoids the
* overhead of a full resolve, is beneficial for bandwidth
* consumption and avoids triggering the hardware bugs of full
* resolves on DG2/MTL.
*/
return (iris_sample_with_depth_aux(devinfo, res) ? res->aux.usage :
devinfo->verx10 >= 125 && res->aux.usage == ISL_AUX_USAGE_HIZ_CCS ?
ISL_AUX_USAGE_HIZ_CCS_WT :
ISL_AUX_USAGE_NONE);
case ISL_AUX_USAGE_MCS: case ISL_AUX_USAGE_MCS:
case ISL_AUX_USAGE_MCS_CCS: case ISL_AUX_USAGE_MCS_CCS:

View file

@ -3116,7 +3116,18 @@ iris_create_sampler_view(struct pipe_context *ctx,
aux_usages = 1 << ISL_AUX_USAGE_NONE; aux_usages = 1 << ISL_AUX_USAGE_NONE;
} else if (isl_aux_usage_has_hiz(isv->res->aux.usage) && } else if (isl_aux_usage_has_hiz(isv->res->aux.usage) &&
!iris_sample_with_depth_aux(devinfo, isv->res)) { !iris_sample_with_depth_aux(devinfo, isv->res)) {
aux_usages = 1 << ISL_AUX_USAGE_NONE; if (isv->res->aux.usage == ISL_AUX_USAGE_HIZ_CCS &&
devinfo->verx10 >= 125) {
/* On Gfx12.5+ we can use partial resolves to maintain a
* depth surface CCS-compressed while sampling. We don't
* allow NONE though since the full resolves required to
* bring the surface to that state appear to be buggy on at
* least DG2 and MTL.
*/
aux_usages = 1 << ISL_AUX_USAGE_HIZ_CCS_WT;
} else {
aux_usages = 1 << ISL_AUX_USAGE_NONE;
}
} else { } else {
aux_usages = 1 << ISL_AUX_USAGE_NONE | aux_usages = 1 << ISL_AUX_USAGE_NONE |
1 << isv->res->aux.usage; 1 << isv->res->aux.usage;