mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 13:40:16 +01:00
anv: Place images into the aux-map when safe to do so
At image bind time, if an image's addresses can be placed into the aux-map without causing conflicts with a pre-existing mapping, do so. The aux management code in the binding function operates on a per-plane basis. So, use the per-plane CCS memory range from the image rather than the CCS memory region for the entire BO. Another way to avoid aux-map conflicts is to rely solely on having a dedicated allocation for an image. Unfortunately, not all workloads change their behavior when drivers report a preference for dedicated allocations. In particular, 3DMark Wild Life Extreme does not make more dedicated allocations and such a solution was measured to perform ~16% worse than this solution. With this solution, I did not measure a loss of CCS on that benchmark. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6304 Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (v1) Reviewed-by: Jianxun Zhang <jianxun.zhang@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25003>
This commit is contained in:
parent
207db22117
commit
ee6e2bc4a3
4 changed files with 26 additions and 148 deletions
|
|
@ -1508,15 +1508,6 @@ anv_device_alloc_bo(struct anv_device *device,
|
|||
return vk_errorf(device, VK_ERROR_UNKNOWN, "vm bind failed");
|
||||
}
|
||||
|
||||
if (new_bo._ccs_size > 0) {
|
||||
assert(device->info->has_aux_map);
|
||||
const bool mapped =
|
||||
intel_aux_map_add_mapping(device->aux_map_ctx, new_bo.offset,
|
||||
intel_canonical_address(new_bo.offset + new_bo.size),
|
||||
new_bo.size, 0 /* format_bits */);
|
||||
assert(mapped);
|
||||
}
|
||||
|
||||
assert(new_bo.gem_handle);
|
||||
|
||||
/* If we just got this gem_handle from anv_bo_init_new then we know no one
|
||||
|
|
@ -1867,12 +1858,11 @@ anv_device_release_bo(struct anv_device *device,
|
|||
}
|
||||
assert(bo->refcount == 0);
|
||||
|
||||
if (bo->_ccs_size > 0) {
|
||||
assert(device->physical->has_implicit_ccs);
|
||||
assert(device->info->has_aux_map);
|
||||
assert(bo->has_implicit_ccs);
|
||||
/* Unmap the entire BO. In the case that some addresses lacked an aux-map
|
||||
* entry, the unmapping function will add table entries for them.
|
||||
*/
|
||||
if (anv_bo_allows_aux_map(device, bo))
|
||||
intel_aux_map_unmap_range(device->aux_map_ctx, bo->offset, bo->size);
|
||||
}
|
||||
|
||||
/* Memset the BO just in case. The refcount being zero should be enough to
|
||||
* prevent someone from assuming the data is valid but it's safer to just
|
||||
|
|
|
|||
|
|
@ -2273,8 +2273,25 @@ VkResult anv_BindImageMemory2(
|
|||
if (device->info->has_flat_ccs && bo->vram_only)
|
||||
continue;
|
||||
|
||||
/* Add the plane to the aux map when applicable. */
|
||||
if (anv_bo_allows_aux_map(device, bo)) {
|
||||
continue;
|
||||
const struct anv_address main_addr =
|
||||
anv_image_address(image,
|
||||
&image->planes[p].primary_surface.memory_range);
|
||||
const struct anv_address aux_addr =
|
||||
anv_image_address(image,
|
||||
&image->planes[p].compr_ctrl_memory_range);
|
||||
const struct isl_surf *surf =
|
||||
&image->planes[p].primary_surface.isl;
|
||||
const uint64_t format_bits =
|
||||
intel_aux_map_format_bits_for_isl_surf(surf);
|
||||
const bool mapped =
|
||||
intel_aux_map_add_mapping(device->aux_map_ctx,
|
||||
anv_address_physical(main_addr),
|
||||
anv_address_physical(aux_addr),
|
||||
surf->size_B, format_bits);
|
||||
if (mapped)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Do nothing prior to gfx12. There are no special requirements. */
|
||||
|
|
|
|||
|
|
@ -4855,7 +4855,10 @@ static inline const struct anv_image_memory_range *
|
|||
anv_image_get_aux_memory_range(const struct anv_image *image,
|
||||
uint32_t plane)
|
||||
{
|
||||
return &image->planes[plane].aux_surface.memory_range;
|
||||
if (image->planes[plane].aux_surface.memory_range.size > 0)
|
||||
return &image->planes[plane].aux_surface.memory_range;
|
||||
else
|
||||
return &image->planes[plane].compr_ctrl_memory_range;
|
||||
}
|
||||
|
||||
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
|
||||
|
|
|
|||
|
|
@ -339,113 +339,6 @@ add_surface_state_relocs(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
/* Reads the lvalue x through a volatile-qualified pointer of x's own type.
 * The argument must be an lvalue because its address is taken. The type is
 * obtained with the compiler's __typeof__ extension.
 */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
|
||||
|
||||
#if GFX_VER == 12
|
||||
static void
|
||||
anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_image *image,
|
||||
VkImageAspectFlagBits aspect,
|
||||
uint32_t base_level, uint32_t level_count,
|
||||
uint32_t base_layer, uint32_t layer_count)
|
||||
{
|
||||
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
|
||||
|
||||
const struct anv_surface *surface = &image->planes[plane].primary_surface;
|
||||
uint64_t base_address =
|
||||
anv_address_physical(anv_image_address(image, &surface->memory_range));
|
||||
|
||||
const struct isl_surf *isl_surf = &image->planes[plane].primary_surface.isl;
|
||||
uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(isl_surf);
|
||||
|
||||
/* We're about to live-update the AUX-TT. We really don't want anyone else
|
||||
* trying to read it while we're doing this. We could probably get away
|
||||
* with not having this stall in some cases if we were really careful but
|
||||
* it's better to play it safe. Full stall the GPU.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"before update AUX-TT");
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
for (uint32_t a = 0; a < layer_count; a++) {
|
||||
const uint32_t layer = base_layer + a;
|
||||
|
||||
uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
|
||||
for (uint32_t l = 0; l < level_count; l++) {
|
||||
const uint32_t level = base_level + l;
|
||||
|
||||
uint32_t logical_array_layer, logical_z_offset_px;
|
||||
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
|
||||
logical_array_layer = 0;
|
||||
|
||||
/* If the given miplevel does not have this layer, then any higher
|
||||
* miplevels won't either because miplevels only get smaller the
|
||||
* higher the LOD.
|
||||
*/
|
||||
assert(layer < image->vk.extent.depth);
|
||||
if (layer >= u_minify(image->vk.extent.depth, level))
|
||||
break;
|
||||
logical_z_offset_px = layer;
|
||||
} else {
|
||||
assert(layer < image->vk.array_layers);
|
||||
logical_array_layer = layer;
|
||||
logical_z_offset_px = 0;
|
||||
}
|
||||
|
||||
uint64_t slice_start_offset_B, slice_end_offset_B;
|
||||
isl_surf_get_image_range_B_tile(isl_surf, level,
|
||||
logical_array_layer,
|
||||
logical_z_offset_px,
|
||||
&slice_start_offset_B,
|
||||
&slice_end_offset_B);
|
||||
|
||||
start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
|
||||
end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
|
||||
}
|
||||
|
||||
struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
|
||||
/* It depends on what the purpose you use that figure from AUX module,
|
||||
* alignment, page size of main surface, or actually granularity...
|
||||
*/
|
||||
uint64_t main_page_size = intel_aux_map_get_alignment(ctx);
|
||||
start_offset_B = ROUND_DOWN_TO(start_offset_B, main_page_size);
|
||||
end_offset_B = align64(end_offset_B, main_page_size);
|
||||
|
||||
for (uint64_t offset = start_offset_B;
|
||||
offset < end_offset_B; offset += main_page_size) {
|
||||
uint64_t address = base_address + offset;
|
||||
|
||||
uint64_t aux_entry_addr64, *aux_entry_map;
|
||||
struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
|
||||
aux_entry_map = intel_aux_map_get_entry(ctx, address, &aux_entry_addr64);
|
||||
|
||||
struct anv_address aux_entry_address = {
|
||||
.bo = NULL,
|
||||
.offset = aux_entry_addr64,
|
||||
};
|
||||
|
||||
const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
|
||||
uint64_t new_aux_entry =
|
||||
(old_aux_entry & intel_aux_get_meta_address_mask(ctx)) |
|
||||
format_bits;
|
||||
|
||||
if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
|
||||
new_aux_entry |= INTEL_AUX_MAP_ENTRY_VALID_BIT;
|
||||
|
||||
mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry));
|
||||
}
|
||||
}
|
||||
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
|
||||
"after update AUX-TT");
|
||||
}
|
||||
#endif /* GFX_VER == 12 */
|
||||
|
||||
/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
|
||||
* the initial layout is undefined, the HiZ buffer and depth buffer will
|
||||
* represent the same data at the end of this operation.
|
||||
|
|
@ -463,16 +356,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
|
||||
return;
|
||||
|
||||
#if GFX_VER == 12
|
||||
if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
|
||||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
|
||||
cmd_buffer->device->physical->has_implicit_ccs &&
|
||||
cmd_buffer->device->info->has_aux_map) {
|
||||
anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
0, 1, base_layer, layer_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If will_full_fast_clear is set, the caller promises to fast-clear the
|
||||
* largest portion of the specified range as it can. For depth images,
|
||||
* that means the entire image because we don't support multi-LOD HiZ.
|
||||
|
|
@ -541,9 +424,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
|
||||
cmd_buffer->device->physical->has_implicit_ccs &&
|
||||
cmd_buffer->device->info->has_aux_map) {
|
||||
anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
base_level, level_count, base_layer, layer_count);
|
||||
|
||||
/* If will_full_fast_clear is set, the caller promises to fast-clear the
|
||||
* largest portion of the specified range as it can.
|
||||
*/
|
||||
|
|
@ -1160,18 +1040,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER == 12
|
||||
if (initial_layout_undefined) {
|
||||
if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
|
||||
anv_image_init_aux_tt(cmd_buffer, image, aspect,
|
||||
base_level, level_count,
|
||||
base_layer, layer_count);
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map));
|
||||
#endif
|
||||
|
||||
if (must_init_fast_clear_state) {
|
||||
if (base_level == 0 && base_layer == 0) {
|
||||
set_image_fast_clear_state(cmd_buffer, image, aspect,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue