anv: Place images into the aux-map when safe to do so
At image bind time, if an image's addresses can be placed into the aux-map
without causing conflicts with a pre-existing mapping, do so.

The aux management code in the binding function operates on a per-plane
basis. So, use the per-plane CCS memory range from the image rather than the
CCS memory region for the entire BO.

Another way to avoid aux-map conflicts is to rely solely on having a
dedicated allocation for an image. Unfortunately, not all workloads change
their behavior when drivers report a preference for dedicated allocations.
In particular, 3DMark Wild Life Extreme does not make more dedicated
allocations, and such a solution was measured to perform ~16% worse than this
one. With this solution, I did not measure a loss of CCS on that benchmark.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6304
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (v1)
Reviewed-by: Jianxun Zhang <jianxun.zhang@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25003>
parent 207db22117
commit ee6e2bc4a3

4 changed files with 26 additions and 148 deletions
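Note: the hunks below gate aux-map updates on a helper, anv_bo_allows_aux_map(),
whose definition is not part of the extracted hunks. As a rough sketch only,
assuming it simply combines the conditions that the removed code in
anv_device_release_bo() used to assert (device->info->has_aux_map and
bo->has_implicit_ccs), it could look like this:

/* Hypothetical sketch, not the definition from this commit: a BO can carry
 * aux-map entries only when the device routes CCS through the aux-map and
 * the BO was allocated with implicit CCS.  Field names are taken from the
 * asserts removed in anv_device_release_bo() below.
 */
static inline bool
anv_bo_allows_aux_map(const struct anv_device *device,
                      const struct anv_bo *bo)
{
   if (!device->info->has_aux_map)
      return false;

   return bo->has_implicit_ccs;
}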
@@ -1508,15 +1508,6 @@ anv_device_alloc_bo(struct anv_device *device,
          return vk_errorf(device, VK_ERROR_UNKNOWN, "vm bind failed");
       }
 
-   if (new_bo._ccs_size > 0) {
-      assert(device->info->has_aux_map);
-      const bool mapped =
-         intel_aux_map_add_mapping(device->aux_map_ctx, new_bo.offset,
-                                   intel_canonical_address(new_bo.offset + new_bo.size),
-                                   new_bo.size, 0 /* format_bits */);
-      assert(mapped);
-   }
-
    assert(new_bo.gem_handle);
 
    /* If we just got this gem_handle from anv_bo_init_new then we know no one
@@ -1867,12 +1858,11 @@ anv_device_release_bo(struct anv_device *device,
    }
    assert(bo->refcount == 0);
 
-   if (bo->_ccs_size > 0) {
-      assert(device->physical->has_implicit_ccs);
-      assert(device->info->has_aux_map);
-      assert(bo->has_implicit_ccs);
-      intel_aux_map_unmap_range(device->aux_map_ctx, bo->offset, bo->size);
-   }
+   /* Unmap the entire BO. In the case that some addresses lacked an aux-map
+    * entry, the unmapping function will add table entries for them.
+    */
+   if (anv_bo_allows_aux_map(device, bo))
+      intel_aux_map_unmap_range(device->aux_map_ctx, bo->offset, bo->size);
 
    /* Memset the BO just in case. The refcount being zero should be enough to
     * prevent someone from assuming the data is valid but it's safer to just
@@ -2273,8 +2273,25 @@ VkResult anv_BindImageMemory2(
       if (device->info->has_flat_ccs && bo->vram_only)
          continue;
 
+      /* Add the plane to the aux map when applicable. */
       if (anv_bo_allows_aux_map(device, bo)) {
-         continue;
+         const struct anv_address main_addr =
+            anv_image_address(image,
+               &image->planes[p].primary_surface.memory_range);
+         const struct anv_address aux_addr =
+            anv_image_address(image,
+               &image->planes[p].compr_ctrl_memory_range);
+         const struct isl_surf *surf =
+            &image->planes[p].primary_surface.isl;
+         const uint64_t format_bits =
+            intel_aux_map_format_bits_for_isl_surf(surf);
+         const bool mapped =
+            intel_aux_map_add_mapping(device->aux_map_ctx,
+                                      anv_address_physical(main_addr),
+                                      anv_address_physical(aux_addr),
+                                      surf->size_B, format_bits);
+         if (mapped)
+            continue;
       }
 
       /* Do nothing prior to gfx12. There are no special requirements. */
@@ -4855,7 +4855,10 @@ static inline const struct anv_image_memory_range *
 anv_image_get_aux_memory_range(const struct anv_image *image,
                                uint32_t plane)
 {
-   return &image->planes[plane].aux_surface.memory_range;
+   if (image->planes[plane].aux_surface.memory_range.size > 0)
+      return &image->planes[plane].aux_surface.memory_range;
+   else
+      return &image->planes[plane].compr_ctrl_memory_range;
 }
 
 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
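For illustration only (not part of the commit): with the change above, a caller
can resolve a plane's aux range the same way whether the plane has a dedicated
aux surface (HiZ/MCS) or only a compression-control range. A minimal usage
sketch, assuming an image and plane 0 are in scope:

/* Illustrative sketch: anv_image_get_aux_memory_range() now falls back to
 * the per-plane compression-control range when there is no dedicated aux
 * surface, so the address computation is uniform across aux usages.
 */
const struct anv_image_memory_range *aux_range =
   anv_image_get_aux_memory_range(image, 0 /* plane */);
struct anv_address aux_addr = anv_image_address(image, aux_range);
uint64_t aux_addr_physical = anv_address_physical(aux_addr);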
@@ -339,113 +339,6 @@ add_surface_state_relocs(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
-#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
-
-#if GFX_VER == 12
-static void
-anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
-                      const struct anv_image *image,
-                      VkImageAspectFlagBits aspect,
-                      uint32_t base_level, uint32_t level_count,
-                      uint32_t base_layer, uint32_t layer_count)
-{
-   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
-
-   const struct anv_surface *surface = &image->planes[plane].primary_surface;
-   uint64_t base_address =
-      anv_address_physical(anv_image_address(image, &surface->memory_range));
-
-   const struct isl_surf *isl_surf = &image->planes[plane].primary_surface.isl;
-   uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(isl_surf);
-
-   /* We're about to live-update the AUX-TT. We really don't want anyone else
-    * trying to read it while we're doing this. We could probably get away
-    * with not having this stall in some cases if we were really careful but
-    * it's better to play it safe. Full stall the GPU.
-    */
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
-                             "before update AUX-TT");
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
-   struct mi_builder b;
-   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
-
-   for (uint32_t a = 0; a < layer_count; a++) {
-      const uint32_t layer = base_layer + a;
-
-      uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
-      for (uint32_t l = 0; l < level_count; l++) {
-         const uint32_t level = base_level + l;
-
-         uint32_t logical_array_layer, logical_z_offset_px;
-         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
-            logical_array_layer = 0;
-
-            /* If the given miplevel does not have this layer, then any higher
-             * miplevels won't either because miplevels only get smaller the
-             * higher the LOD.
-             */
-            assert(layer < image->vk.extent.depth);
-            if (layer >= u_minify(image->vk.extent.depth, level))
-               break;
-            logical_z_offset_px = layer;
-         } else {
-            assert(layer < image->vk.array_layers);
-            logical_array_layer = layer;
-            logical_z_offset_px = 0;
-         }
-
-         uint64_t slice_start_offset_B, slice_end_offset_B;
-         isl_surf_get_image_range_B_tile(isl_surf, level,
-                                         logical_array_layer,
-                                         logical_z_offset_px,
-                                         &slice_start_offset_B,
-                                         &slice_end_offset_B);
-
-         start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
-         end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
-      }
-
-      struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
-      /* It depends on what the purpose you use that figure from AUX module,
-       * alignment, page size of main surface, or actually granularity...
-       */
-      uint64_t main_page_size = intel_aux_map_get_alignment(ctx);
-      start_offset_B = ROUND_DOWN_TO(start_offset_B, main_page_size);
-      end_offset_B = align64(end_offset_B, main_page_size);
-
-      for (uint64_t offset = start_offset_B;
-           offset < end_offset_B; offset += main_page_size) {
-         uint64_t address = base_address + offset;
-
-         uint64_t aux_entry_addr64, *aux_entry_map;
-         struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
-         aux_entry_map = intel_aux_map_get_entry(ctx, address, &aux_entry_addr64);
-
-         struct anv_address aux_entry_address = {
-            .bo = NULL,
-            .offset = aux_entry_addr64,
-         };
-
-         const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
-         uint64_t new_aux_entry =
-            (old_aux_entry & intel_aux_get_meta_address_mask(ctx)) |
-            format_bits;
-
-         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
-            new_aux_entry |= INTEL_AUX_MAP_ENTRY_VALID_BIT;
-
-         mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry));
-      }
-   }
-
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
-                             "after update AUX-TT");
-}
-#endif /* GFX_VER == 12 */
-
 /* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
  * the initial layout is undefined, the HiZ buffer and depth buffer will
  * represent the same data at the end of this operation.
@@ -463,16 +356,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
    if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
       return;
 
-#if GFX_VER == 12
-   if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
-        initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
-       cmd_buffer->device->physical->has_implicit_ccs &&
-       cmd_buffer->device->info->has_aux_map) {
-      anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
-                            0, 1, base_layer, layer_count);
-   }
-#endif
-
    /* If will_full_fast_clear is set, the caller promises to fast-clear the
     * largest portion of the specified range as it can. For depth images,
     * that means the entire image because we don't support multi-LOD HiZ.
@@ -541,9 +424,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
        initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
       cmd_buffer->device->physical->has_implicit_ccs &&
       cmd_buffer->device->info->has_aux_map) {
-      anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
-                            base_level, level_count, base_layer, layer_count);
-
      /* If will_full_fast_clear is set, the caller promises to fast-clear the
       * largest portion of the specified range as it can.
       */
@@ -1160,18 +1040,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
       }
    }
 
-#if GFX_VER == 12
-   if (initial_layout_undefined) {
-      if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
-         anv_image_init_aux_tt(cmd_buffer, image, aspect,
-                               base_level, level_count,
-                               base_layer, layer_count);
-      }
-   }
-#else
-   assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map));
-#endif
-
    if (must_init_fast_clear_state) {
       if (base_level == 0 && base_layer == 0) {
          set_image_fast_clear_state(cmd_buffer, image, aspect,