diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 6ba343dda81..ca3692ef1b8 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -4376,6 +4376,74 @@ isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
    }
 }
 
+bool
+isl_surf_image_has_unique_tiles(const struct isl_surf *surf,
+                                uint32_t level,
+                                uint32_t start_layer,
+                                uint32_t num_layers,
+                                uint64_t *start_tile_B,
+                                uint64_t *end_tile_B)
+{
+   /* Layers of a 3D surface are depth slices, so they are passed to the
+    * range query as a Z offset rather than as an array layer.
+    */
+   bool dim_is_3d = surf->dim == ISL_SURF_DIM_3D;
+   uint32_t end_layer = start_layer + num_layers - 1;
+
+   /* Get the memory range of the specified subresource range. */
+   isl_surf_get_image_range_B_tile(surf, level,
+                                   dim_is_3d ? 0 : start_layer,
+                                   dim_is_3d ? start_layer : 0,
+                                   start_tile_B, end_tile_B);
+   if (num_layers > 1) {
+      /* end_tile_B may be incorrect, recompute it with end_layer. */
+      UNUSED uint64_t unused_start_tile_B;
+      isl_surf_get_image_range_B_tile(surf, level,
+                                      dim_is_3d ? 0 : end_layer,
+                                      dim_is_3d ? end_layer : 0,
+                                      &unused_start_tile_B, end_tile_B);
+   }
+
+   /* Check if the memory range of other subresource ranges overlap. */
+   for (int lod = 0; lod < surf->levels; lod++) {
+      int surf_layers = dim_is_3d ? u_minify(surf->logical_level0_px.d, lod) :
+                                    surf->logical_level0_px.a;
+      for (int layer = 0; layer < surf_layers; layer++) {
+
+         /* Skip the subresource range of interest. */
+         if (level == lod && layer >= start_layer && layer <= end_layer)
+            continue;
+
+         /* Query the miplevel being visited, not the one of interest. */
+         uint64_t start_tile_B_i, end_tile_B_i;
+         isl_surf_get_image_range_B_tile(surf, lod,
+                                         dim_is_3d ? 0 : layer,
+                                         dim_is_3d ? layer : 0,
+                                         &start_tile_B_i, &end_tile_B_i);
+
+         /* Check if the specified range is in this subresource. */
+         if (*start_tile_B >= start_tile_B_i &&
+             *start_tile_B <= end_tile_B_i)
+            return false;
+
+         if (*end_tile_B >= start_tile_B_i &&
+             *end_tile_B <= end_tile_B_i)
+            return false;
+
+         /* Check if this subresource is in the specified range. */
+         if (start_tile_B_i >= *start_tile_B &&
+             start_tile_B_i <= *end_tile_B)
+            return false;
+
+         if (end_tile_B_i >= *start_tile_B &&
+             end_tile_B_i <= *end_tile_B)
+            return false;
+      }
+   }
+
+   return true;
+}
+
 void
 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
                                 uint32_t level,
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index c83b28cd412..c6dae605d92 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -3041,6 +3041,25 @@ isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
                                     uint32_t *x_offset_el,
                                     uint32_t *y_offset_el);
 
+/**
+ * Returns whether or not a subresource range maps to a tile-aligned memory
+ * range which doesn't overlap other subresources.
+ *
+ * The range covers num_layers consecutive layers of miplevel level, starting
+ * at start_layer. For 3D surfaces the layers are depth slices. num_layers
+ * must be at least 1.
+ *
+ * start_tile_B and end_tile_B receive the memory range of the subresource
+ * range whether or not the function returns true.
+ */
+bool
+isl_surf_image_has_unique_tiles(const struct isl_surf *surf,
+                                uint32_t level,
+                                uint32_t start_layer,
+                                uint32_t num_layers,
+                                uint64_t *start_tile_B,
+                                uint64_t *end_tile_B);
+
 /**
  * Calculate the range in bytes occupied by a subimage, to the nearest tile.
  *
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 36ee77a138c..edc924e4aaf 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -2917,21 +2917,19 @@ anv_get_image_subresource_layout(struct anv_device *device,
    }
 
    const uint32_t level = subresource->imageSubresource.mipLevel;
+   bool subresource_has_unique_tiles = false;
    if (isl_surf) {
       /* ISL tries to give us a single layer but the Vulkan API expect the
        * entire 3D size.
        */
       const uint32_t layer = subresource->imageSubresource.arrayLayer;
-      const uint32_t z = u_minify(isl_surf->logical_level0_px.d, level) - 1;
-      uint64_t z0_start_tile_B, z0_end_tile_B;
-      uint64_t zX_start_tile_B, zX_end_tile_B;
-      isl_surf_get_image_range_B_tile(isl_surf, level, layer, 0,
-                                      &z0_start_tile_B, &z0_end_tile_B);
-      isl_surf_get_image_range_B_tile(isl_surf, level, layer, z,
-                                      &zX_start_tile_B, &zX_end_tile_B);
-
-      layout->subresourceLayout.offset = mem_range->offset + z0_start_tile_B;
-      layout->subresourceLayout.size = zX_end_tile_B - z0_start_tile_B;
+      const uint32_t layers = u_minify(isl_surf->logical_level0_px.d, level);
+      uint64_t start_tile_B, end_tile_B;
+      subresource_has_unique_tiles =
+         isl_surf_image_has_unique_tiles(isl_surf, level, layer, layers,
+                                         &start_tile_B, &end_tile_B);
+      layout->subresourceLayout.offset = mem_range->offset + start_tile_B;
+      layout->subresourceLayout.size = end_tile_B - start_tile_B;
       layout->subresourceLayout.rowPitch = row_pitch_B;
       layout->subresourceLayout.depthPitch =
          isl_surf_get_array_pitch(isl_surf);
@@ -2951,7 +2949,7 @@ anv_get_image_subresource_layout(struct anv_device *device,
    if (host_memcpy_size) {
       if (!isl_surf) {
          host_memcpy_size->size = 0;
-      } else if (anv_image_can_host_memcpy(image)) {
+      } else if (subresource_has_unique_tiles) {
          host_memcpy_size->size = layout->subresourceLayout.size;
       } else {
          /* If we cannot do straight memcpy of the image, compute a linear
diff --git a/src/intel/vulkan/anv_image_host_copy.c b/src/intel/vulkan/anv_image_host_copy.c
index 69f63309a2a..597ea1edba0 100644
--- a/src/intel/vulkan/anv_image_host_copy.c
+++ b/src/intel/vulkan/anv_image_host_copy.c
@@ -34,42 +34,6 @@ vk_extent3d_to_el(enum isl_format format, VkExtent3D extent)
    };
 }
 
-static void
-anv_memcpy_image_memory(struct anv_device *device,
-                        const struct isl_surf *surf,
-                        const struct anv_image_binding *binding,
-                        uint64_t binding_offset,
-                        void *mem_ptr,
-                        uint32_t level,
-                        uint32_t base_img_array_layer,
-                        uint32_t base_img_z_offset_px,
-                        uint32_t array_layer,
-                        uint32_t z_offset_px,
-                        bool mem_to_img)
-{
-   uint64_t start_tile_B, end_tile_B;
-   isl_surf_get_image_range_B_tile(surf, level,
-                                   base_img_array_layer,
-                                   base_img_z_offset_px,
-                                   &start_tile_B, &end_tile_B);
-   uint32_t array_pitch_B = isl_surf_get_array_pitch(surf);
-
-   uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
-                                      base_img_z_offset_px + z_offset_px);
-   uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
-
-   void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
-   if (mem_to_img) {
-      memcpy(img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
-             mem_ptr + mem_depth_or_layer * array_pitch_B,
-             end_tile_B - start_tile_B);
-   } else {
-      memcpy(mem_ptr + mem_depth_or_layer * array_pitch_B,
-             img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
-             end_tile_B - start_tile_B);
-   }
-}
-
 static void
 get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
                     uint32_t *out_x0_el, uint32_t *out_y0_el)
@@ -252,6 +216,9 @@ anv_CopyMemoryToImageEXT(
          &image->bindings[anv_surf->memory_range.binding];
 
       assert(binding->host_map != NULL);
+      void *img_ptr = binding->host_map + binding->map_delta +
+                      anv_surf->memory_range.offset;
+      const void *mem_ptr = region->pHostPointer;
 
       /* Memory distance between each row */
       uint64_t mem_row_pitch_B =
@@ -272,16 +239,20 @@ anv_CopyMemoryToImageEXT(
          vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);
       for (uint32_t a = 0; a < layer_count; a++) {
          for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
+            assert((region->imageOffset.z == 0 && z == 0) ||
+                   (region->imageSubresource.baseArrayLayer == 0 && a == 0));
+            uint64_t mem_row_offset = (z + a) * mem_height_pitch_B;
+            uint64_t start_tile_B, end_tile_B;
             if ((pCopyMemoryToImageInfo->flags &
                  VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
-                anv_image_can_host_memcpy(image)) {
-               anv_memcpy_image_memory(device, surf, binding,
-                                       anv_surf->memory_range.offset,
-                                       (void *)region->pHostPointer,
-                                       region->imageSubresource.mipLevel,
-                                       region->imageSubresource.baseArrayLayer,
-                                       region->imageOffset.z,
-                                       a, z, true /* mem_to_img */);
+                isl_surf_image_has_unique_tiles(surf,
+                   region->imageSubresource.mipLevel,
+                   region->imageOffset.z + z +
+                   region->imageSubresource.baseArrayLayer + a, 1,
+                   &start_tile_B, &end_tile_B)) {
+               memcpy(img_ptr + start_tile_B,
+                      mem_ptr + mem_row_offset,
+                      end_tile_B - start_tile_B);
             } else {
                anv_copy_image_memory(device, surf, binding,
                                      anv_surf->memory_range.offset,
@@ -322,6 +293,9 @@ anv_CopyImageToMemoryEXT(
          &image->bindings[anv_surf->memory_range.binding];
 
       assert(binding->host_map != NULL);
+      const void *img_ptr = binding->host_map + binding->map_delta +
+                            anv_surf->memory_range.offset;
+      void *mem_ptr = region->pHostPointer;
 
       VkOffset3D offset_el =
          vk_offset3d_to_el(surf->format, region->imageOffset);
@@ -342,16 +316,20 @@ anv_CopyImageToMemoryEXT(
          vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);
       for (uint32_t a = 0; a < layer_count; a++) {
          for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
+            assert((region->imageOffset.z == 0 && z == 0) ||
+                   (region->imageSubresource.baseArrayLayer == 0 && a == 0));
+            uint64_t mem_row_offset = (z + a) * mem_height_pitch_B;
+            uint64_t start_tile_B, end_tile_B;
             if ((pCopyImageToMemoryInfo->flags &
                  VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
-                anv_image_can_host_memcpy(image)) {
-               anv_memcpy_image_memory(device, surf, binding,
-                                       anv_surf->memory_range.offset,
-                                       region->pHostPointer,
-                                       region->imageSubresource.mipLevel,
-                                       region->imageSubresource.baseArrayLayer,
-                                       region->imageOffset.z,
-                                       a, z, false /* mem_to_img */);
+                isl_surf_image_has_unique_tiles(surf,
+                   region->imageSubresource.mipLevel,
+                   region->imageOffset.z + z +
+                   region->imageSubresource.baseArrayLayer + a, 1,
+                   &start_tile_B, &end_tile_B)) {
+               memcpy(mem_ptr + mem_row_offset,
+                      img_ptr + start_tile_B,
+                      end_tile_B - start_tile_B);
             } else {
                anv_copy_image_memory(device, surf, binding,
                                      anv_surf->memory_range.offset,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index a2bda201b02..72682e51570 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -5689,22 +5689,6 @@ anv_image_format_is_d16_or_s8(const struct anv_image *image)
           image->vk.format == VK_FORMAT_S8_UINT;
 }
 
-static inline bool
-anv_image_can_host_memcpy(const struct anv_image *image)
-{
-   const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
-   struct isl_tile_info tile_info;
-   isl_surf_get_tile_info(surf, &tile_info);
-
-   const bool array_pitch_aligned_to_tile =
-      surf->array_pitch_el_rows % tile_info.logical_extent_el.height == 0;
-
-   return image->vk.tiling != VK_IMAGE_TILING_LINEAR &&
-          image->n_planes == 1 &&
-          array_pitch_aligned_to_tile &&
-          image->vk.mip_levels == 1;
-}
-
 /* The ordering of this enum is important */
 enum anv_fast_clear_type {
    /** Image does not have/support any fast-clear blocks */