/* Copyright © 2024 Intel Corporation * SPDX-License-Identifier: MIT */ #include #include #include "anv_private.h" #include "util/macros.h" #include "util/texcompress_astc.h" #include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "vk_util.h" #define TMP_BUFFER_SIZE 4096 static inline VkOffset3D vk_offset3d_to_el(enum isl_format format, VkOffset3D offset) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); return (VkOffset3D) { .x = offset.x / fmt_layout->bw, .y = offset.y / fmt_layout->bh, .z = offset.z / fmt_layout->bd, }; } static inline VkOffset3D vk_el_to_offset3d(enum isl_format format, VkOffset3D offset) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); return (VkOffset3D) { .x = offset.x * fmt_layout->bw, .y = offset.y * fmt_layout->bh, .z = offset.z * fmt_layout->bd, }; } static inline VkExtent3D vk_extent3d_to_el(enum isl_format format, VkExtent3D extent) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); return (VkExtent3D) { .width = DIV_ROUND_UP(extent.width, fmt_layout->bw), .height = DIV_ROUND_UP(extent.height, fmt_layout->bh), .depth = DIV_ROUND_UP(extent.depth, fmt_layout->bd), }; } static inline VkExtent3D vk_el_to_extent3d(enum isl_format format, VkExtent3D extent) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); return (VkExtent3D) { .width = extent.width * fmt_layout->bw, .height = extent.height * fmt_layout->bh, .depth = extent.depth * fmt_layout->bd, }; } static void get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z, uint32_t *out_x0_el, uint32_t *out_y0_el) { ASSERTED uint32_t z0_el, a0_el; if (surf->dim == ISL_SURF_DIM_3D) { isl_surf_get_image_offset_el(surf, level, 0, z, out_x0_el, out_y0_el, &z0_el, &a0_el); } else { isl_surf_get_image_offset_el(surf, level, z, 0, out_x0_el, out_y0_el, &z0_el, &a0_el); } assert(z0_el == 0 && a0_el == 0); } /* Compute extent parameters for use with tiled_memcpy functions. * xs are in units of bytes and ys are in units of strides. */ static inline void tile_extents(const struct isl_surf *surf, const VkOffset3D *offset_el, const VkExtent3D *extent_el, unsigned level, int z, uint32_t *x1_B, uint32_t *x2_B, uint32_t *y1_el, uint32_t *y2_el) { const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); const unsigned cpp = fmtl->bpb / 8; /* z contains offset->z */ assert (z >= offset_el->z); unsigned x0_el, y0_el; get_image_offset_el(surf, level, z, &x0_el, &y0_el); *x1_B = (offset_el->x + x0_el) * cpp; *y1_el = offset_el->y + y0_el; *x2_B = (offset_el->x + extent_el->width + x0_el) * cpp; *y2_el = offset_el->y + extent_el->height + y0_el; } static void anv_copy_image_memory(struct anv_device *device, const struct isl_surf *surf, const struct anv_image_binding *binding, uint64_t binding_offset, void *mem_ptr, uint64_t mem_row_pitch_B, const VkOffset3D *offset_el, const VkExtent3D *extent_el, uint32_t level, uint32_t base_img_array_layer, uint32_t base_img_z_offset_px, uint32_t array_layer, uint32_t z_offset_px, bool mem_to_img) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(surf->format); const uint32_t bs = fmt_layout->bpb / 8; void *img_ptr = binding->host_map + binding->map_delta + binding_offset; uint64_t start_tile_B, end_tile_B; isl_surf_get_image_range_B_tile(surf, level, base_img_array_layer + array_layer, base_img_z_offset_px + z_offset_px, &start_tile_B, &end_tile_B); #ifdef SUPPORT_INTEL_INTEGRATED_GPUS const bool need_invalidate_flush = (binding->address.bo->flags & ANV_BO_ALLOC_HOST_COHERENT) == 0 && device->physical->memory.need_flush; if (need_invalidate_flush && !mem_to_img) util_flush_inval_range(img_ptr + start_tile_B, end_tile_B - start_tile_B); #endif uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer, base_img_z_offset_px + z_offset_px); if (surf->tiling == ISL_TILING_LINEAR) { uint64_t img_col_offset = offset_el->x * bs; uint64_t row_copy_size = extent_el->width * bs; for (uint32_t h_el = 0; h_el < extent_el->height; h_el++) { uint64_t mem_row_offset = h_el * mem_row_pitch_B; uint64_t img_row = h_el + offset_el->y; uint64_t img_offset = start_tile_B + img_row * surf->row_pitch_B + img_col_offset; assert((img_offset + row_copy_size) <= binding->memory_range.size); if (mem_to_img) memcpy(img_ptr + img_offset, mem_ptr + mem_row_offset, row_copy_size); else memcpy(mem_ptr + mem_row_offset, img_ptr + img_offset, row_copy_size); } } else { uint32_t x1, x2, y1, y2; tile_extents(surf, offset_el, extent_el, level, img_depth_or_layer, &x1, &x2, &y1, &y2); if (mem_to_img) { isl_memcpy_linear_to_tiled(x1, x2, y1, y2, img_ptr, mem_ptr, surf->row_pitch_B, mem_row_pitch_B, false, surf->tiling, ISL_MEMCPY); } else { isl_memcpy_tiled_to_linear(x1, x2, y1, y2, mem_ptr, img_ptr, mem_row_pitch_B, surf->row_pitch_B, false, surf->tiling, #if defined(USE_SSE41) util_get_cpu_caps()->has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD : #endif ISL_MEMCPY); } } #ifdef SUPPORT_INTEL_INTEGRATED_GPUS if (need_invalidate_flush && mem_to_img) util_flush_range(img_ptr + start_tile_B, end_tile_B - start_tile_B); #endif } static uint64_t calc_mem_row_pitch_B(enum isl_format format, uint64_t api_row_length_px, const VkExtent3D *extent_px) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); const uint32_t bs = fmt_layout->bpb / 8; return api_row_length_px != 0 ? (bs * DIV_ROUND_UP(api_row_length_px, fmt_layout->bw)) : (bs * DIV_ROUND_UP(extent_px->width, fmt_layout->bw)); } static uint64_t calc_mem_height_pitch_B(enum isl_format format, uint64_t row_pitch_B, uint64_t api_height_px, const VkExtent3D *extent_px) { const struct isl_format_layout *fmt_layout = isl_format_get_layout(format); return api_height_px != 0 ? (row_pitch_B * DIV_ROUND_UP(api_height_px, fmt_layout->bh)) : (row_pitch_B * DIV_ROUND_UP(extent_px->height, fmt_layout->bh)); } /* TODO: Get rid of this. * * For three component RGB images created with optimal layout, we actually * create an RGBX or RGBA(with swizzle ALPHA_ONE), as the HW cannot handle * tiling of non-power of 2 formats. This is a problem for host image copy, as * the isl_memcpy functions are not prepared to deal with the RGB <-> RGBX * conversion necessary. */ static bool needs_temp_copy(struct anv_image *image, VkHostImageCopyFlags flags) { if (image->vk.tiling != VK_IMAGE_TILING_OPTIMAL || flags & VK_HOST_IMAGE_COPY_MEMCPY_BIT) return false; /* Skip depth/stencil formats */ if (vk_format_is_depth_or_stencil(image->vk.format)) return false; /* Need temp copy for RGB formats (3 components) */ bool is_rgb = util_format_get_nr_components(vk_format_to_pipe_format(image->vk.format)) == 3; /* Need temp copy for emulated formats (ASTC) */ bool is_emulated = image->emu_plane_format != VK_FORMAT_UNDEFINED; return is_rgb || is_emulated; } /* Callback typedef for converting data through intermediate buffer */ typedef void (*intermediate_conversion_fn)( const uint8_t *src, uint64_t src_stride_B, uint8_t *dst, uint64_t dst_stride_B, const VkExtent3D *extent, const void *user_data); /* Data structure for RGB conversion parameters */ struct rgb_conversion_params { int src_bpp; int dst_bpp; }; /* RGB<->RGBA conversion callback */ static void rgb_rgba_conversion_callback(const uint8_t *src, uint64_t src_stride_B, uint8_t *dst, uint64_t dst_stride_B, const VkExtent3D *extent, const void *user_data) { const struct rgb_conversion_params *params = user_data; int bpp = MIN2(params->src_bpp, params->dst_bpp); for (int y = 0; y < extent->height; y++) { const uint8_t *row_src = src + y * src_stride_B; uint8_t *row_dst = dst + y * dst_stride_B; for (int x = 0; x < extent->width; x++) { memcpy(row_dst, row_src, bpp); row_src += params->src_bpp; row_dst += params->dst_bpp; } } } /* ASTC decompression callback */ static void astc_decompression_callback(const uint8_t *src, uint64_t src_stride_B, uint8_t *dst, uint64_t dst_stride_B, const VkExtent3D *extent, const void *user_data) { const struct util_format_description *desc = user_data; _mesa_unpack_astc_2d_ldr(dst, dst_stride_B, src, src_stride_B, extent->width, extent->height, desc->format); } static void copy_intermediate(struct anv_device *device, const void *mem_ptr, uint32_t mem_row_pitch_B, enum isl_format mem_format, struct anv_image *image, const struct anv_surface *anv_surf, const void *region_ptr, bool mem_to_img, void *tmp_mem, uint32_t a, uint32_t z, intermediate_conversion_fn callback, const void *callback_data) { /* Extract region fields based on direction. * Both VkMemoryToImageCopy and VkImageToMemoryCopy have compatible layouts * for the fields we need (imageSubresource, imageOffset, imageExtent). */ const VkMemoryToImageCopy *mem_to_img_region = region_ptr; const VkImageToMemoryCopy *img_to_mem_region = region_ptr; const VkImageSubresourceLayers *imageSubresource = mem_to_img ? &mem_to_img_region->imageSubresource : &img_to_mem_region->imageSubresource; VkOffset3D imageOffset = mem_to_img ? mem_to_img_region->imageOffset : img_to_mem_region->imageOffset; VkExtent3D imageExtent = mem_to_img ? mem_to_img_region->imageExtent : img_to_mem_region->imageExtent; const struct isl_surf *surf = &anv_surf->isl; const struct anv_image_binding *binding = &image->bindings[anv_surf->memory_range.binding]; const struct isl_format_layout *mem_fmt_layout = isl_format_get_layout(mem_format); uint32_t mem_bpp = mem_fmt_layout->bpb / 8; /* There is no requirement that the extent be aligned to the texel block size. */ VkOffset3D offset_el = vk_offset3d_to_el(mem_format, imageOffset); VkExtent3D extent_el = vk_extent3d_to_el(mem_format, imageExtent); struct isl_tile_info tile; isl_surf_get_tile_info(surf, &tile); uint32_t tile_width_B = tile.phys_extent_B.w; uint32_t tile_width_el = tile.logical_extent_el.w; uint32_t tile_height_el = tile.logical_extent_el.h; if (tile_width_el == 1 && tile_height_el == 1) { tile_width_el = MIN2(4096 / (mem_fmt_layout->bpb / 8), extent_el.width); tile_height_el = 4096 / (tile_width_el * (mem_fmt_layout->bpb / 8)); tile_width_B = tile_width_el / mem_fmt_layout->bw; } for (uint32_t y_el = 0; y_el < extent_el.height; y_el += tile_height_el) { for (uint32_t x_el = 0; x_el < extent_el.width; x_el += tile_width_el) { VkOffset3D offset = { .x = offset_el.x + x_el, .y = offset_el.y + y_el, }; VkExtent3D extent = { .width = MIN2(extent_el.width - x_el, tile_width_el), .height = MIN2(extent_el.height - y_el, tile_height_el), .depth = 1, }; VkOffset3D src_offset = { .x = x_el, .y = y_el, }; const uint8_t *mem_ptr_offset = mem_ptr + (src_offset.x * mem_bpp) + (src_offset.y * mem_row_pitch_B); if (mem_to_img) { callback(mem_ptr_offset, mem_row_pitch_B, tmp_mem, tile_width_B, &extent, callback_data); anv_copy_image_memory(device, surf, binding, anv_surf->memory_range.offset, tmp_mem, tile_width_B, &offset, &extent, imageSubresource->mipLevel, imageSubresource->baseArrayLayer, imageOffset.z, a, z, mem_to_img); } else { anv_copy_image_memory(device, surf, binding, anv_surf->memory_range.offset, tmp_mem, tile_width_B, &offset, &extent, imageSubresource->mipLevel, imageSubresource->baseArrayLayer, imageOffset.z, a, z, mem_to_img); callback(tmp_mem, tile_width_B, (uint8_t *)mem_ptr_offset, mem_row_pitch_B, &extent, callback_data); } } } } VkResult anv_CopyMemoryToImage( VkDevice _device, const VkCopyMemoryToImageInfo* pCopyMemoryToImageInfo) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image, image, pCopyMemoryToImageInfo->dstImage); const bool use_memcpy = (pCopyMemoryToImageInfo->flags & VK_HOST_IMAGE_COPY_MEMCPY) != 0; const bool temp_copy = needs_temp_copy(image, pCopyMemoryToImageInfo->flags); const bool is_emulated = image->emu_plane_format != VK_FORMAT_UNDEFINED; void *tmp_mem = NULL; if (temp_copy || is_emulated) { tmp_mem = vk_alloc(&device->vk.alloc, TMP_BUFFER_SIZE, 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (tmp_mem == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } for (uint32_t r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) { const VkMemoryToImageCopy *region = &pCopyMemoryToImageInfo->pRegions[r]; const uint32_t plane = anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask); const struct anv_surface *anv_surf = &image->planes[plane].primary_surface; const struct isl_surf *surf = &anv_surf->isl; const struct anv_image_binding *binding = &image->bindings[anv_surf->memory_range.binding]; assert(binding->host_map != NULL); void *img_ptr = binding->host_map + binding->map_delta + anv_surf->memory_range.offset; const struct anv_format *anv_format = anv_get_format(device->physical, image->vk.format); struct anv_format_plane anv_plane_format = anv_format->planes[plane]; const struct util_format_description *desc = NULL; if (is_emulated) { desc = vk_format_description(image->vk.format); if (unlikely(desc->layout != UTIL_FORMAT_LAYOUT_ASTC)) UNREACHABLE("Unsupported emulated format"); } /* We can use the image format to figure out all the pitches if using * memcpy, otherwise memory & image might have different formats, so use * the API format of the image. */ enum isl_format mem_format = use_memcpy ? surf->format : anv_plane_format.isl_format; /* Memory distance between each row */ uint64_t mem_row_pitch_B = calc_mem_row_pitch_B(mem_format, region->memoryRowLength, ®ion->imageExtent); /* Memory distance between each slice (1 3D level or 1 array layer) */ uint64_t mem_height_pitch_B = calc_mem_height_pitch_B(mem_format, mem_row_pitch_B, region->memoryImageHeight, ®ion->imageExtent); VkOffset3D offset_el = vk_offset3d_to_el(surf->format, region->imageOffset); VkExtent3D extent_el = vk_extent3d_to_el(surf->format, region->imageExtent); uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, ®ion->imageSubresource); for (uint32_t a = 0; a < layer_count; a++) { for (uint32_t z = 0; z < region->imageExtent.depth; z++) { assert((region->imageOffset.z == 0 && z == 0) || (region->imageSubresource.baseArrayLayer == 0 && a == 0)); uint64_t mem_row_offset = (z + a) * mem_height_pitch_B; const void *mem_ptr = region->pHostPointer + mem_row_offset; uint64_t start_tile_B, end_tile_B; if (use_memcpy && isl_surf_image_has_unique_tiles( surf, region->imageSubresource.mipLevel, region->imageOffset.z + z + region->imageSubresource.baseArrayLayer + a, 1, &start_tile_B, &end_tile_B)) { memcpy(img_ptr + start_tile_B, mem_ptr, end_tile_B - start_tile_B); } else { if (is_emulated) { anv_copy_image_memory(device, surf, binding, anv_surf->memory_range.offset, (void *)mem_ptr, mem_row_pitch_B, &offset_el, &extent_el, region->imageSubresource.mipLevel, region->imageSubresource.baseArrayLayer, region->imageOffset.z, a, z, true /* mem_to_img */); copy_intermediate( device, mem_ptr, mem_row_pitch_B, mem_format, image, &image->planes[image->n_planes].primary_surface, region, true, /* mem_to_img */ tmp_mem, a, z, astc_decompression_callback, desc); } else if (temp_copy) { mem_format = anv_get_format_plane(device->physical, image->vk.format, plane, VK_IMAGE_TILING_LINEAR).isl_format; const struct isl_format_layout *mem_fmt_layout = isl_format_get_layout(mem_format); const struct isl_format_layout *surf_fmt_layout = isl_format_get_layout(surf->format); struct rgb_conversion_params params = { .src_bpp = mem_fmt_layout->bpb / 8, .dst_bpp = surf_fmt_layout->bpb / 8, }; copy_intermediate( device, mem_ptr, mem_row_pitch_B, mem_format, image, anv_surf, region, true, /* mem_to_img */ tmp_mem, a, z, rgb_rgba_conversion_callback, ¶ms); } else { anv_copy_image_memory(device, surf, binding, anv_surf->memory_range.offset, (void *)mem_ptr, mem_row_pitch_B, &offset_el, &extent_el, region->imageSubresource.mipLevel, region->imageSubresource.baseArrayLayer, region->imageOffset.z, a, z, true /* mem_to_img */); } } } } } vk_free(&device->vk.alloc, tmp_mem); return VK_SUCCESS; } VkResult anv_CopyImageToMemory( VkDevice _device, const VkCopyImageToMemoryInfo* pCopyImageToMemoryInfo) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image, image, pCopyImageToMemoryInfo->srcImage); const bool use_memcpy = (pCopyImageToMemoryInfo->flags & VK_HOST_IMAGE_COPY_MEMCPY) != 0; const bool temp_copy = needs_temp_copy(image, pCopyImageToMemoryInfo->flags); void *tmp_mem = NULL; /* Allocate single tile size memory for temporary copy. */ if (temp_copy) { tmp_mem = vk_alloc(&device->vk.alloc, TMP_BUFFER_SIZE, 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!tmp_mem) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } for (uint32_t r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) { const VkImageToMemoryCopy *region = &pCopyImageToMemoryInfo->pRegions[r]; const uint32_t plane = anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask); const struct anv_surface *anv_surf = &image->planes[plane].primary_surface; const struct isl_surf *surf = &anv_surf->isl; const struct anv_image_binding *binding = &image->bindings[anv_surf->memory_range.binding]; assert(binding->host_map != NULL); const void *img_ptr = binding->host_map + binding->map_delta + anv_surf->memory_range.offset; const struct anv_format *anv_format = anv_get_format(device->physical, image->vk.format); struct anv_format_plane anv_plane_format = anv_format->planes[plane]; /* We can use the image format to figure out all the pitches if using * memcpy, otherwise memory & image might have different formats, so use * the API format of the image. */ enum isl_format mem_format = use_memcpy ? surf->format : anv_plane_format.isl_format; if (temp_copy) { mem_format = anv_get_format_plane(device->physical, image->vk.format, plane, VK_IMAGE_TILING_LINEAR).isl_format; } VkOffset3D offset_el = vk_offset3d_to_el(surf->format, region->imageOffset); VkExtent3D extent_el = vk_extent3d_to_el(surf->format, region->imageExtent); /* Memory distance between each row */ uint64_t mem_row_pitch_B = calc_mem_row_pitch_B(mem_format, region->memoryRowLength, ®ion->imageExtent); /* Memory distance between each slice (1 3D level or 1 array layer) */ uint64_t mem_height_pitch_B = calc_mem_height_pitch_B(mem_format, mem_row_pitch_B, region->memoryImageHeight, ®ion->imageExtent); uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, ®ion->imageSubresource); for (uint32_t a = 0; a < layer_count; a++) { for (uint32_t z = 0; z < region->imageExtent.depth; z++) { assert((region->imageOffset.z == 0 && z == 0) || (region->imageSubresource.baseArrayLayer == 0 && a == 0)); uint64_t mem_row_offset = (z + a) * mem_height_pitch_B; void *mem_ptr = region->pHostPointer + mem_row_offset; uint64_t start_tile_B, end_tile_B; if ((pCopyImageToMemoryInfo->flags & VK_HOST_IMAGE_COPY_MEMCPY) && isl_surf_image_has_unique_tiles(surf, region->imageSubresource.mipLevel, region->imageOffset.z + z + region->imageSubresource.baseArrayLayer + a, 1, &start_tile_B, &end_tile_B)) { memcpy(mem_ptr, img_ptr + start_tile_B, end_tile_B - start_tile_B); } else { if (temp_copy) { /* RGBA->RGB conversion with callback */ const struct isl_format_layout *surf_fmt_layout = isl_format_get_layout(surf->format); const struct isl_format_layout *mem_fmt_layout = isl_format_get_layout(mem_format); struct rgb_conversion_params params = { .src_bpp = surf_fmt_layout->bpb / 8, .dst_bpp = mem_fmt_layout->bpb / 8, }; copy_intermediate( device, mem_ptr, mem_row_pitch_B, mem_format, image, &image->planes[plane].primary_surface, region, false, /* mem_to_img */ tmp_mem, a, z, rgb_rgba_conversion_callback, ¶ms); } else { anv_copy_image_memory(device, surf, binding, anv_surf->memory_range.offset, mem_ptr, mem_row_pitch_B, &offset_el, &extent_el, region->imageSubresource.mipLevel, region->imageSubresource.baseArrayLayer, region->imageOffset.z, a, z, false /* mem_to_img */); } } } } } vk_free(&device->vk.alloc, tmp_mem); return VK_SUCCESS; } /* This functions copies from one image to another through an intermediate * linear buffer. */ static void copy_image_to_image(struct anv_device *device, struct anv_image *src_image, struct anv_image *dst_image, int src_plane, int dst_plane, const VkImageCopy2 *region, void *tmp_map, void *emu_tmp_map) { const struct anv_surface *src_anv_surf = &src_image->planes[src_plane].primary_surface; const struct anv_surface *dst_anv_surf = &dst_image->planes[dst_plane].primary_surface; const struct isl_surf *src_surf = &src_anv_surf->isl; const struct isl_surf *dst_surf = &dst_anv_surf->isl; const struct anv_image_binding *src_binding = &src_image->bindings[src_anv_surf->memory_range.binding]; const struct anv_image_binding *dst_binding = &dst_image->bindings[dst_anv_surf->memory_range.binding]; struct isl_tile_info src_tile; struct isl_tile_info dst_tile; isl_surf_get_tile_info(src_surf, &src_tile); isl_surf_get_tile_info(dst_surf, &dst_tile); uint32_t tile_width_el, tile_height_el; if (src_tile.phys_extent_B.w > dst_tile.phys_extent_B.w) { tile_width_el = src_tile.logical_extent_el.w; tile_height_el = src_tile.logical_extent_el.h; } else { tile_width_el = dst_tile.logical_extent_el.w; tile_height_el = dst_tile.logical_extent_el.h; } /* Only decompress if we're writing to the emulated (decompressed) plane */ const bool is_emulated = (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) && (&dst_image->planes[dst_plane] == &dst_image->planes[dst_image->n_planes]); /* There is no requirement that the extent be aligned to the texel block * size. */ VkOffset3D src_offset_el = vk_offset3d_to_el(src_surf->format, region->srcOffset); VkOffset3D dst_offset_el = vk_offset3d_to_el(src_surf->format, region->dstOffset); VkExtent3D extent_el = vk_extent3d_to_el(src_surf->format, region->extent); uint32_t linear_stride_B; /* linear-to-linear case */ if (tile_width_el == 1 && tile_height_el == 1) { tile_width_el = MIN2(4096 / (src_tile.format_bpb / 8), extent_el.width); tile_height_el = 4096 / (tile_width_el * (src_tile.format_bpb / 8)); linear_stride_B = tile_width_el * src_tile.format_bpb / 8; } else { linear_stride_B = src_tile.logical_extent_el.w * src_tile.format_bpb / 8; } uint32_t layer_count = vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource); for (uint32_t a = 0; a < layer_count; a++) { for (uint32_t z = 0; z < region->extent.depth; z++) { for (uint32_t y_el = 0; y_el < extent_el.height; y_el += tile_height_el) { for (uint32_t x_el = 0; x_el < extent_el.width; x_el += tile_width_el) { VkOffset3D src_offset = { .x = src_offset_el.x + x_el, .y = src_offset_el.y + y_el, }; VkOffset3D dst_offset = { .x = dst_offset_el.x + x_el, .y = dst_offset_el.y + y_el, }; VkExtent3D extent = { .width = MIN2(extent_el.width - x_el, tile_width_el), .height = MIN2(extent_el.height - y_el, tile_height_el), .depth = 1, }; anv_copy_image_memory(device, src_surf, src_binding, src_anv_surf->memory_range.offset, tmp_map, linear_stride_B, &src_offset, &extent, region->srcSubresource.mipLevel, region->srcSubresource.baseArrayLayer, region->srcOffset.z, a, z, false /* mem_to_img */); if (is_emulated) { const struct VkMemoryToImageCopy mem_copy = { .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY, .pNext = NULL, .pHostPointer = tmp_map, .memoryRowLength = linear_stride_B, .memoryImageHeight = 0, .imageSubresource = region->dstSubresource, .imageOffset = vk_el_to_offset3d(src_surf->format, dst_offset), .imageExtent = vk_el_to_extent3d(src_surf->format, extent), }; VkFormat format = dst_image->vk.format; copy_intermediate( device, tmp_map, linear_stride_B, dst_image->planes[dst_plane].primary_surface.isl.format, dst_image, &dst_image->planes[dst_plane].primary_surface, &mem_copy, true, /* mem_to_img */ emu_tmp_map, a, z, astc_decompression_callback, &format); } else { anv_copy_image_memory(device, dst_surf, dst_binding, dst_anv_surf->memory_range.offset, tmp_map, linear_stride_B, &dst_offset, &extent, region->dstSubresource.mipLevel, region->dstSubresource.baseArrayLayer, region->dstOffset.z, a, z, true /* mem_to_img */); } } } } } } VkResult anv_CopyImageToImage( VkDevice _device, const VkCopyImageToImageInfo* pCopyImageToImageInfo) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToImageInfo->srcImage); ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageToImageInfo->dstImage); /* Work with a tile's worth of data */ void *tmp_map = vk_alloc(&device->vk.alloc, 2 * TMP_BUFFER_SIZE, 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (tmp_map == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); void *emu_tmp_map = tmp_map + TMP_BUFFER_SIZE; for (uint32_t r = 0; r < pCopyImageToImageInfo->regionCount; r++) { const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[r]; VkImageAspectFlags src_mask = region->srcSubresource.aspectMask, dst_mask = region->dstSubresource.aspectMask; assert(anv_image_aspects_compatible(src_mask, dst_mask)); if (util_bitcount(src_mask) > 1) { anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) { int plane = anv_image_aspect_to_plane(src_image, 1UL << aspect_bit); copy_image_to_image(device, src_image, dst_image, plane, plane, region, tmp_map, emu_tmp_map); } } else { int src_plane = anv_image_aspect_to_plane(src_image, src_mask); int dst_plane = anv_image_aspect_to_plane(dst_image, dst_mask); copy_image_to_image(device, src_image, dst_image, src_plane, dst_plane, region, tmp_map, emu_tmp_map); } } vk_free(&device->vk.alloc, tmp_map); return VK_SUCCESS; } VkResult anv_TransitionImageLayout( VkDevice device, uint32_t transitionCount, const VkHostImageLayoutTransitionInfo* pTransitions) { /* Our layout transitions are mostly about resolving the auxiliary surface * into the main surface. Since we disable the auxiliary surface, there is * nothing here for us to do. */ return VK_SUCCESS; }