diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 2ef131c90c9..e00716e04ec 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -3040,6 +3040,53 @@ ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct radeon_info return output.addr; } +void +ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info, + const struct radeon_surf *surf, const struct ac_surf_info *surf_info, + unsigned level, unsigned layer, struct ac_surf_nbc_view *out) +{ + /* Only implemented for GFX10+ */ + assert(info->gfx_level >= GFX10); + + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT input = {0}; + input.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT); + input.swizzleMode = surf->u.gfx9.swizzle_mode; + input.resourceType = surf->u.gfx9.resource_type; + switch (surf->bpe) { + case 8: + input.format = ADDR_FMT_BC1; + break; + case 16: + input.format = ADDR_FMT_BC3; + break; + default: + assert(0); + } + input.width = surf_info->width; + input.height = surf_info->height; + input.numSlices = surf_info->array_size; + input.numMipLevels = surf_info->levels; + input.pipeBankXor = surf->tile_swizzle; + input.slice = layer; + input.mipId = level; + + ADDR_E_RETURNCODE res; + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT output = {0}; + output.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT); + res = Addr2ComputeNonBlockCompressedView(addrlib->handle, &input, &output); + if (res == ADDR_OK) { + out->base_address_offset = output.offset; + out->tile_swizzle = output.pipeBankXor; + out->width = output.unalignedWidth; + out->height = output.unalignedHeight; + out->max_mip = output.numMipLevels; + out->level = output.mipId; + out->valid = true; + } else { + out->valid = false; + } +} + void ac_surface_print_info(FILE *out, const struct radeon_info *info, const struct radeon_surf *surf) { diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 60ebcc5719c..48a8c4c6628 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -415,6 +415,17 @@ struct ac_surf_config { unsigned is_cube : 1; }; +/* Output parameters for ac_surface_compute_nbc_view */ +struct ac_surf_nbc_view { + bool valid; + uint32_t width; + uint32_t height; + uint32_t level; + uint32_t max_mip; /* Used for max_mip in the resource descriptor */ + uint8_t tile_swizzle; + uint64_t base_address_offset; +}; + struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, uint64_t *max_alignment); void ac_addrlib_destroy(struct ac_addrlib *addrlib); void *ac_addrlib_get_handle(struct ac_addrlib *addrlib); @@ -474,6 +485,10 @@ uint64_t ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct rad const struct radeon_surf *surf, const struct ac_surf_info *surf_info, unsigned level, unsigned x, unsigned y, unsigned layer, bool is_3d); +void ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info, + const struct radeon_surf *surf, + const struct ac_surf_info *surf_info, unsigned level, + unsigned layer, struct ac_surf_nbc_view *out); void ac_surface_print_info(FILE *out, const struct radeon_info *info, const struct radeon_surf *surf); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 58af590c482..3f1e0c4e69b 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -764,12 +764,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im const struct legacy_surf_level *base_level_info, unsigned plane_id, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, bool is_storage_image, bool disable_compression, - bool enable_write_compression, uint32_t *state) + bool enable_write_compression, uint32_t *state, + const struct ac_surf_nbc_view *nbc_view) { struct radv_image_plane *plane = &image->planes[plane_id]; struct radv_image_binding *binding = image->disjoint ? &image->bindings[plane_id] : &image->bindings[0]; uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0; uint64_t va = gpu_address; + uint8_t swizzle = plane->surface.tile_swizzle; enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; uint64_t meta_va = 0; if (gfx_level >= GFX9) { @@ -777,12 +779,16 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im va += plane->surface.u.gfx9.zs.stencil_offset; else va += plane->surface.u.gfx9.surf_offset; + if (nbc_view && nbc_view->valid) { + va += nbc_view->base_address_offset; + swizzle = nbc_view->tile_swizzle; + } } else va += (uint64_t)base_level_info->offset_256B * 256; state[0] = va >> 8; if (gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) - state[0] |= plane->surface.tile_swizzle; + state[0] |= swizzle; state[1] &= C_008F14_BASE_ADDRESS_HI; state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); @@ -953,7 +959,8 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, uint32_t *fmask_state, - VkImageCreateFlags img_create_flags) + VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; @@ -1032,6 +1039,8 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima unsigned max_mip = image->info.samples > 1 ? util_logbase2(image->info.samples) : image->info.levels - 1; + if (nbc_view && nbc_view->valid) + max_mip = nbc_view->max_mip; if (device->physical_device->rad_info.gfx_level >= GFX11) { state[1] |= S_00A004_MAX_MIP(max_mip); @@ -1326,12 +1335,13 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag const VkComponentMapping *mapping, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, - uint32_t *fmask_state, VkImageCreateFlags img_create_flags) + uint32_t *fmask_state, VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { if (device->physical_device->rad_info.gfx_level >= GFX10) { gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, last_level, first_layer, last_layer, width, height, - depth, min_lod, state, fmask_state, img_create_flags); + depth, min_lod, state, fmask_state, img_create_flags, nbc_view); } else { si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, last_level, first_layer, last_layer, width, height, @@ -1351,11 +1361,11 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, image->vk.format, &fixedmapping, 0, image->info.levels - 1, 0, image->info.array_size - 1, image->info.width, image->info.height, - image->info.depth, 0.0f, desc, NULL, 0); + image->info.depth, 0.0f, desc, NULL, 0, NULL); si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0, image->planes[0].surface.blk_w, false, false, false, false, - desc); + desc, NULL); ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, image->info.levels, desc, &md->size_metadata, md->metadata); @@ -1926,13 +1936,27 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ return VK_SUCCESS; } +static inline void +compute_non_block_compressed_view(const struct radv_device *device, + const struct radv_image_view *iview, + struct ac_surf_nbc_view *nbc_view) +{ + const struct radv_image *image = iview->image; + const struct radeon_surf *surf = &image->planes[0].surface; + struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); + + ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &image->info, + iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view); +} + static void radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device, VkFormat vk_format, const VkComponentMapping *components, float min_lod, bool is_storage_image, bool disable_compression, bool enable_compression, unsigned plane_id, - unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags) + unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { struct radv_image *image = iview->image; struct radv_image_plane *plane = &image->planes[plane_id]; @@ -1952,8 +1976,15 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format); - if (device->physical_device->rad_info.gfx_level >= GFX9) + if (device->physical_device->rad_info.gfx_level >= GFX9) { hw_level = iview->vk.base_mip_level; + if (nbc_view->valid) { + hw_level = nbc_view->level; + iview->extent.width = nbc_view->width; + iview->extent.height = nbc_view->height; + } + } + radv_make_texture_descriptor( device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level, hw_level + iview->vk.level_count - 1, iview->vk.base_array_layer, @@ -1962,7 +1993,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height), iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id], descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor, - img_create_flags); + img_create_flags, nbc_view); const struct legacy_surf_level *base_level_info = NULL; if (device->physical_device->rad_info.gfx_level <= GFX9) { @@ -1978,7 +2009,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level, iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image, disable_compression, enable_write_compression, - descriptor->plane_descriptors[descriptor_plane_id]); + descriptor->plane_descriptors[descriptor_plane_id], nbc_view); } static unsigned @@ -2057,6 +2088,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; uint32_t plane_count = 1; float min_lod = 0.0f; + struct ac_surf_nbc_view nbc_view = {0}; const struct VkImageViewMinLodCreateInfoEXT *min_lod_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT); @@ -2179,13 +2211,22 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, lvl_width = round_up_u32(lvl_width * view_bw, img_bw); lvl_height = round_up_u32(lvl_height * view_bh, img_bh); - lvl_width <<= range->baseMipLevel; - lvl_height <<= range->baseMipLevel; - - iview->extent.width = CLAMP(lvl_width, iview->extent.width, + iview->extent.width = CLAMP(lvl_width << range->baseMipLevel, iview->extent.width, iview->image->planes[0].surface.u.gfx9.base_mip_width); - iview->extent.height = CLAMP(lvl_height, iview->extent.height, + iview->extent.height = CLAMP(lvl_height << range->baseMipLevel, iview->extent.height, iview->image->planes[0].surface.u.gfx9.base_mip_height); + + /* If the hardware-computed extent is still be too small, on GFX10 + * we can attempt another workaround provided by addrlib that + * changes the descriptor's base level, and adjusts the address and + * extents accordingly. + */ + if (device->physical_device->rad_info.gfx_level >= GFX10 && + (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width || + radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) && + iview->vk.layer_count == 1) { + compute_non_block_compressed_view(device, iview, &nbc_view); + } } } } @@ -2199,10 +2240,10 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false, disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags); + i, img_create_flags, &nbc_view); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true, disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags); + i, img_create_flags, &nbc_view); } } diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 4fd4a6f6267..3ee22cd0020 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -1308,6 +1308,9 @@ create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffe * radv_image_view_init). Some texels are unaddressable and cannot be copied * to/from by a compute shader. Here we will perform a buffer copy to copy the * texels that the hardware missed. + * + * GFX10 will not use this workaround because it can be fixed by adjusting its + * image view descriptors instead. */ static void fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, @@ -1322,8 +1325,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radeon_info *rad_info = &device->physical_device->rad_info; struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); - if (rad_info->gfx_level < GFX9 || image->vk.mip_levels == 1 || - !vk_format_is_block_compressed(image->vk.format)) + /* GFX10 will use a different workaround unless this is not a 2D image */ + if (rad_info->gfx_level < GFX9 || + (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) || + image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format)) return; /* The physical extent of the base mip */