From b653669fc5a6cc5d55d57d437e18f45fe98a3bd2 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 17 Oct 2023 12:25:20 -0700 Subject: [PATCH] anv: add gen9 astc workaround gen9 does not handle denorms in void extent blocks correctly. We need to flush them to zero. Signed-off-by: Chia-I Wu Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/vulkan/anv_astc_emu.c | 290 +++++++++++++++++++++++++++++++- src/intel/vulkan/anv_device.c | 4 + src/intel/vulkan/anv_image.c | 19 ++- src/intel/vulkan/anv_private.h | 18 +- 4 files changed, 319 insertions(+), 12 deletions(-) diff --git a/src/intel/vulkan/anv_astc_emu.c b/src/intel/vulkan/anv_astc_emu.c index 90fbd3b80f4..e48f3c8d6d2 100644 --- a/src/intel/vulkan/anv_astc_emu.c +++ b/src/intel/vulkan/anv_astc_emu.c @@ -5,6 +5,8 @@ #include "anv_private.h" +#include "compiler/nir/nir_builder.h" + static void astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *iview, @@ -56,6 +58,261 @@ astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, anv_descriptor_set_write(device, &push_set->set, write_count, writes); } +static void +astc_emu_init_flush_denorm_shader(nir_builder *b) +{ + b->shader->info.workgroup_size[0] = 8; + b->shader->info.workgroup_size[1] = 8; + + const struct glsl_type *src_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT); + nir_variable *src_var = + nir_variable_create(b->shader, nir_var_uniform, src_type, "src"); + src_var->data.descriptor_set = 0; + src_var->data.binding = 0; + + const struct glsl_type *dst_type = + glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_UINT); + nir_variable *dst_var = + nir_variable_create(b->shader, nir_var_uniform, dst_type, "dst"); + dst_var->data.descriptor_set = 0; + dst_var->data.binding = 1; + + nir_def *zero = nir_imm_int(b, 0); + nir_def *consts = nir_load_push_constant(b, 4, 32, zero, .range = 16); + nir_def *offset = nir_channels(b, consts, 0x3); + nir_def *extent = nir_channels(b, consts, 0x3 << 2); + + nir_def *coord = nir_load_global_invocation_id(b, 32); + coord = nir_iadd(b, nir_channels(b, coord, 0x3), offset); + + nir_def *cond = nir_ilt(b, coord, extent); + cond = nir_iand(b, nir_channel(b, cond, 0), nir_channel(b, cond, 1)); + nir_push_if(b, cond); + { + const struct glsl_type *val_type = glsl_vector_type(GLSL_TYPE_UINT, 4); + nir_variable *val_var = + nir_variable_create(b->shader, nir_var_shader_temp, val_type, "val"); + + coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1), + zero); + nir_def *val = + nir_txf_deref(b, nir_build_deref_var(b, src_var), coord, zero); + nir_store_var(b, val_var, val, 0xf); + + /* A void-extent block has this layout + * + * struct astc_void_extent_block { + * uint16_t header; + * uint16_t dontcare0; + * uint16_t dontcare1; + * uint16_t dontcare2; + * uint16_t R; + * uint16_t G; + * uint16_t B; + * uint16_t A; + * }; + * + * where the lower 12 bits are 0xdfc for 2D LDR. + */ + nir_def *block_mode = nir_iand_imm(b, nir_channel(b, val, 0), 0xfff); + nir_push_if(b, nir_ieq_imm(b, block_mode, 0xdfc)); + { + nir_def *color = nir_channels(b, val, 0x3 << 2); + nir_def *comps = nir_unpack_64_4x16(b, nir_pack_64_2x32(b, color)); + + /* flush denorms */ + comps = nir_bcsel(b, nir_ult_imm(b, comps, 4), + nir_imm_intN_t(b, 0, 16), comps); + + color = nir_unpack_64_2x32(b, nir_pack_64_4x16(b, comps)); + val = nir_vec4(b, nir_channel(b, val, 0), nir_channel(b, val, 1), + nir_channel(b, color, 0), nir_channel(b, color, 1)); + nir_store_var(b, val_var, val, 0x3 << 2); + } + nir_pop_if(b, NULL); + + nir_def *dst = &nir_build_deref_var(b, dst_var)->def; + coord = nir_pad_vector(b, coord, 4); + val = nir_load_var(b, val_var); + nir_image_deref_store(b, dst, coord, nir_undef(b, 1, 32), val, zero, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = true); + } + nir_pop_if(b, NULL); +} + +static VkResult +astc_emu_init_flush_denorm_pipeline_locked(struct anv_device *device) +{ + struct anv_device_astc_emu *astc_emu = &device->astc_emu; + VkDevice _device = anv_device_to_handle(device); + VkResult result = VK_SUCCESS; + + if (astc_emu->ds_layout == VK_NULL_HANDLE) { + const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }, + }; + result = anv_CreateDescriptorSetLayout(_device, &ds_layout_create_info, + NULL, &astc_emu->ds_layout); + if (result != VK_SUCCESS) + goto out; + } + + if (astc_emu->pipeline_layout == VK_NULL_HANDLE) { + const VkPipelineLayoutCreateInfo pipeline_layout_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &astc_emu->ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &(VkPushConstantRange){ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .size = sizeof(uint32_t) * 4, + }, + }; + result = anv_CreatePipelineLayout(_device, &pipeline_layout_create_info, + NULL, &astc_emu->pipeline_layout); + if (result != VK_SUCCESS) + goto out; + } + + if (astc_emu->pipeline == VK_NULL_HANDLE) { + const struct nir_shader_compiler_options *options = + device->physical->compiler->nir_options[MESA_SHADER_COMPUTE]; + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, options, "astc_emu_flush_denorm"); + astc_emu_init_flush_denorm_shader(&b); + + const VkComputePipelineCreateInfo pipeline_create_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = vk_shader_module_handle_from_nir(b.shader), + .pName = "main", + }, + .layout = astc_emu->pipeline_layout, + }; + result = anv_CreateComputePipelines(_device, VK_NULL_HANDLE, 1, + &pipeline_create_info, NULL, + &astc_emu->pipeline); + ralloc_free(b.shader); + + if (result != VK_SUCCESS) + goto out; + } + +out: + return result; +} + +static VkResult +astc_emu_init_flush_denorm_pipeline(struct anv_device *device) +{ + struct anv_device_astc_emu *astc_emu = &device->astc_emu; + VkResult result = VK_SUCCESS; + + simple_mtx_lock(&astc_emu->mutex); + if (!astc_emu->pipeline) + result = astc_emu_init_flush_denorm_pipeline_locked(device); + simple_mtx_unlock(&astc_emu->mutex); + + return result; +} + +static void +astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer, + VkFormat astc_format, + VkImageLayout layout, + VkImageView src_view, + VkImageView dst_view, + VkRect2D rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_device_astc_emu *astc_emu = &device->astc_emu; + VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer); + + VkResult result = astc_emu_init_flush_denorm_pipeline(device); + if (result != VK_SUCCESS) { + anv_batch_set_error(&cmd_buffer->batch, result); + return; + } + + const uint32_t push_const[] = { + rect.offset.x, + rect.offset.y, + rect.offset.x + rect.extent.width, + rect.offset.y + rect.extent.height, + }; + + const VkWriteDescriptorSet set_writes[] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &(VkDescriptorImageInfo){ + .imageView = src_view, + .imageLayout = layout, + }, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &(VkDescriptorImageInfo){ + .imageView = dst_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }; + struct anv_push_descriptor_set push_set; + astc_emu_init_push_descriptor_set(cmd_buffer, + &push_set, + astc_emu->ds_layout, + ARRAY_SIZE(set_writes), + set_writes); + VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set); + + anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, + astc_emu->pipeline); + anv_CmdPushConstants(cmd_buffer_, astc_emu->pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(push_const), push_const); + anv_CmdBindDescriptorSets(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, + astc_emu->pipeline_layout, 0, 1, &set, + 0, NULL); + + /* each workgroup processes 8x8 texel blocks */ + rect.extent.width = DIV_ROUND_UP(rect.extent.width, 8); + rect.extent.height = DIV_ROUND_UP(rect.extent.height, 8); + + anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0, + rect.extent.width, + rect.extent.height, + 1); + + anv_push_descriptor_set_finish(&push_set); +} + static void astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer, VkFormat astc_format, @@ -128,6 +385,9 @@ anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer, VkOffset3D block_offset, VkExtent3D block_extent) { + const bool flush_denorms = + cmd_buffer->device->physical->flush_astc_ldr_void_extent_denorms; + assert(image->emu_plane_format != VK_FORMAT_UNDEFINED); const VkRect2D rect = { @@ -165,14 +425,22 @@ anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT, subresource->mipLevel, slice_base + i); astc_emu_init_image_view(cmd_buffer, &dst_view, image, - VK_FORMAT_R8G8B8A8_UINT, + flush_denorms ? VK_FORMAT_R32G32B32A32_UINT + : VK_FORMAT_R8G8B8A8_UINT, VK_IMAGE_USAGE_STORAGE_BIT, subresource->mipLevel, slice_base + i); - astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout, - anv_image_view_to_handle(&src_view), - anv_image_view_to_handle(&dst_view), - rect); + if (flush_denorms) { + astc_emu_flush_denorm_slice(cmd_buffer, image->vk.format, layout, + anv_image_view_to_handle(&src_view), + anv_image_view_to_handle(&dst_view), + rect); + } else { + astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout, + anv_image_view_to_handle(&src_view), + anv_image_view_to_handle(&dst_view), + rect); + } } anv_cmd_buffer_restore_state(cmd_buffer, &saved); @@ -184,6 +452,9 @@ anv_device_init_astc_emu(struct anv_device *device) struct anv_device_astc_emu *astc_emu = &device->astc_emu; VkResult result = VK_SUCCESS; + if (device->physical->flush_astc_ldr_void_extent_denorms) + simple_mtx_init(&astc_emu->mutex, mtx_plain); + if (device->physical->emu_astc_ldr) { result = vk_texcompress_astc_init(&device->vk, &device->vk.alloc, VK_NULL_HANDLE, @@ -198,6 +469,15 @@ anv_device_finish_astc_emu(struct anv_device *device) { struct anv_device_astc_emu *astc_emu = &device->astc_emu; + if (device->physical->flush_astc_ldr_void_extent_denorms) { + VkDevice _device = anv_device_to_handle(device); + + anv_DestroyPipeline(_device, astc_emu->pipeline, NULL); + anv_DestroyPipelineLayout(_device, astc_emu->pipeline_layout, NULL); + anv_DestroyDescriptorSetLayout(_device, astc_emu->ds_layout, NULL); + simple_mtx_destroy(&astc_emu->mutex); + } + if (astc_emu->texcompress) { vk_texcompress_astc_finish(&device->vk, &device->vk.alloc, astc_emu->texcompress); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 2904216c6b8..055f9a79fbb 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1362,6 +1362,10 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, if (!device->has_astc_ldr && driQueryOptionb(&device->instance->dri_options, "vk_require_astc")) device->emu_astc_ldr = true; + if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) { + device->flush_astc_ldr_void_extent_denorms = + device->has_astc_ldr && !device->emu_astc_ldr; + } result = anv_physical_device_init_heaps(device, fd); if (result != VK_SUCCESS) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 24d27b571d3..6ffe7cbd5c0 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2982,14 +2982,21 @@ anv_image_fill_surface_state(struct anv_device *device, uint32_t plane = anv_image_aspect_to_plane(image, aspect); if (image->emu_plane_format != VK_FORMAT_UNDEFINED) { const uint16_t view_bpb = isl_format_get_layout(view_in->format)->bpb; - enum isl_format format = - image->planes[plane].primary_surface.isl.format; + const uint16_t plane_bpb = isl_format_get_layout( + image->planes[plane].primary_surface.isl.format)->bpb; - /* redirect to the hidden plane if not size-compatible */ - if (isl_format_get_layout(format)->bpb != view_bpb) { + /* We should redirect to the hidden plane when the original view format + * is compressed or when the view usage is storage. But we don't always + * have visibility to the original view format so we also check for size + * compatibility. + */ + if (isl_format_is_compressed(view_in->format) || + (view_usage & ISL_SURF_USAGE_STORAGE_BIT) || + view_bpb != plane_bpb) { plane = image->n_planes; - format = image->planes[plane].primary_surface.isl.format; - assert(isl_format_get_layout(format)->bpb == view_bpb); + assert(isl_format_get_layout( + image->planes[plane].primary_surface.isl.format)->bpb == + view_bpb); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 83f9851cf1e..15a746126ab 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -891,6 +891,8 @@ struct anv_physical_device { /** True if HW supports ASTC LDR */ bool has_astc_ldr; + /** True if denorms in void extents should be flushed to zero */ + bool flush_astc_ldr_void_extent_denorms; /** True if ASTC LDR is supported via emulation */ bool emu_astc_ldr; @@ -1462,6 +1464,12 @@ enum anv_rt_bvh_build_method { struct anv_device_astc_emu { struct vk_texcompress_astc_state *texcompress; + + /* for flush_astc_ldr_void_extent_denorms */ + simple_mtx_t mutex; + VkDescriptorSetLayout ds_layout; + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; }; struct anv_device { @@ -4474,6 +4482,14 @@ vk_format_from_android(unsigned android_format, unsigned android_usage); static inline VkFormat anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format) { + if (pdevice->flush_astc_ldr_void_extent_denorms) { + const struct util_format_description *desc = + vk_format_description(format); + if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC && + desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) + return format; + } + if (pdevice->emu_astc_ldr) return vk_texcompress_astc_emulation_format(format); @@ -4598,7 +4614,7 @@ struct anv_image { /** * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC - * LDR emulation. + * LDR workaround or emulation. */ VkFormat emu_plane_format;