mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
radv: fix barrier in radv_decompress_dcc_compute shader
ACO doesn't create a waitcnt for barriers between texture samples and
image stores because texture samples are supposed to use read-only
memory. It could also schedule the barrier above the texture sample.
We also have to use a larger memory scope to avoid an ACO optimization.
Tested on GFX8 with Sascha Willems' deferred sample, with some DCC
decompressions and the compute path forced.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: 21.1 <mesa-stable>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9496>
(cherry picked from commit ec70882238)
This commit is contained in:
parent
da74e86cb7
commit
d90e5fe88a
4 changed files with 24 additions and 31 deletions
|
|
@ -1066,7 +1066,7 @@
|
|||
"description": "radv: fix barrier in radv_decompress_dcc_compute shader",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"master_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1658,7 +1658,8 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
|
|||
static void
|
||||
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
|
||||
VkFormat vk_format, const VkComponentMapping *components,
|
||||
bool is_storage_image, bool disable_compression, unsigned plane_id,
|
||||
bool is_storage_image, bool disable_compression,
|
||||
bool enable_compression, unsigned plane_id,
|
||||
unsigned descriptor_plane_id)
|
||||
{
|
||||
struct radv_image *image = iview->image;
|
||||
|
|
@ -1699,7 +1700,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
|
|||
}
|
||||
|
||||
bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
|
||||
if (is_storage_image && !enable_write_compression)
|
||||
if (is_storage_image && !(enable_write_compression || enable_compression))
|
||||
disable_compression = true;
|
||||
si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
|
||||
iview->base_mip, blk_w, is_stencil, is_storage_image,
|
||||
|
|
@ -1898,13 +1899,16 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
|
|||
iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
|
||||
|
||||
bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
|
||||
bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
|
||||
for (unsigned i = 0;
|
||||
i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
|
||||
VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
|
||||
radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
|
||||
disable_compression, iview->plane_id + i, i);
|
||||
disable_compression, enable_compression, iview->plane_id + i,
|
||||
i);
|
||||
radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
|
||||
disable_compression, iview->plane_id + i, i);
|
||||
disable_compression, enable_compression, iview->plane_id + i,
|
||||
i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,8 +31,6 @@
|
|||
static nir_shader *
|
||||
build_dcc_decompress_compute_shader(struct radv_device *dev)
|
||||
{
|
||||
const struct glsl_type *buf_type =
|
||||
glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
|
||||
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
|
||||
|
||||
nir_builder b =
|
||||
|
|
@ -42,7 +40,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
|
|||
b.shader->info.cs.local_size[0] = 16;
|
||||
b.shader->info.cs.local_size[1] = 16;
|
||||
b.shader->info.cs.local_size[2] = 1;
|
||||
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
|
||||
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
|
||||
input_img->data.descriptor_set = 0;
|
||||
input_img->data.binding = 0;
|
||||
|
||||
|
|
@ -57,30 +55,20 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
|
|||
b.shader->info.cs.local_size[2], 0);
|
||||
|
||||
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
|
||||
nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
|
||||
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
|
||||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
|
||||
tex->op = nir_texop_txf;
|
||||
tex->src[0].src_type = nir_tex_src_coord;
|
||||
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
|
||||
tex->src[1].src_type = nir_tex_src_lod;
|
||||
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
tex->src[2].src_type = nir_tex_src_texture_deref;
|
||||
tex->src[2].src = nir_src_for_ssa(input_img_deref);
|
||||
tex->dest_type = nir_type_float32;
|
||||
tex->is_array = false;
|
||||
tex->coord_components = 2;
|
||||
nir_ssa_def *data =
|
||||
nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
|
||||
nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0));
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
|
||||
nir_builder_instr_insert(&b, &tex->instr);
|
||||
|
||||
nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP,
|
||||
.memory_scope = NIR_SCOPE_WORKGROUP, .memory_semantics = NIR_MEMORY_ACQ_REL,
|
||||
.memory_modes = nir_var_mem_ssbo);
|
||||
/* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
|
||||
* creating a vmcnt(0) because it expects the L1 cache to keep memory
|
||||
* operations in-order for the same workgroup. The vmcnt(0) seems
|
||||
* necessary however. */
|
||||
nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
|
||||
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
|
||||
|
||||
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
|
||||
nir_ssa_undef(&b, 1, 32), &tex->dest.ssa, nir_imm_int(&b, 0));
|
||||
nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0));
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +84,7 @@ create_dcc_compress_compute(struct radv_device *device)
|
|||
.bindingCount = 2,
|
||||
.pBindings = (VkDescriptorSetLayoutBinding[]){
|
||||
{.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = NULL},
|
||||
|
|
@ -849,7 +837,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
|
|||
.baseArrayLayer = subresourceRange->baseArrayLayer + s,
|
||||
.layerCount = 1},
|
||||
},
|
||||
NULL);
|
||||
&(struct radv_image_view_extra_create_info){.enable_compression = true});
|
||||
radv_image_view_init(
|
||||
&store_iview, cmd_buffer->device,
|
||||
&(VkImageViewCreateInfo){
|
||||
|
|
@ -873,7 +861,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
|
|||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.pImageInfo =
|
||||
(VkDescriptorImageInfo[]){
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2175,6 +2175,7 @@ bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits
|
|||
|
||||
struct radv_image_view_extra_create_info {
|
||||
bool disable_compression;
|
||||
bool enable_compression;
|
||||
};
|
||||
|
||||
void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue