diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index 8f92a85718a..170d0c044ba 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -324,6 +324,8 @@ nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(struct radv_ nir_shader *radv_meta_nir_build_dcc_decompress_compute_shader(struct radv_device *dev); +nir_shader *radv_meta_nir_build_fmask_copy_compute_shader(struct radv_device *dev, int samples); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value); diff --git a/src/amd/vulkan/meta/radv_meta_fmask_copy.c b/src/amd/vulkan/meta/radv_meta_fmask_copy.c index 55b4efc660e..c54e7cabf16 100644 --- a/src/amd/vulkan/meta/radv_meta_fmask_copy.c +++ b/src/amd/vulkan/meta/radv_meta_fmask_copy.c @@ -3,85 +3,10 @@ * * SPDX-License-Identifier: MIT */ -#include "nir/nir_builder.h" + #include "radv_formats.h" #include "radv_meta.h" -static nir_shader * -build_fmask_copy_compute_shader(struct radv_device *dev, int samples) -{ - const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT); - const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT); - - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples); - - b.shader->info.workgroup_size[0] = 8; - b.shader->info.workgroup_size[1] = 8; - - nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); - input_img->data.descriptor_set = 0; - input_img->data.binding = 0; - - nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); - output_img->data.descriptor_set = 0; - output_img->data.binding = 1; - - nir_def *invoc_id = nir_load_local_invocation_id(&b); - nir_def *wg_id = nir_load_workgroup_id(&b); - nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], - b.shader->info.workgroup_size[2]); - - nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - - /* Get coordinates. */ - nir_def *src_coord = nir_trim_vector(&b, global_id, 2); - nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32), - nir_undef(&b, 1, 32)); - - nir_tex_src frag_mask_srcs[] = {{ - .src_type = nir_tex_src_coord, - .src = nir_src_for_ssa(src_coord), - }}; - nir_def *frag_mask = - nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL, - ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs); - - /* Get the maximum sample used in this fragment. */ - nir_def *max_sample_index = nir_imm_int(&b, 0); - for (uint32_t s = 0; s < samples; s++) { - /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */ - max_sample_index = nir_umax(&b, max_sample_index, - nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4))); - } - - nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter"); - nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1); - - nir_loop *loop = nir_push_loop(&b); - { - nir_def *sample_id = nir_load_var(&b, counter); - - nir_tex_src frag_fetch_srcs[] = {{ - .src_type = nir_tex_src_coord, - .src = nir_src_for_ssa(src_coord), - }, - { - .src_type = nir_tex_src_ms_index, - .src = nir_src_for_ssa(sample_id), - }}; - nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), - NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs); - - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval, - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS); - - radv_break_on_count(&b, counter, max_sample_index); - } - nir_pop_loop(&b, loop); - - return b.shader; -} - static VkResult get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out) { @@ -139,7 +64,7 @@ get_pipeline(struct radv_device *device, uint32_t samples_log2, VkPipeline *pipe return VK_SUCCESS; } - nir_shader *cs = build_fmask_copy_compute_shader(device, samples); + nir_shader *cs = radv_meta_nir_build_fmask_copy_compute_shader(device, samples); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, diff --git a/src/amd/vulkan/nir/radv_meta_nir.c b/src/amd/vulkan/nir/radv_meta_nir.c index 33b901ff7f9..a3b398df2a0 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.c +++ b/src/amd/vulkan/nir/radv_meta_nir.c @@ -1031,3 +1031,78 @@ radv_meta_nir_build_dcc_decompress_compute_shader(struct radv_device *dev) nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); return b.shader; } + +nir_shader * +radv_meta_nir_build_fmask_copy_compute_shader(struct radv_device *dev, int samples) +{ + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT); + const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT); + + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples); + + b.shader->info.workgroup_size[0] = 8; + b.shader->info.workgroup_size[1] = 8; + + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_def *invoc_id = nir_load_local_invocation_id(&b); + nir_def *wg_id = nir_load_workgroup_id(&b); + nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], + b.shader->info.workgroup_size[2]); + + nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + /* Get coordinates. */ + nir_def *src_coord = nir_trim_vector(&b, global_id, 2); + nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32), + nir_undef(&b, 1, 32)); + + nir_tex_src frag_mask_srcs[] = {{ + .src_type = nir_tex_src_coord, + .src = nir_src_for_ssa(src_coord), + }}; + nir_def *frag_mask = + nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL, + ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs); + + /* Get the maximum sample used in this fragment. */ + nir_def *max_sample_index = nir_imm_int(&b, 0); + for (uint32_t s = 0; s < samples; s++) { + /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */ + max_sample_index = nir_umax(&b, max_sample_index, + nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4))); + } + + nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter"); + nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1); + + nir_loop *loop = nir_push_loop(&b); + { + nir_def *sample_id = nir_load_var(&b, counter); + + nir_tex_src frag_fetch_srcs[] = {{ + .src_type = nir_tex_src_coord, + .src = nir_src_for_ssa(src_coord), + }, + { + .src_type = nir_tex_src_ms_index, + .src = nir_src_for_ssa(sample_id), + }}; + nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), + NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs); + + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS); + + radv_break_on_count(&b, counter, max_sample_index); + } + nir_pop_loop(&b, loop); + + return b.shader; +}