diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index 8f338ef967b..d271657997c 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -316,6 +316,8 @@ void radv_meta_nir_build_clear_depthstencil_shaders(struct radv_device *dev, str nir_shader *radv_meta_nir_build_clear_htile_mask_shader(struct radv_device *dev); nir_shader *radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa); +nir_shader *radv_meta_nir_build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value); diff --git a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c index 7a96085b1e5..c810b258ed9 100644 --- a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c +++ b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c @@ -4,95 +4,11 @@ * SPDX-License-Identifier: MIT */ -#define AC_SURFACE_INCLUDE_NIR #include "ac_surface.h" - #include "radv_meta.h" #include "vk_common_entrypoints.h" #include "vk_format.h" -static nir_shader * -build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf) -{ - const struct radv_physical_device *pdev = radv_device_physical(device); - nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile"); - b.shader->info.workgroup_size[0] = 8; - b.shader->info.workgroup_size[1] = 8; - - /* Get coordinates. */ - nir_def *global_id = get_global_ids(&b, 2); - - nir_def *addr = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); - nir_def *htile_va = nir_pack_64_2x32(&b, nir_channels(&b, addr, 0x3)); - - nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16); - - /* Multiply the coordinates by the HTILE block size. 
*/ - nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset); - - /* Load constants. */ - nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 16), .range = 28); - nir_def *htile_pitch = nir_channel(&b, constants, 0); - nir_def *htile_slice_size = nir_channel(&b, constants, 1); - nir_def *read_htile_value = nir_channel(&b, constants, 2); - - /* Get the HTILE addr from coordinates. */ - nir_def *zero = nir_imm_int(&b, 0); - nir_def *htile_offset = - ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size, - nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); - - /* Set up the input VRS image descriptor. */ - const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); - nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image"); - input_vrs_img->data.descriptor_set = 0; - input_vrs_img->data.binding = 0; - - /* Load the VRS rates from the 2D image. */ - nir_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL); - - /* Extract the X/Y rates and clamp them because the maximum supported VRS rate is 2x2 (1x1 in - * hardware). - * - * VRS rate X = min(value >> 2, 1) - * VRS rate Y = min(value & 3, 1) - */ - nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2); - x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1)); - - nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3); - y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1)); - - /* Compute the final VRS rate. */ - nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6)); - - /* Load the HTILE value if requested, otherwise use the default value. 
*/ - nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value"); - - nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1)); - { - /* Load the existing HTILE 32-bit value for this 8x8 pixels area. */ - nir_def *input_value = nir_build_load_global(&b, 1, 32, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset))); - - /* Clear the 4-bit VRS rates. */ - nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1); - } - nir_push_else(&b, NULL); - { - nir_store_var(&b, htile_value, nir_imm_int(&b, 0xfffff33f), 0x1); - } - nir_pop_if(&b, NULL); - - /* Set the VRS rates loaded from the image. */ - nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates); - - /* Store the updated HTILE 32-bit which contains the VRS rates. */ - nir_build_store_global(&b, output_value, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset)), - .access = ACCESS_NON_READABLE); - - return b.shader; -} - static VkResult get_pipeline(struct radv_device *device, struct radv_image *image, VkPipeline *pipeline_out, VkPipelineLayout *layout_out) @@ -132,7 +48,7 @@ get_pipeline(struct radv_device *device, struct radv_image *image, VkPipeline *p return VK_SUCCESS; } - nir_shader *cs = build_copy_vrs_htile_shader(device, &image->planes[0].surface); + nir_shader *cs = radv_meta_nir_build_copy_vrs_htile_shader(device, &image->planes[0].surface); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, diff --git a/src/amd/vulkan/nir/radv_meta_nir.c b/src/amd/vulkan/nir/radv_meta_nir.c index 3889127e4c4..b8a7c1feaf1 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.c +++ b/src/amd/vulkan/nir/radv_meta_nir.c @@ -7,6 +7,9 @@ * SPDX-License-Identifier: MIT */ +#define AC_SURFACE_INCLUDE_NIR +#include "ac_surface.h" + #include "../meta/radv_meta.h" #include "nir_builder.h" @@ -820,3 +823,85 @@ radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, boo return b.shader; 
}
+
+nir_shader *
+radv_meta_nir_build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
+{
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
+   b.shader->info.workgroup_size[0] = 8;
+   b.shader->info.workgroup_size[1] = 8;
+
+   /* 2D global invocation ID; also used below as the texel coordinate into the VRS image. */
+   nir_def *global_id = get_global_ids(&b, 2);
+
+   nir_def *addr = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+   nir_def *htile_va = nir_pack_64_2x32(&b, nir_channels(&b, addr, 0x3));
+
+   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
+
+   /* Scale the IDs by 8 (the HTILE block size) and add the offset read from push constants. */
+   nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);
+
+   /* Push constants at offset 16: HTILE pitch, HTILE slice size and the read-HTILE flag. */
+   nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 16), .range = 28);
+   nir_def *htile_pitch = nir_channel(&b, constants, 0);
+   nir_def *htile_slice_size = nir_channel(&b, constants, 1);
+   nir_def *read_htile_value = nir_channel(&b, constants, 2);
+
+   /* Get the HTILE offset (later added to htile_va) from the coordinates; last two args are 0. */
+   nir_def *zero = nir_imm_int(&b, 0);
+   nir_def *htile_offset =
+      ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
+                                   nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
+
+   /* Declare the input VRS image as a 2D float sampler at descriptor set 0, binding 0. */
+   const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+   nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image");
+   input_vrs_img->data.descriptor_set = 0;
+   input_vrs_img->data.binding = 0;
+
+   /* Fetch the VRS rates texel at global_id from the 2D image. */
+   nir_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);
+
+   /* Extract the X/Y rates from the first channel and clamp them because the maximum supported
+    * VRS rate is 2x2 (1x1 in hardware).
+    *
+    * VRS rate X = min(value >> 2, 1)
+    * VRS rate Y = min(value & 3, 1)
+    */
+   nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
+   x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1));
+
+   nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
+   y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1));
+
+   /* Pack the clamped rates into the final VRS bits: X rate at bit 6, Y rate at bit 10. */
+   nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));
+
+   /* Base HTILE value: read back when the push-constant flag is 1, else the default 0xfffff33f. */
+   nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value");
+
+   nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1));
+   {
+      /* Load the existing 32-bit HTILE value for this 8x8 pixel area. */
+      nir_def *input_value = nir_build_load_global(&b, 1, 32, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset)));
+
+      /* Clear the 4 VRS rate bits (6-7 and 10-11) and keep every other bit. */
+      nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1);
+   }
+   nir_push_else(&b, NULL);
+   {
+      nir_store_var(&b, htile_value, nir_imm_int(&b, 0xfffff33f), 0x1);
+   }
+   nir_pop_if(&b, NULL);
+
+   /* OR in the VRS rates loaded from the image. */
+   nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);
+
+   /* Store the updated 32-bit HTILE value, which now contains the VRS rates. */
+   nir_build_store_global(&b, output_value, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset)),
+                          .access = ACCESS_NON_READABLE);
+
+   return b.shader;
+}