diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index 2fb14839e87..e72ba555915 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -22,12 +22,12 @@ #define RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE (1u << 29) #define RADV_INSTANCE_TRIANGLE_FLIP_FACING (1u << 28) -#define RADV_BLAS_POINTER_FORCE_OPAQUE (1ul << 54) -#define RADV_BLAS_POINTER_FORCE_NON_OPAQUE (1ul << 55) -#define RADV_BLAS_POINTER_DISABLE_TRI_CULL (1ul << 56) -#define RADV_BLAS_POINTER_FLIP_FACING (1ul << 57) -#define RADV_BLAS_POINTER_SKIP_TRIANGLES (1ul << 62) -#define RADV_BLAS_POINTER_SKIP_AABBS (1ul << 63) +#define RADV_BLAS_POINTER_FORCE_OPAQUE (1ul << 54) +#define RADV_BLAS_POINTER_FORCE_NON_OPAQUE (1ul << 55) +#define RADV_BLAS_POINTER_DISABLE_TRI_CULL (1ul << 56) +#define RADV_BLAS_POINTER_FLIP_FACING (1ul << 57) +#define RADV_BLAS_POINTER_SKIP_TRIANGLES (1ul << 62) +#define RADV_BLAS_POINTER_SKIP_AABBS (1ul << 63) #ifdef VULKAN #define VK_UUID_SIZE 16 diff --git a/src/amd/vulkan/meta/radv_meta_blit2d.c b/src/amd/vulkan/meta/radv_meta_blit2d.c index 4cd7d05d768..c7b79a246cb 100644 --- a/src/amd/vulkan/meta/radv_meta_blit2d.c +++ b/src/amd/vulkan/meta/radv_meta_blit2d.c @@ -73,11 +73,10 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, VkPipelineLayout layout, str .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, .data.pUniformTexelBuffer = - &(VkDescriptorAddressInfoEXT){ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, - .address = src_buf->addr + src_buf->offset, - .range = src_buf->size - src_buf->offset, - .format = depth_format ? depth_format : src_buf->format}, + &(VkDescriptorAddressInfoEXT){.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, + .address = src_buf->addr + src_buf->offset, + .range = src_buf->size - src_buf->offset, + .format = depth_format ? depth_format : src_buf->format}, }}); const VkPushConstantsInfoKHR pc_info = { diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c index 0ab447018ce..e0e4bc01101 100644 --- a/src/amd/vulkan/meta/radv_meta_buffer.c +++ b/src/amd/vulkan/meta/radv_meta_buffer.c @@ -7,10 +7,10 @@ * SPDX-License-Identifier: MIT */ +#include "nir/radv_meta_nir.h" #include "radv_cp_dma.h" #include "radv_debug.h" #include "radv_meta.h" -#include "nir/radv_meta_nir.h" #include "radv_sdma.h" #include "radv_cs.h" diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c index 308043860ee..8fef3e9d1f7 100644 --- a/src/amd/vulkan/meta/radv_meta_clear.c +++ b/src/amd/vulkan/meta/radv_meta_clear.c @@ -700,7 +700,7 @@ radv_get_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image /* Preserve VRS rates during clears but not during initialization. */ if (is_clear && radv_image_has_vrs_htile(device, image)) { - mask &= ~(0x3 << 6); /* VRS X-rate */ + mask &= ~(0x3 << 6); /* VRS X-rate */ mask &= ~(0x3 << 10); /* VRS Y-rate */ } diff --git a/src/amd/vulkan/meta/radv_meta_decompress.c b/src/amd/vulkan/meta/radv_meta_decompress.c index 4d4b4ee55a2..d81761fd765 100644 --- a/src/amd/vulkan/meta/radv_meta_decompress.c +++ b/src/amd/vulkan/meta/radv_meta_decompress.c @@ -17,7 +17,8 @@ struct radv_htile_expand_key { }; static VkResult -get_pipeline_gfx(struct radv_device *device, struct radv_image *image, VkPipeline *pipeline_out, VkPipelineLayout *layout_out) +get_pipeline_gfx(struct radv_device *device, struct radv_image *image, VkPipeline *pipeline_out, + VkPipelineLayout *layout_out) { const uint32_t samples = image->vk.samples; struct radv_htile_expand_key key; @@ -137,9 +138,9 @@ get_pipeline_gfx(struct radv_device *device, struct radv_image *image, VkPipelin }; struct vk_meta_rendering_info render = { - .depth_attachment_format = VK_FORMAT_D32_SFLOAT_S8_UINT, - .stencil_attachment_format = VK_FORMAT_D32_SFLOAT_S8_UINT, - }; + .depth_attachment_format = VK_FORMAT_D32_SFLOAT_S8_UINT, + .stencil_attachment_format = VK_FORMAT_D32_SFLOAT_S8_UINT, + }; result = vk_meta_create_graphics_pipeline(&device->vk, &device->meta_state.device, &pipeline_create_info, &render, &key, sizeof(key), pipeline_out); diff --git a/src/amd/vulkan/nir/radv_meta_nir.c b/src/amd/vulkan/nir/radv_meta_nir.c index 0b60db3c95c..444f60afa4b 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.c +++ b/src/amd/vulkan/nir/radv_meta_nir.c @@ -7,12 +7,11 @@ * SPDX-License-Identifier: MIT */ - -#include "ac_surface.h" -#include "ac_nir_surface.h" -#include "nir/nir_format_convert.h" -#include "nir_builder.h" #include "radv_meta_nir.h" +#include "nir/nir_format_convert.h" +#include "ac_nir_surface.h" +#include "ac_surface.h" +#include "nir_builder.h" #include "radv_device.h" #include "radv_physical_device.h" @@ -63,7 +62,7 @@ radv_meta_nir_build_fs_noop(struct radv_device *dev) static void radv_meta_nir_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples, - nir_variable *input_img, nir_variable *color, nir_def *img_coord) + nir_variable *input_img, nir_variable *color, nir_def *img_coord) { const struct radv_physical_device *pdev = radv_device_physical(device); nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img); @@ -626,7 +625,7 @@ radv_meta_nir_build_itoi_compute_shader(struct radv_device *dev, bool src_3d, bo const struct glsl_type *buf_type = glsl_sampler_type(src_dim, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(dst_dim, false, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_cs-%dd-%dd-%d", src_3d ? 3 : 2, - dst_3d ? 3 : 2, samples); + dst_3d ? 3 : 2, samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); @@ -725,8 +724,8 @@ radv_meta_nir_build_cleari_compute_shader(struct radv_device *dev, bool is_3d, i : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); + nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, + is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -832,9 +831,9 @@ radv_meta_nir_build_clear_depthstencil_shaders(struct radv_device *dev, struct n { nir_builder vs_b = radv_meta_nir_init_shader( dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); - nir_builder fs_b = - radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, - unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); + nir_builder fs_b = radv_meta_nir_init_shader( + dev, MESA_SHADER_FRAGMENT, + unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); const struct glsl_type *position_out_type = glsl_vec4_type(); @@ -908,7 +907,7 @@ radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, boo const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s", - is_msaa ? "multisampled" : "singlesampled"); + is_msaa ? "multisampled" : "singlesampled"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -1367,8 +1366,8 @@ radv_meta_nir_build_depth_stencil_resolve_compute_shader(struct radv_device *dev const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type); nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d", - index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil", - get_resolve_mode_str(resolve_mode), samples); + index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil", + get_resolve_mode_str(resolve_mode), samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -1436,8 +1435,8 @@ radv_meta_nir_build_resolve_fragment_shader(struct radv_device *dev, bool is_int const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); - nir_builder b = - radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float"); + nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", samples, + is_integer ? "int" : "float"); nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); input_img->data.descriptor_set = 0; @@ -1471,8 +1470,8 @@ radv_meta_nir_build_depth_stencil_resolve_fragment_shader(struct radv_device *de const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d", - index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil", - get_resolve_mode_str(resolve_mode), samples); + index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil", + get_resolve_mode_str(resolve_mode), samples); nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); input_img->data.descriptor_set = 0; diff --git a/src/amd/vulkan/nir/radv_meta_nir.h b/src/amd/vulkan/nir/radv_meta_nir.h index e6a86571d57..f046560ed0f 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.h +++ b/src/amd/vulkan/nir/radv_meta_nir.h @@ -10,8 +10,8 @@ #ifndef RADV_META_NIR_H #define RADV_META_NIR_H -#include "vulkan/vulkan_core.h" #include "compiler/shader_enums.h" +#include "vulkan/vulkan_core.h" #include "nir_defines.h" #ifdef __cplusplus diff --git a/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c b/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c index c8e5a7ef67a..3a8d25b1ad1 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c +++ b/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c @@ -256,6 +256,5 @@ radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_graphics_sta .rast_prim = rast_prim, }; - return nir_shader_intrinsics_pass(shader, lower_load_barycentric_coord, - nir_metadata_none, &state); + return nir_shader_intrinsics_pass(shader, lower_load_barycentric_coord, nir_metadata_none, &state); } diff --git a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c index 2f498e2d159..46118a09d22 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c +++ b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c @@ -90,8 +90,7 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data) /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(b, sample_pos, -0.5f); - res2 = nir_load_barycentric_at_offset(b, 32, sample_pos, - .interp_mode = nir_intrinsic_interp_mode(intrin)); + res2 = nir_load_barycentric_at_offset(b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin)); } nir_pop_if(b, NULL); @@ -105,8 +104,8 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data) /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(b, sample_pos, -0.5f); - new_dest = nir_load_barycentric_at_offset(b, 32, sample_pos, - .interp_mode = nir_intrinsic_interp_mode(intrin)); + new_dest = + nir_load_barycentric_at_offset(b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin)); } } @@ -122,7 +121,7 @@ bool radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state) { - struct ctx ctx = { .fs_stage = fs_stage, .gfx_state = gfx_state }; + struct ctx ctx = {.fs_stage = fs_stage, .gfx_state = gfx_state}; return nir_shader_intrinsics_pass(nir, pass, nir_metadata_none, &ctx); } diff --git a/src/amd/vulkan/nir/radv_nir_lower_intrinsics_early.c b/src/amd/vulkan/nir/radv_nir_lower_intrinsics_early.c index 747283e5f3a..f4b3eabc96e 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_intrinsics_early.c +++ b/src/amd/vulkan/nir/radv_nir_lower_intrinsics_early.c @@ -41,6 +41,5 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data) bool radv_nir_lower_intrinsics_early(nir_shader *nir, bool lower_view_index_to_zero) { - return nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, - &lower_view_index_to_zero); + return nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, &lower_view_index_to_zero); } diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index 2f727fc2828..0b454533406 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -143,7 +143,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir) */ NIR_PASS(_, nir, nir_lower_array_deref_of_vec, nir_var_shader_in | nir_var_shader_out, NULL, nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load | - nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store); + nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store); if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS(_, nir, nir_lower_tess_level_array_vars_to_vec); @@ -230,15 +230,16 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s if (nir->info.stage == MESA_SHADER_VERTEX) { if (info->vs.as_ls) { NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq, - info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds); + info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds); return true; } else if (info->vs.as_es) { - NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize, info->gs_inputs_read); + NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize, + info->gs_inputs_read); return true; } } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq, - info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds); + info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds); nir_tcs_info tcs_info; nir_gather_tcs_info(nir, &tcs_info, nir->info.tess._primitive_mode, nir->info.tess.spacing); @@ -254,7 +255,8 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s NIR_PASS(_, nir, ac_nir_lower_tes_inputs_to_mem, map_input); if (info->tes.as_es) { - NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize, info->gs_inputs_read); + NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize, + info->gs_inputs_read); } return true; diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 5188eaf1ac6..0891ee00f20 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -175,7 +175,8 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray } static void -lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht, const struct radv_physical_device *pdev) +lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht, + const struct radv_physical_device *pdev) { struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index 3917119cbe1..766092a048e 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -6,8 +6,8 @@ #include "nir/radv_nir_rt_common.h" #include "bvh/bvh.h" -#include "radv_debug.h" #include "nir_builder.h" +#include "radv_debug.h" static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and); @@ -550,7 +550,8 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s dword3 |= BITFIELD_BIT(118 - 96); } - return nir_vec4(b, nir_imm_intN_t(b, dword0, 32), dword1, nir_imm_intN_t(b, dword2, 32), nir_imm_intN_t(b, dword3, 32)); + return nir_vec4(b, nir_imm_intN_t(b, dword0, 32), dword1, nir_imm_intN_t(b, dword2, 32), + nir_imm_intN_t(b, dword3, 32)); } static void @@ -573,17 +574,18 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const intersection.frontface = nir_fgt_imm(b, div, 0); nir_def *not_cull; if (pdev->info.gfx_level < GFX11) { - nir_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), - RADV_INSTANCE_TRIANGLE_FLIP_FACING); + nir_def *switch_ccw = + nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING); intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw); not_cull = ray_flags->no_skip_triangles; - nir_def *not_facing_cull = nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, - ray_flags->no_cull_back); + nir_def *not_facing_cull = + nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back); - not_cull = nir_iand(b, not_cull, nir_ior(b, not_facing_cull, - nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), - RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE))); + not_cull = nir_iand(b, not_cull, + nir_ior(b, not_facing_cull, + nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), + RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE))); } else { not_cull = nir_imm_true(b); } @@ -717,7 +719,7 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3 static nir_def * build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def *base_addr, nir_def *ptr_flags, - bool overwrite) + bool overwrite) { if (pdev->info.gfx_level < GFX11) return base_addr; @@ -781,10 +783,11 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc .no_skip_aabbs = radv_test_flag(b, args, SpvRayFlagsSkipAABBsKHRMask, false), }; - nir_def *ptr_flags = nir_iand_imm(b, args->flags, ~(SpvRayFlagsTerminateOnFirstHitKHRMask | SpvRayFlagsSkipClosestHitShaderKHRMask)); + nir_def *ptr_flags = + nir_iand_imm(b, args->flags, ~(SpvRayFlagsTerminateOnFirstHitKHRMask | SpvRayFlagsSkipClosestHitShaderKHRMask)); - nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, pdev, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), - 0x1); + nir_store_deref(b, args->vars.bvh_base, + build_bvh_base(b, pdev, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1); nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); @@ -931,16 +934,15 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc * meaningless. */ uint32_t forced_opaqueness_mask = SpvRayFlagsOpaqueKHRMask | SpvRayFlagsNoOpaqueKHRMask; - nir_def *instance_flag_mask = nir_bcsel(b, nir_test_mask(b, ptr_flags, forced_opaqueness_mask), - nir_imm_int64(b, ~((uint64_t)forced_opaqueness_mask << 54ull)), - nir_imm_int64(b, ~0ull)); + nir_def *instance_flag_mask = + nir_bcsel(b, nir_test_mask(b, ptr_flags, forced_opaqueness_mask), + nir_imm_int64(b, ~((uint64_t)forced_opaqueness_mask << 54ull)), nir_imm_int64(b, ~0ull)); nir_def *instance_pointer = nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)); instance_pointer = nir_iand(b, instance_pointer, instance_flag_mask); - nir_store_deref( - b, args->vars.bvh_base, - build_bvh_base(b, pdev, instance_pointer, ptr_flags, false), 0x1); + nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, pdev, instance_pointer, ptr_flags, false), + 0x1); /* Push the instance root node onto the stack */ if (args->use_bvh_stack_rtn) { diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8c997135598..2f0645535a2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -644,11 +644,9 @@ radv_cmd_buffer_annotate(struct radv_cmd_buffer *cmd_buffer, const char *annotat device->ws->cs_annotate(cmd_buffer->cs, annotation); } -#define RADV_TASK_SHADER_SENSITIVE_STAGES (\ - VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |\ - VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT |\ - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |\ - VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT) +#define RADV_TASK_SHADER_SENSITIVE_STAGES \ + (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | \ + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT) static void radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask, @@ -5032,7 +5030,8 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v /* try to use a pre-compiled prolog first */ struct radv_shader_part *prolog = NULL; - if (cmd_buffer->state.can_use_simple_vertex_input && !as_ls && !misaligned_mask && !vi_state->alpha_adjust_lo && !vi_state->alpha_adjust_hi) { + if (cmd_buffer->state.can_use_simple_vertex_input && !as_ls && !misaligned_mask && !vi_state->alpha_adjust_lo && + !vi_state->alpha_adjust_hi) { if (!instance_rate_inputs) { prolog = device->simple_vs_prologs[num_attributes - 1]; } else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors && @@ -5525,8 +5524,8 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_LOGIC_OP_ENABLE)) radv_emit_color_blend(cmd_buffer); - if (states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE | - RADV_DYNAMIC_POLYGON_MODE | RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) + if (states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE | RADV_DYNAMIC_POLYGON_MODE | + RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) radv_emit_rasterization_samples(cmd_buffer); /* RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE is handled by radv_emit_db_shader_control. */ @@ -5770,7 +5769,8 @@ radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t idx, struct } ALWAYS_INLINE static void -radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, const unsigned i, const bool uses_dynamic_inputs, uint32_t *desc) +radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, const unsigned i, + const bool uses_dynamic_inputs, uint32_t *desc) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -5779,9 +5779,9 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str if (uses_dynamic_inputs && !(vi_state->attribute_mask & BITFIELD_BIT(i))) { /* No vertex attribute description given: assume that the shader doesn't use this - * location (vb_desc_usage_mask can be larger than attribute usage) and use a null - * descriptor to avoid hangs (prologs load all attributes, even if there are holes). - */ + * location (vb_desc_usage_mask can be larger than attribute usage) and use a null + * descriptor to avoid hangs (prologs load all attributes, even if there are holes). + */ memset(desc, 0, 4 * 4); return; } @@ -5795,7 +5795,7 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str rsrc_word3 = vbo_info.non_trivial_format; } else { rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (pdev->info.gfx_level >= GFX10) { rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT); @@ -5808,9 +5808,9 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str if (!vbo_info.va) { if (uses_dynamic_inputs) { /* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need - * to include the format/word3 so that the alpha channel is 1 for formats without an - * alpha channel. - */ + * to include the format/word3 so that the alpha channel is 1 for formats without an + * alpha channel. + */ desc[0] = 0; desc[1] = S_008F04_STRIDE(16); desc[2] = 0; @@ -5835,22 +5835,22 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str } else { num_records = (num_records - attrib_end) / stride + 1; /* If attrib_offset>stride, then the compiler will increase the vertex index by - * attrib_offset/stride and decrease the offset by attrib_offset%stride. This is - * only allowed with static strides. - */ + * attrib_offset/stride and decrease the offset by attrib_offset%stride. This is + * only allowed with static strides. + */ num_records += vbo_info.attrib_index_offset; } /* GFX10 uses OOB_SELECT_RAW if stride==0, so convert num_records from elements into - * into bytes in that case. GFX8 always uses bytes. - */ + * into bytes in that case. GFX8 always uses bytes. + */ if (num_records && (chip == GFX8 || (chip != GFX9 && !stride))) { num_records = (num_records - 1) * stride + attrib_end; } else if (!num_records) { /* On GFX9, it seems bounds checking is disabled if both - * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and - * GFX10.3 but it doesn't hurt. - */ + * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and + * GFX10.3 but it doesn't hurt. + */ if (uses_dynamic_inputs) { desc[0] = 0; desc[1] = S_008F04_STRIDE(16); @@ -5869,9 +5869,9 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str if (chip >= GFX10) { /* OOB_SELECT chooses the out-of-bounds check: - * - 1: index >= NUM_RECORDS (Structured) - * - 3: offset >= NUM_RECORDS (Raw) - */ + * - 1: index >= NUM_RECORDS (Structured) + * - 3: offset >= NUM_RECORDS (Raw) + */ int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW; rsrc_word3 |= S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(chip < GFX11); } @@ -5887,7 +5887,8 @@ radv_write_vertex_descriptor(const struct radv_cmd_buffer *cmd_buffer, const str } ALWAYS_INLINE static void -radv_write_vertex_descriptors_dynamic(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, void *vb_ptr) +radv_write_vertex_descriptors_dynamic(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, + void *vb_ptr) { unsigned desc_index = 0; for (unsigned i = 0; i < vs->info.vs.num_attributes; i++) { @@ -5900,7 +5901,7 @@ ALWAYS_INLINE static void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, void *vb_ptr) { unsigned desc_index = 0; - u_foreach_bit(i, vs->info.vs.vb_desc_usage_mask) { + u_foreach_bit (i, vs->info.vs.vb_desc_usage_mask) { uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4]; radv_write_vertex_descriptor(cmd_buffer, vs, i, false, desc); } @@ -5919,7 +5920,8 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer) /* Mesh shaders don't have vertex descriptors. */ assert(!cmd_buffer->state.mesh_shading); - unsigned vb_desc_alloc_size = (uses_dynamic_inputs ? vs->info.vs.num_attributes : util_bitcount(vs->info.vs.vb_desc_usage_mask)) * 16; + unsigned vb_desc_alloc_size = + (uses_dynamic_inputs ? vs->info.vs.num_attributes : util_bitcount(vs->info.vs.vb_desc_usage_mask)) * 16; unsigned vb_offset; void *vb_ptr; uint64_t va; @@ -8199,8 +8201,7 @@ radv_CmdSetPrimitiveTopology(VkCommandBuffer commandBuffer, VkPrimitiveTopology state->dirty |= RADV_CMD_DIRTY_GUARDBAND; /* for line stipple/mode */ - if (radv_prim_is_lines(state->dynamic.vk.ia.primitive_topology) != - radv_prim_is_lines(primitive_topology)) + if (radv_prim_is_lines(state->dynamic.vk.ia.primitive_topology) != radv_prim_is_lines(primitive_topology)) state->dirty |= RADV_DYNAMIC_RASTERIZATION_SAMPLES; state->dynamic.vk.ia.primitive_topology = primitive_topology; @@ -13537,16 +13538,15 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t dep_count, const VkDep * VK_QUEUE_FAMILY_EXTERNAL, VK_QUEUE_FAMILY_FOREIGN_EXT, or a valid * queue family */ - if (src_qf_index == dst_qf_index) - { + if (src_qf_index == dst_qf_index) { src_qf_index = VK_QUEUE_FAMILY_IGNORED; dst_qf_index = VK_QUEUE_FAMILY_IGNORED; } - radv_handle_image_transition( - cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, dep_info->pImageMemoryBarriers[i].newLayout, - src_qf_index, dst_qf_index, &dep_info->pImageMemoryBarriers[i].subresourceRange, - sample_locs_info ? &sample_locations : NULL); + radv_handle_image_transition(cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, + dep_info->pImageMemoryBarriers[i].newLayout, src_qf_index, dst_qf_index, + &dep_info->pImageMemoryBarriers[i].subresourceRange, + sample_locs_info ? &sample_locations : NULL); } } diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 8c34bdf2454..c3d9593e200 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -429,12 +429,12 @@ struct radv_cmd_state { bool rb_noncoherent_dirty; /* Conditional rendering info. */ - uint8_t predication_op; /* 32-bit or 64-bit predicate value */ - int predication_type; /* -1: disabled, 0: normal, 1: inverted */ + uint8_t predication_op; /* 32-bit or 64-bit predicate value */ + int predication_type; /* -1: disabled, 0: normal, 1: inverted */ uint64_t user_predication_va; /* User predication VA. */ uint64_t emulated_predication_va; /* Emulated VA if no 32-bit predication support. */ - uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */ - bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */ + uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */ + bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */ bool saved_user_cond_render; bool is_user_cond_render_suspended; diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 078cb6725d2..afed4e06c90 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -119,7 +119,6 @@ void radv_check_trap_handler(struct radv_queue *queue); bool radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info); - ALWAYS_INLINE static bool radv_device_fault_detection_enabled(const struct radv_device *device) { diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 5b32e27960e..849f8b0436c 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -6,9 +6,9 @@ */ #include "radv_descriptor_set.h" -#include "radv_descriptors.h" #include "radv_cmd_buffer.h" #include "radv_descriptor_pool.h" +#include "radv_descriptors.h" #include "radv_entrypoints.h" #include "radv_sampler.h" #include "sid.h" diff --git a/src/amd/vulkan/radv_descriptor_update_template.c b/src/amd/vulkan/radv_descriptor_update_template.c index 840844b5671..a75c346ca82 100644 --- a/src/amd/vulkan/radv_descriptor_update_template.c +++ b/src/amd/vulkan/radv_descriptor_update_template.c @@ -83,17 +83,17 @@ radv_CreateDescriptorUpdateTemplate(VkDevice _device, const VkDescriptorUpdateTe dst_stride = binding_layout->size / 4; } - templ->entry[i] = (struct radv_descriptor_update_template_entry){ - .descriptor_type = entry->descriptorType, - .descriptor_count = entry->descriptorCount, - .src_offset = entry->offset, - .src_stride = entry->stride, - .dst_offset = dst_offset, - .dst_stride = dst_stride, - .buffer_offset = buffer_offset, - .has_sampler = !binding_layout->immutable_samplers_offset, - .has_ycbcr_sampler = binding_layout->has_ycbcr_sampler, - .immutable_samplers = immutable_samplers}; + templ->entry[i] = + (struct radv_descriptor_update_template_entry){.descriptor_type = entry->descriptorType, + .descriptor_count = entry->descriptorCount, + .src_offset = entry->offset, + .src_stride = entry->stride, + .dst_offset = dst_offset, + .dst_stride = dst_stride, + .buffer_offset = buffer_offset, + .has_sampler = !binding_layout->immutable_samplers_offset, + .has_ycbcr_sampler = binding_layout->has_ycbcr_sampler, + .immutable_samplers = immutable_samplers}; } *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 2220e2e1478..0aff835a785 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -927,13 +927,13 @@ fail: /* For MSAA sample positions. */ #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ - ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \ - (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | (((unsigned)(s2y)&0xf) << 20) | \ - (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28)) + ((((unsigned)(s0x) & 0xf) << 0) | (((unsigned)(s0y) & 0xf) << 4) | (((unsigned)(s1x) & 0xf) << 8) | \ + (((unsigned)(s1y) & 0xf) << 12) | (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ + (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) /* For obtaining location coordinates from registers */ -#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0))) -#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf) +#define SEXT4(x) ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0))) +#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf) #define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2) #define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1) diff --git a/src/amd/vulkan/radv_device.h b/src/amd/vulkan/radv_device.h index d27f20794e5..89f7eb41dcd 100644 --- a/src/amd/vulkan/radv_device.h +++ b/src/amd/vulkan/radv_device.h @@ -366,7 +366,6 @@ void radv_initialise_color_surface(struct radv_device *device, struct radv_color void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds); - void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds, struct radv_image_view *iview, VkImageAspectFlags ds_aspects); diff --git a/src/amd/vulkan/radv_device_memory.c b/src/amd/vulkan/radv_device_memory.c index a3de89bd9f3..a172ce60805 100644 --- a/src/amd/vulkan/radv_device_memory.c +++ b/src/amd/vulkan/radv_device_memory.c @@ -16,13 +16,11 @@ #include "radv_image.h" #include "radv_rmv.h" -#include "vk_log.h" #include "vk_debug_utils.h" +#include "vk_log.h" static void -radv_device_memory_emit_report(struct radv_device *device, - struct radv_device_memory *mem, - bool is_alloc, +radv_device_memory_emit_report(struct radv_device *device, struct radv_device_memory *mem, bool is_alloc, VkResult result) { if (likely(!device->vk.memory_reports)) @@ -32,17 +30,15 @@ radv_device_memory_emit_report(struct radv_device *device, if (result != VK_SUCCESS) { type = VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_ALLOCATION_FAILED_EXT; } else if (is_alloc) { - type = mem->import_handle_type - ? VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_IMPORT_EXT - : VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_ALLOCATE_EXT; + type = mem->import_handle_type ? VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_IMPORT_EXT + : VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_ALLOCATE_EXT; } else { - type = mem->import_handle_type - ? VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_UNIMPORT_EXT - : VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_FREE_EXT; + type = mem->import_handle_type ? VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_UNIMPORT_EXT + : VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_FREE_EXT; } - vk_emit_device_memory_report(&device->vk, type, mem->bo->obj_id, mem->bo->size, - VK_OBJECT_TYPE_DEVICE_MEMORY, (uintptr_t)(mem), mem->heap_index); + vk_emit_device_memory_report(&device->vk, type, mem->bo->obj_id, mem->bo->size, VK_OBJECT_TYPE_DEVICE_MEMORY, + (uintptr_t)(mem), mem->heap_index); } void diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index d31a9ac7a7e..db02355c112 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -19,7 +19,7 @@ #include "vk_shader_module.h" #define PKT3_INDIRECT_BUFFER_BYTES 16 -#define DGC_VBO_INFO_SIZE (sizeof(struct radv_vbo_info) + 4 /* vbo_offsets */) +#define DGC_VBO_INFO_SIZE (sizeof(struct radv_vbo_info) + 4 /* vbo_offsets */) /* The DGC command buffer layout is quite complex, here's some explanations: * diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index efc1bd321a4..586ae8e609a 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -630,7 +630,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns VkFormat format = radv_image_get_plane_format(pdev, image, plane_id); const struct util_format_description *desc = radv_format_description(format); const VkImageAlignmentControlCreateInfoMESA *alignment = - vk_find_struct_const(pCreateInfo->pNext, IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA); + vk_find_struct_const(pCreateInfo->pNext, IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA); bool is_depth, is_stencil; is_depth = util_format_has_depth(desc); @@ -721,9 +721,8 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns bool is_4k_capable; if (!vk_format_is_depth_or_stencil(image_format)) { - is_4k_capable = - !(pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && (flags & RADEON_SURF_DISABLE_DCC) && - (flags & RADEON_SURF_NO_FMASK); + is_4k_capable = !(pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && + (flags & RADEON_SURF_DISABLE_DCC) && (flags & RADEON_SURF_NO_FMASK); } else { /* Depth-stencil format without DEPTH_STENCIL usage does not work either. */ is_4k_capable = false; @@ -1180,14 +1179,13 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in * to sample it later with a linear filter, it will get garbage after the height it wants, * so we let the user specify the width/height unaligned, and align them preallocation. */ - if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) { if (!device->vk.enabled_features.videoMaintenance1) assert(profile_list); - const bool is_linear = image->vk.tiling == VK_IMAGE_TILING_LINEAR || - image->planes[0].surface.modifier == DRM_FORMAT_MOD_LINEAR; + const bool is_linear = + image->vk.tiling == VK_IMAGE_TILING_LINEAR || image->planes[0].surface.modifier == DRM_FORMAT_MOD_LINEAR; /* Only linear decode target requires the custom alignment. */ if (is_linear || !(image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) { @@ -1222,8 +1220,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE; } - if (plane > 0 && image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) { + if (plane > 0 && + image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) { image->planes[plane].surface.flags |= RADEON_SURF_FORCE_SWIZZLE_MODE; image->planes[plane].surface.u.gfx9.swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode; } @@ -1813,7 +1811,7 @@ radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImag if (status) *status->pResult = VK_SUCCESS; - /* Ignore this struct on Android, we cannot access swapchain structures there. */ + /* Ignore this struct on Android, we cannot access swapchain structures there. */ #ifdef RADV_USE_WSI_PLATFORM if (!mem) { const VkBindImageMemorySwapchainInfoKHR *swapchain_info = diff --git a/src/amd/vulkan/radv_image_view.c b/src/amd/vulkan/radv_image_view.c index 42fa26206e4..9f5a140d21a 100644 --- a/src/amd/vulkan/radv_image_view.c +++ b/src/amd/vulkan/radv_image_view.c @@ -10,11 +10,11 @@ #include "vk_log.h" -#include "radv_image_view.h" #include "radv_buffer_view.h" #include "radv_entrypoints.h" #include "radv_formats.h" #include "radv_image.h" +#include "radv_image_view.h" #include "ac_descriptors.h" #include "ac_formats.h" diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 050ed53afc6..6bcf18847bd 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -16,9 +16,9 @@ #define VG(x) ((void)0) #endif -#include "radv_instance.h" #include "radv_debug.h" #include "radv_entrypoints.h" +#include "radv_instance.h" #include "radv_wsi.h" #include "util/driconf.h" @@ -347,7 +347,7 @@ static const struct vk_instance_extension_table radv_instance_extensions_support }; static enum radeon_ctx_pstate -radv_parse_pstate(const char* str) +radv_parse_pstate(const char *str) { if (!strcmp(str, "peak")) { return RADEON_CTX_PSTATE_PEAK; diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 1e5eb70b5f8..8a795b02db0 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -140,13 +140,13 @@ enum radv_perfcounter_op { RADV_PC_OP_SUM_WEIGHTED_4, }; -#define S_REG_SEL(x) ((x)&0xFFFF) -#define G_REG_SEL(x) ((x)&0xFFFF) +#define S_REG_SEL(x) ((x) & 0xFFFF) +#define G_REG_SEL(x) ((x) & 0xFFFF) #define S_REG_BLOCK(x) ((x) << 16) #define G_REG_BLOCK(x) (((x) >> 16) & 0x7FFF) -#define S_REG_OFFSET(x) ((x)&0xFFFF) -#define G_REG_OFFSET(x) ((x)&0xFFFF) +#define S_REG_OFFSET(x) ((x) & 0xFFFF) +#define G_REG_OFFSET(x) ((x) & 0xFFFF) #define S_REG_INSTANCES(x) ((x) << 16) #define G_REG_INSTANCES(x) (((x) >> 16) & 0x7FFF) #define S_REG_CONSTANT(x) ((x) << 31) diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index f34bb3dd00d..66349b863d4 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -648,12 +648,12 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .KHR_video_decode_queue = pdev->video_decode_enabled, .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && pdev->video_decode_enabled, .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && pdev->video_decode_enabled, - .KHR_video_decode_vp9 = (radv_video_decode_vp9_supported(pdev) && - VIDEO_CODEC_VP9DEC && pdev->video_decode_enabled), + .KHR_video_decode_vp9 = + (radv_video_decode_vp9_supported(pdev) && VIDEO_CODEC_VP9DEC && pdev->video_decode_enabled), .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && pdev->video_encode_enabled, .KHR_video_encode_h265 = VIDEO_CODEC_H265ENC && pdev->video_encode_enabled, - .KHR_video_encode_av1 = (radv_video_encode_av1_supported(pdev) && - VIDEO_CODEC_AV1ENC && pdev->video_encode_enabled), + .KHR_video_encode_av1 = + (radv_video_encode_av1_supported(pdev) && VIDEO_CODEC_AV1ENC && pdev->video_encode_enabled), .KHR_video_encode_queue = pdev->video_encode_enabled, .KHR_vulkan_memory_model = true, .KHR_workgroup_memory_explicit_layout = true, diff --git a/src/amd/vulkan/radv_physical_device.h b/src/amd/vulkan/radv_physical_device.h index d6cbc72b39e..183c515a1f6 100644 --- a/src/amd/vulkan/radv_physical_device.h +++ b/src/amd/vulkan/radv_physical_device.h @@ -14,15 +14,15 @@ #include "ac_gpu_info.h" #include "ac_perfcounter.h" +#include "ac_uvd_dec.h" +#include "ac_vcn_enc.h" #include "radv_instance.h" #include "radv_queue.h" #include "radv_radeon_winsys.h" -#include "ac_uvd_dec.h" -#include "ac_vcn_enc.h" #include "wsi_common.h" -#include "nir_shader_compiler_options.h" #include "compiler/shader_enums.h" +#include "nir_shader_compiler_options.h" #include "vk_physical_device.h" diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 0fdc37a850e..542b358956f 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -573,9 +573,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS(_, stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, - pdev->info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog, - radv_select_hw_stage(&stage->info, gfx_level), stage->info.wave_size, stage->info.workgroup_size, - &stage->args.ac); + pdev->info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog, + radv_select_hw_stage(&stage->info, gfx_level), stage->info.wave_size, stage->info.workgroup_size, + &stage->args.ac); NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi); if (!stage->key.optimisations_disabled) { diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 397b0c07c5b..4b050e6048b 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -11,14 +11,14 @@ #include "util/mesa-sha1.h" #include "util/u_atomic.h" #include "util/u_debug.h" -#include "nir_serialize.h" #include "nir.h" +#include "nir_serialize.h" #include "radv_debug.h" #include "radv_descriptor_set.h" #include "radv_pipeline.h" +#include "radv_pipeline_binary.h" #include "radv_pipeline_compute.h" #include "radv_pipeline_graphics.h" -#include "radv_pipeline_binary.h" #include "radv_pipeline_rt.h" #include "radv_shader.h" #include "vk_pipeline.h" @@ -448,8 +448,7 @@ struct radv_ray_tracing_pipeline_cache_data { bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_ray_tracing_pipeline *pipeline, - bool *found_in_application_cache) + struct radv_ray_tracing_pipeline *pipeline, bool *found_in_application_cache) { struct radv_pipeline_cache_object *pipeline_obj; diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 8c99d3adcf0..543a21dfc3e 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1640,7 +1640,6 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages) if (p & nir_progress_producer) { radv_optimize_nir_algebraic(producer, true, false); NIR_PASS(_, producer, nir_opt_undef); - } if (p & nir_progress_consumer) { radv_optimize_nir_algebraic(consumer, true, false); @@ -2295,7 +2294,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask; NIR_PASS(_, nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, pdev->info.has_ls_vgpr_init_bug, - AC_HW_VERTEX_SHADER, 64, 64, &gs_copy_stage.args.ac); + AC_HW_VERTEX_SHADER, 64, 64, &gs_copy_stage.args.ac); NIR_PASS(_, nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi); struct radv_graphics_pipeline_key key = {0}; @@ -2446,8 +2445,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, struct r const VkPipelineShaderStageCreateInfo *sinfo = &lib->stages[i]; gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage); - radv_pipeline_stage_init(lib->base.base.create_flags, sinfo, - &lib->layout, &lib->stage_keys[s], &stages[s]); + radv_pipeline_stage_init(lib->base.base.create_flags, sinfo, &lib->layout, &lib->stage_keys[s], &stages[s]); } /* Import the NIR shaders (after SPIRV->NIR). */ @@ -3241,8 +3239,9 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct r if (pdev->info.gfx_level >= GFX10) { pipeline->vertex_input.non_trivial_format[i] = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format); } else { - pipeline->vertex_input.non_trivial_format[i] = - vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf); + pipeline->vertex_input.non_trivial_format[i] = vtx_info->dst_sel | + S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | + S_008F0C_DATA_FORMAT(hw_format & 0xf); } } else { pipeline->vertex_input.nontrivial_formats |= BITFIELD_BIT(i); diff --git a/src/amd/vulkan/radv_pipeline_graphics.h b/src/amd/vulkan/radv_pipeline_graphics.h index 6dba7b375a3..545be0a0b6c 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.h +++ b/src/amd/vulkan/radv_pipeline_graphics.h @@ -116,7 +116,6 @@ struct radv_graphics_pipeline { unsigned rast_prim; - /* Custom blend mode for internal operations. */ unsigned custom_blend_mode; diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 597b2ffd49c..d38758870f2 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -198,11 +198,11 @@ radv_rt_init_capture_replay(struct radv_device *device, const VkRayTracingPipeli } } - reloc_out: - simple_mtx_unlock(&library_shader->replay_mtx); - if (result != VK_SUCCESS) - return result; - } + reloc_out: + simple_mtx_unlock(&library_shader->replay_mtx); + if (result != VK_SUCCESS) + return result; + } } return result; @@ -618,8 +618,8 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca struct radv_shader_stage *stage = &stages[i]; gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage); - radv_pipeline_stage_init(pipeline->base.base.create_flags, &pCreateInfo->pStages[i], - pipeline_layout, &stage_keys[s], stage); + radv_pipeline_stage_init(pipeline->base.base.create_flags, &pCreateInfo->pStages[i], pipeline_layout, + &stage_keys[s], stage); /* precompile the shader */ stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false); diff --git a/src/amd/vulkan/radv_pipeline_rt.h b/src/amd/vulkan/radv_pipeline_rt.h index ad40801169d..ece5a0f259a 100644 --- a/src/amd/vulkan/radv_pipeline_rt.h +++ b/src/amd/vulkan/radv_pipeline_rt.h @@ -11,9 +11,9 @@ #ifndef RADV_PIPELINE_RT_H #define RADV_PIPELINE_RT_H +#include "util/bitset.h" #include "radv_pipeline_compute.h" #include "radv_shader.h" -#include "util/bitset.h" struct radv_ray_tracing_pipeline { struct radv_compute_pipeline base; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 98435fafe57..f4b4b42deea 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -2742,8 +2742,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta radv_cs_add_buffer(device->ws, cs, pool->bo); - assert(cmd_buffer->qf != RADV_QUEUE_VIDEO_DEC && - cmd_buffer->qf != RADV_QUEUE_VIDEO_ENC); + assert(cmd_buffer->qf != RADV_QUEUE_VIDEO_DEC && cmd_buffer->qf != RADV_QUEUE_VIDEO_ENC); if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { if (instance->drirc.flush_before_timestamp_write) { diff --git a/src/amd/vulkan/radv_query.h b/src/amd/vulkan/radv_query.h index b428bc0abbf..7df336d6f7e 100644 --- a/src/amd/vulkan/radv_query.h +++ b/src/amd/vulkan/radv_query.h @@ -25,7 +25,7 @@ struct radv_query_pool { uint64_t size; char *ptr; bool uses_emulated_queries; - bool uses_ace; /* For task shader invocations on GFX10.3+ */ + bool uses_ace; /* For task shader invocations on GFX10.3+ */ bool uses_shader_query_buf; /* For generated/written primitives on GFX12+ */ }; diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index c6db4ba0c8a..11d1f60f456 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -103,7 +103,7 @@ struct radeon_cmdbuf { #define RADEON_SURF_MODE_SHIFT 8 #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK) -#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT) +#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT) #define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT)) enum radeon_bo_layout { diff --git a/src/amd/vulkan/radv_sampler.c b/src/amd/vulkan/radv_sampler.c index 85e136dfb73..61d62771ea4 100644 --- a/src/amd/vulkan/radv_sampler.c +++ b/src/amd/vulkan/radv_sampler.c @@ -12,10 +12,10 @@ #include "ac_descriptors.h" -#include "radv_sampler.h" #include "radv_device.h" #include "radv_entrypoints.h" #include "radv_physical_device.h" +#include "radv_sampler.h" static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index a1243e241e5..78c008d668c 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -384,8 +384,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .device = device, .object = stage->spirv.object, }; - const struct spirv_capabilities spirv_caps = - vk_physical_device_get_spirv_capabilities(device->vk.physical); + const struct spirv_capabilities spirv_caps = vk_physical_device_get_spirv_capabilities(device->vk.physical); const struct spirv_to_nir_options spirv_options = { .amd_gcn_shader = true, .amd_shader_ballot = true, @@ -1666,7 +1665,6 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->ngg_info.max_gsprims * gs_num_invocations); } - info->regs.vgt_gs_max_vert_out = info->gs.vertices_out; } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index cb4641a6496..a0a8572ede6 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -46,8 +46,8 @@ enum { #define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) #define radv_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); stage = ffs(__tmp) - 1, __tmp; \ - __tmp &= ~(1 << (stage))) + for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK); \ + stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) enum radv_nggc_settings { radv_nggc_none = 0, @@ -200,22 +200,22 @@ struct radv_nir_compiler_options { } debug; }; -#define SET_SGPR_FIELD(field, value) (((unsigned)(value)&field##__MASK) << field##__SHIFT) +#define SET_SGPR_FIELD(field, value) (((unsigned)(value) & field##__MASK) << field##__SHIFT) -#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 0 -#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0x7f +#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 0 +#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0x7f #define TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN__SHIFT 7 #define TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN__MASK 0x1f #define TCS_OFFCHIP_LAYOUT_TCS_MEM_ATTRIB_STRIDE__SHIFT 12 #define TCS_OFFCHIP_LAYOUT_TCS_MEM_ATTRIB_STRIDE__MASK 0x1f -#define TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS__SHIFT 17 -#define TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS__MASK 0x3f -#define TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS__SHIFT 23 -#define TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS__MASK 0x3f -#define TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE__SHIFT 29 -#define TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE__MASK 0x03 -#define TCS_OFFCHIP_LAYOUT_TES_READS_TF__SHIFT 31 -#define TCS_OFFCHIP_LAYOUT_TES_READS_TF__MASK 0x01 +#define TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS__SHIFT 17 +#define TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS__MASK 0x3f +#define TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS__SHIFT 23 +#define TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS__MASK 0x3f +#define TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE__SHIFT 29 +#define TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE__MASK 0x03 +#define TCS_OFFCHIP_LAYOUT_TES_READS_TF__SHIFT 31 +#define TCS_OFFCHIP_LAYOUT_TES_READS_TF__MASK 0x01 #define TES_STATE_NUM_PATCHES__SHIFT 0 #define TES_STATE_NUM_PATCHES__MASK 0xff diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 5dad01ef5f7..29a47c5f1bf 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -22,15 +22,13 @@ mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info) } static bool -radv_use_vs_prolog(const nir_shader *nir, - const struct radv_graphics_state_key *gfx_state) +radv_use_vs_prolog(const nir_shader *nir, const struct radv_graphics_state_key *gfx_state) { return gfx_state->vs.has_prolog && nir->info.inputs_read; } static bool -radv_use_per_attribute_vb_descs(const nir_shader *nir, - const struct radv_graphics_state_key *gfx_state, +radv_use_per_attribute_vb_descs(const nir_shader *nir, const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key) { return stage_key->vertex_robustness1 || radv_use_vs_prolog(nir, gfx_state); @@ -367,7 +365,7 @@ gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info) const nir_xfb_info *xfb = nir->xfb_info; - u_foreach_bit(output_buffer, xfb->buffers_written) { + u_foreach_bit (output_buffer, xfb->buffers_written) { unsigned stream = xfb->buffer_to_stream[output_buffer]; so->enabled_stream_buffers_mask |= (1 << output_buffer) << (stream * 4); so->strides[output_buffer] = xfb->buffers[output_buffer].stride / 4; @@ -398,9 +396,8 @@ radv_get_output_masks(const struct nir_shader *nir, const struct radv_graphics_s uint64_t *per_vtx_mask, uint64_t *per_prim_mask) { /* These are not compiled into neither output param nor position exports. */ - const uint64_t special_mask = VARYING_BIT_PRIMITIVE_COUNT | - VARYING_BIT_PRIMITIVE_INDICES | - VARYING_BIT_CULL_PRIMITIVE; + const uint64_t special_mask = + VARYING_BIT_PRIMITIVE_COUNT | VARYING_BIT_PRIMITIVE_INDICES | VARYING_BIT_CULL_PRIMITIVE; *per_prim_mask = nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask; *per_vtx_mask = nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask; @@ -659,8 +656,8 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct if (!info->inputs_linked) { info->tes.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask( nir->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER))); - info->tes.num_linked_patch_inputs = util_last_bit64( - radv_gather_unlinked_patch_io_mask(nir->info.inputs_read, nir->info.patch_inputs_read)); + info->tes.num_linked_patch_inputs = + util_last_bit64(radv_gather_unlinked_patch_io_mask(nir->info.inputs_read, nir->info.patch_inputs_read)); } if (!info->outputs_linked) info->tes.num_linked_outputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.outputs_written)); diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index b6df44e4788..f9aa4bf69f0 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -98,7 +98,7 @@ struct radv_shader_info { bool has_xfb_query; uint8_t ngg_lds_scratch_size; uint32_t num_tess_patches; - uint32_t esgs_itemsize; /* Only for VS or TES as ES */ + uint32_t esgs_itemsize; /* Only for VS or TES as ES */ uint32_t ngg_lds_vertex_size; /* VS,TES: Cull+XFB, GS: GSVS size */ struct radv_vs_output_info outinfo; unsigned workgroup_size; @@ -111,7 +111,7 @@ struct radv_shader_info { bool outputs_linked; bool merged_shader_compiled_separately; /* GFX9+ */ bool force_indirect_desc_sets; - uint64_t gs_inputs_read; /* Mask of GS inputs read (only used by linked ES) */ + uint64_t gs_inputs_read; /* Mask of GS inputs read (only used by linked ES) */ unsigned nir_shared_size; /* Only used by LLVM. */ struct { diff --git a/src/amd/vulkan/radv_shader_object.c b/src/amd/vulkan/radv_shader_object.c index bfa0d0d893d..468ae72c147 100644 --- a/src/amd/vulkan/radv_shader_object.c +++ b/src/amd/vulkan/radv_shader_object.c @@ -6,6 +6,7 @@ #include "vk_log.h" +#include "util/blob.h" #include "radv_device.h" #include "radv_entrypoints.h" #include "radv_physical_device.h" @@ -13,7 +14,6 @@ #include "radv_pipeline_compute.h" #include "radv_pipeline_graphics.h" #include "radv_shader_object.h" -#include "util/blob.h" static void radv_shader_object_destroy_variant(struct radv_device *device, VkShaderCodeTypeEXT code_type, diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index 5f4d500d6a8..ccb2fa2334c 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -291,7 +291,7 @@ radv_spm_init(struct radv_device *device) return false; device->spm.buffer_size = 32 * 1024 * 1024; /* Default to 32MB. */ - device->spm.sample_interval = 4096; /* Default to 4096 clk. */ + device->spm.sample_interval = 4096; /* Default to 4096 clk. */ if (!radv_spm_init_bo(device)) return false; diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 328c4424211..4f245a175c9 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -757,8 +757,8 @@ radv_reset_sqtt_trace(struct radv_device *device) simple_mtx_lock(&device->sqtt_command_pool_mtx); for (unsigned i = 0; i < ARRAY_SIZE(device->sqtt_command_pool); i++) { if (device->sqtt_command_pool[i]) - vk_common_TrimCommandPool(radv_device_to_handle(device), vk_command_pool_to_handle(device->sqtt_command_pool[i]), - 0); + vk_common_TrimCommandPool(radv_device_to_handle(device), + vk_command_pool_to_handle(device->sqtt_command_pool[i]), 0); } simple_mtx_unlock(&device->sqtt_command_pool_mtx); } diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index 0c6c705488e..4662210b5ab 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -24,19 +24,19 @@ #include "radv_image_view.h" #include "radv_video.h" -#define RADV_VIDEO_H264_MAX_DPB_SLOTS 17 -#define RADV_VIDEO_H264_MAX_NUM_REF_FRAME 16 -#define RADV_VIDEO_H265_MAX_DPB_SLOTS 17 -#define RADV_VIDEO_H265_MAX_NUM_REF_FRAME 15 -#define RADV_VIDEO_AV1_MAX_DPB_SLOTS 9 -#define RADV_VIDEO_AV1_MAX_NUM_REF_FRAME 7 -#define RADV_VIDEO_VP9_MAX_DPB_SLOTS 9 -#define RADV_VIDEO_VP9_MAX_NUM_REF_FRAME 3 -#define FB_BUFFER_OFFSET 0x1000 -#define FB_BUFFER_SIZE 2048 -#define FB_BUFFER_SIZE_TONGA (2048 * 64) -#define IT_SCALING_TABLE_SIZE 992 -#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) +#define RADV_VIDEO_H264_MAX_DPB_SLOTS 17 +#define RADV_VIDEO_H264_MAX_NUM_REF_FRAME 16 +#define RADV_VIDEO_H265_MAX_DPB_SLOTS 17 +#define RADV_VIDEO_H265_MAX_NUM_REF_FRAME 15 +#define RADV_VIDEO_AV1_MAX_DPB_SLOTS 9 +#define RADV_VIDEO_AV1_MAX_NUM_REF_FRAME 7 +#define RADV_VIDEO_VP9_MAX_DPB_SLOTS 9 +#define RADV_VIDEO_VP9_MAX_NUM_REF_FRAME 3 +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define IT_SCALING_TABLE_SIZE 992 +#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) /* Not 100% sure this isn't too much but works */ #define VID_DEFAULT_ALIGNMENT 256 @@ -46,10 +46,8 @@ static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t v static inline bool radv_check_vcn_fw_version(const struct radv_physical_device *pdev, uint32_t dec, uint32_t enc, uint32_t rev) { - return pdev->info.vcn_dec_version > dec || - pdev->info.vcn_enc_minor_version > enc || - (pdev->info.vcn_dec_version == dec && - pdev->info.vcn_enc_minor_version == enc && + return pdev->info.vcn_dec_version > dec || pdev->info.vcn_enc_minor_version > enc || + (pdev->info.vcn_dec_version == dec && pdev->info.vcn_enc_minor_version == enc && pdev->info.vcn_fw_revision >= rev); } @@ -444,8 +442,9 @@ radv_video_patch_session_parameters(struct radv_device *device, struct vk_video_ } } -static unsigned calc_ctx_size_vp9(const struct radv_physical_device *pdev, - struct radv_video_session *vid) { +static unsigned +calc_ctx_size_vp9(const struct radv_physical_device *pdev, struct radv_video_session *vid) +{ /* default probability + probability data */ unsigned ctx_size = 2304 * 5; @@ -467,7 +466,9 @@ static unsigned calc_ctx_size_vp9(const struct radv_physical_device *pdev, return ctx_size; } -static unsigned calc_intra_only_vp9(struct radv_video_session *vid) { +static unsigned +calc_intra_only_vp9(struct radv_video_session *vid) +{ unsigned width = align(vid->vk.max_coded.width, vid->db_alignment); unsigned height = align(vid->vk.max_coded.height, vid->db_alignment); @@ -820,8 +821,7 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons break; } case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: { - const bool have_12bit = pdev->info.vcn_ip_version >= VCN_5_0_0 || - pdev->info.vcn_ip_version == VCN_4_0_0; + const bool have_12bit = pdev->info.vcn_ip_version >= VCN_5_0_0 || pdev->info.vcn_ip_version == VCN_4_0_0; /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */ if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR && pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth) @@ -912,7 +912,8 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME); pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION; pCapabilities->maxDpbSlots = RADV_VIDEO_H264_MAX_DPB_SLOTS; - pCapabilities->maxActiveReferencePictures = MAX2(ext->maxPPictureL0ReferenceCount, ext->maxBPictureL0ReferenceCount + ext->maxL1ReferenceCount); + pCapabilities->maxActiveReferencePictures = + MAX2(ext->maxPPictureL0ReferenceCount, ext->maxBPictureL0ReferenceCount + ext->maxL1ReferenceCount); pCapabilities->minCodedExtent.width = pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? 96 : 128; pCapabilities->minCodedExtent.height = pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? 32 : 128; break; @@ -925,8 +926,7 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H265_PROFILE_INFO_KHR); if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN && - (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 || - h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) + (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 || h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR; if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR && @@ -972,14 +972,15 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME); pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION; pCapabilities->maxDpbSlots = RADV_VIDEO_H265_MAX_DPB_SLOTS; - pCapabilities->maxActiveReferencePictures = MAX2(ext->maxPPictureL0ReferenceCount, ext->maxBPictureL0ReferenceCount + ext->maxL1ReferenceCount); + pCapabilities->maxActiveReferencePictures = + MAX2(ext->maxPPictureL0ReferenceCount, ext->maxBPictureL0ReferenceCount + ext->maxL1ReferenceCount); pCapabilities->minCodedExtent.width = pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? 384 : 130; pCapabilities->minCodedExtent.height = 128; break; } case VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR: { - struct VkVideoEncodeAV1CapabilitiesKHR *ext = (struct VkVideoEncodeAV1CapabilitiesKHR *) - vk_find_struct(pCapabilities->pNext, VIDEO_ENCODE_AV1_CAPABILITIES_KHR); + struct VkVideoEncodeAV1CapabilitiesKHR *ext = (struct VkVideoEncodeAV1CapabilitiesKHR *)vk_find_struct( + pCapabilities->pNext, VIDEO_ENCODE_AV1_CAPABILITIES_KHR); pCapabilities->maxDpbSlots = RADV_VIDEO_AV1_MAX_DPB_SLOTS; pCapabilities->maxActiveReferencePictures = RADV_VIDEO_AV1_MAX_NUM_REF_FRAME; strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME); @@ -1030,8 +1031,8 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons ext->requiresGopRemainingFrames = false; ext->stdSyntaxFlags = 0; if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5) { - ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_AV1_STD_SKIP_MODE_PRESENT_UNSET_BIT_KHR | - VK_VIDEO_ENCODE_AV1_STD_DELTA_Q_BIT_KHR; + ext->stdSyntaxFlags |= + VK_VIDEO_ENCODE_AV1_STD_SKIP_MODE_PRESENT_UNSET_BIT_KHR | VK_VIDEO_ENCODE_AV1_STD_DELTA_Q_BIT_KHR; } pCapabilities->minCodedExtent.width = pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? 320 : 128; pCapabilities->minCodedExtent.height = 128; @@ -1057,10 +1058,10 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; break; case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: - pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_RENOIR) ? - ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192; - pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_RENOIR) ? - ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; + pCapabilities->maxCodedExtent.width = + (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192; + pCapabilities->maxCodedExtent.height = + (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; break; default: break; @@ -1078,15 +1079,16 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, { VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice); - if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) && + if ((pVideoFormatInfo->imageUsage & + (VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) && !pdev->video_encode_enabled) return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR; /* VCN < 5 requires separate allocates for DPB and decode video. */ - if (pdev->info.vcn_ip_version < VCN_5_0_0 && (pVideoFormatInfo->imageUsage & + if (pdev->info.vcn_ip_version < VCN_5_0_0 && + (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) == - (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) + (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR; VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount); @@ -1115,7 +1117,8 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY; p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY; p->imageCreateFlags = 0; - if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) + if (pVideoFormatInfo->imageUsage & + (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; p->imageType = VK_IMAGE_TYPE_2D; p->imageTiling = VK_IMAGE_TILING_OPTIMAL; @@ -1137,7 +1140,8 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY; p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY; p->imageCreateFlags = 0; - if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) + if (pVideoFormatInfo->imageUsage & + (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; p->imageType = VK_IMAGE_TYPE_2D; p->imageTiling = VK_IMAGE_TILING_OPTIMAL; @@ -1157,7 +1161,8 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY; p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY; p->imageCreateFlags = 0; - if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) + if (pVideoFormatInfo->imageUsage & + (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)) p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; p->imageType = VK_IMAGE_TYPE_2D; p->imageTiling = VK_IMAGE_TILING_OPTIMAL; @@ -1232,7 +1237,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi } } if (vid->stream_type == RDECODE_CODEC_VP9) { - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_DECODER_CTX; m->memoryRequirements.size = align(calc_ctx_size_vp9(pdev, vid), 4096); m->memoryRequirements.alignment = 0; @@ -1240,7 +1246,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi } if (vid->vk.max_dpb_slots == 0) { - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_INTRA_ONLY; m->memoryRequirements.size = calc_intra_only_vp9(vid); m->memoryRequirements.alignment = 0; @@ -1569,9 +1576,7 @@ get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *p static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - uint32_t *width_in_samples, - uint32_t *height_in_samples, + const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples, uint32_t *height_in_samples, void *it_ptr) { rvcn_dec_message_hevc_t result; @@ -1723,17 +1728,13 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct } static rvcn_dec_message_vp9_t -get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, - struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - void *probs_ptr, - int *update_reference_slot) +get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot) { rvcn_dec_message_vp9_t result; const struct VkVideoDecodeVP9PictureInfoKHR *vp9_pic_info = vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_VP9_PICTURE_INFO_KHR); - const struct StdVideoDecodeVP9PictureInfo *std_pic_info = - vp9_pic_info->pStdPictureInfo; + const struct StdVideoDecodeVP9PictureInfo *std_pic_info = vp9_pic_info->pStdPictureInfo; const int intra_only_decoding = vid->vk.max_dpb_slots == 0; if (intra_only_decoding) @@ -1743,7 +1744,7 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, memset(&result, 0, sizeof(result)); bool lossless = std_pic_info->base_q_idx == 0 && std_pic_info->delta_q_y_dc == 0 && - std_pic_info->delta_q_uv_dc == 0 && std_pic_info->delta_q_uv_ac == 0; + std_pic_info->delta_q_uv_dc == 0 && std_pic_info->delta_q_uv_ac == 0; ac_vcn_vp9_fill_probs_table(probs_ptr); @@ -1752,8 +1753,8 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, for (unsigned i = 0; i < 8; ++i) { prbs->seg.feature_data[i] = (uint16_t)std_pic_info->pSegmentation->FeatureData[i][0] | - ((uint32_t)(std_pic_info->pSegmentation->FeatureData[i][1] & 0xff) << 16) | - ((uint32_t)(std_pic_info->pSegmentation->FeatureData[i][2] & 0xf) << 24); + ((uint32_t)(std_pic_info->pSegmentation->FeatureData[i][1] & 0xff) << 16) | + ((uint32_t)(std_pic_info->pSegmentation->FeatureData[i][2] & 0xf) << 24); prbs->seg.feature_mask[i] = std_pic_info->pSegmentation->FeatureEnabled[i]; } @@ -1767,46 +1768,44 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, } // Based on the radeonsi implementation - result.frame_header_flags = (std_pic_info->frame_type - << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; + result.frame_header_flags = (std_pic_info->frame_type << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; - result.frame_header_flags |= (std_pic_info->flags.error_resilient_mode - << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; + result.frame_header_flags |= + (std_pic_info->flags.error_resilient_mode << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; - result.frame_header_flags |= (std_pic_info->flags.intra_only - << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & + result.frame_header_flags |= (std_pic_info->flags.intra_only << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; - result.frame_header_flags |= (std_pic_info->flags.allow_high_precision_mv - << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; + result.frame_header_flags |= + (std_pic_info->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; result.frame_header_flags |= (std_pic_info->flags.frame_parallel_decoding_mode << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; - result.frame_header_flags |= (std_pic_info->flags.refresh_frame_context - << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; + result.frame_header_flags |= + (std_pic_info->flags.refresh_frame_context << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; if (std_pic_info->flags.segmentation_enabled) { assert(std_pic_info->pSegmentation); - result.frame_header_flags |= (std_pic_info->flags.segmentation_enabled - << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; + result.frame_header_flags |= + (std_pic_info->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; result.frame_header_flags |= (std_pic_info->pSegmentation->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; result.frame_header_flags |= (std_pic_info->pSegmentation->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; result.frame_header_flags |= (std_pic_info->pSegmentation->flags.segmentation_update_data << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK; + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK; } result.frame_header_flags |= (std_pic_info->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & @@ -1816,9 +1815,9 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; - result.frame_header_flags |= (std_pic_info->flags.UsePrevFrameMvs - << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; + result.frame_header_flags |= + (std_pic_info->flags.UsePrevFrameMvs << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; result.frame_header_flags |= (1 << RDECODE_FRAME_HDR_INFO_VP9_USE_FRAME_SIZE_AS_OFFSET_SHIFT) & RDECODE_FRAME_HDR_INFO_VP9_USE_FRAME_SIZE_AS_OFFSET_MASK; @@ -1839,12 +1838,14 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, result.lf_adj_level[i][0][0] = result.lf_adj_level[i][0][1] = CLAMP(lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[0] * (1 << shifted)), 0, 63); for (int j = 1; j < 4; j++) { - result.lf_adj_level[i][j][0] = - CLAMP((lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[j] + - std_pic_info->pLoopFilter->loop_filter_mode_deltas[0]) * (1 << shifted)), 0, 63); - result.lf_adj_level[i][j][1] = - CLAMP((lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[j] + - std_pic_info->pLoopFilter->loop_filter_mode_deltas[1]) * (1 << shifted)), 0, 63); + result.lf_adj_level[i][j][0] = CLAMP((lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[j] + + std_pic_info->pLoopFilter->loop_filter_mode_deltas[0]) * + (1 << shifted)), + 0, 63); + result.lf_adj_level[i][j][1] = CLAMP((lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[j] + + std_pic_info->pLoopFilter->loop_filter_mode_deltas[1]) * + (1 << shifted)), + 0, 63); } } else { memset(result.lf_adj_level[i], lvl, 4 * 2); @@ -1867,8 +1868,7 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, result.log2_tile_rows = std_pic_info->tile_rows_log2; result.chroma_format = 1; - result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = - (std_pic_info->pColorConfig->BitDepth - 8); + result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = (std_pic_info->pColorConfig->BitDepth - 8); result.vp9_frame_size = vp9_pic_info->uncompressedHeaderOffset; result.compressed_header_size = vp9_pic_info->tilesOffset - vp9_pic_info->compressedHeaderOffset; @@ -1892,7 +1892,8 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, } for (unsigned i = 0; i < 3; i++) { - result.frame_refs[i] = vp9_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : vp9_pic_info->referenceNameSlotIndices[i]; + result.frame_refs[i] = + vp9_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : vp9_pic_info->referenceNameSlotIndices[i]; } for (unsigned i = STD_VIDEO_VP9_REFERENCE_NAME_LAST_FRAME; i <= STD_VIDEO_VP9_REFERENCE_NAME_ALTREF_FRAME; i++) { @@ -2309,9 +2310,9 @@ static void fill_ref_buffer(rvcn_dec_ref_buffer_t *ref, struct radv_image *img, uint32_t slice, uint32_t index) { uint64_t y_addr = img->bindings[0].addr + img->planes[0].surface.u.gfx9.surf_offset + - slice * img->planes[0].surface.u.gfx9.surf_slice_size; + slice * img->planes[0].surface.u.gfx9.surf_slice_size; uint64_t uv_addr = img->bindings[0].addr + img->planes[1].surface.u.gfx9.surf_offset + - slice * img->planes[1].surface.u.gfx9.surf_slice_size; + slice * img->planes[1].surface.u.gfx9.surf_slice_size; memset(ref, 0, sizeof(*ref)); ref->index = index; @@ -2349,8 +2350,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se bool use_intra_only_allocation_for_dpb = false; if (vid->dpb_type == DPB_DYNAMIC_TIER_3) { - VkImageUsageFlags coincide = - VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + VkImageUsageFlags coincide = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; if (luma->surface.is_linear || (img->vk.usage & coincide) != coincide) vid->dpb_type = DPB_DYNAMIC_TIER_2; else @@ -2445,10 +2445,9 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se int dt_array_idx = frame_info->dstPictureResource.baseArrayLayer + dst_iv->vk.base_array_layer; - decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset + - dt_array_idx * luma->surface.u.gfx9.surf_slice_size; - decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset + - dt_array_idx * chroma->surface.u.gfx9.surf_slice_size; + decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset + dt_array_idx * luma->surface.u.gfx9.surf_slice_size; + decode->dt_chroma_top_offset = + chroma->surface.u.gfx9.surf_offset + dt_array_idx * chroma->surface.u.gfx9.surf_slice_size; decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; @@ -2476,10 +2475,8 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se } case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { index_codec->size = sizeof(rvcn_dec_message_hevc_t); - rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, - &decode->width_in_samples, - &decode->height_in_samples, - it_probs_ptr); + rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, &decode->width_in_samples, + &decode->height_in_samples, it_probs_ptr); memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t)); index_codec->message_id = RDECODE_MESSAGE_HEVC; break; @@ -2535,7 +2532,8 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se } else { addr = dpb->bindings[0].addr; radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo); - addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size); + addr += dpb_array_idx * + (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size); } if (vid->dpb_type == DPB_DYNAMIC_TIER_1) { @@ -2544,8 +2542,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se dynamic_dpb->dpbArraySize = RADV_VIDEO_VP9_MAX_DPB_SLOTS; dynamic_dpb->dpbLumaPitch = dpb->planes[0].surface.u.gfx9.surf_pitch; dynamic_dpb->dpbLumaAlignedHeight = dpb->planes[0].surface.u.gfx9.surf_height; - dynamic_dpb->dpbLumaAlignedSize = - dpb->planes[0].surface.u.gfx9.surf_slice_size; + dynamic_dpb->dpbLumaAlignedSize = dpb->planes[0].surface.u.gfx9.surf_slice_size; dynamic_dpb->dpbChromaPitch = dpb->planes[1].surface.u.gfx9.surf_pitch; dynamic_dpb->dpbChromaAlignedHeight = dpb->planes[1].surface.u.gfx9.surf_height; dynamic_dpb->dpbChromaAlignedSize = dpb->planes[1].surface.u.gfx9.surf_slice_size; @@ -2573,11 +2570,13 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); assert(f_dpb_iv != NULL); struct radv_image *dpb_img = f_dpb_iv->image; - int f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; + int f_dpb_array_idx = + frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); addr = dpb_img->bindings[0].addr; - addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + dpb_img->planes[1].surface.u.gfx9.surf_slice_size); + addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + + dpb_img->planes[1].surface.u.gfx9.surf_slice_size); dynamic_dpb_t2->dpbAddrLo[i] = addr; dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32; @@ -2616,7 +2615,8 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se ib_header->package_type = RDECODE_IB_PARAM_DYNAMIC_REFLIST_BUFFER; cmd_buffer->cs->cdw++; - rvcn_dec_ref_buffers_header_t *refs = (rvcn_dec_ref_buffers_header_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + rvcn_dec_ref_buffers_header_t *refs = + (rvcn_dec_ref_buffers_header_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); cmd_buffer->cs->cdw += size / 4; refs->size = size; refs->num_bufs = 0; @@ -2628,14 +2628,17 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); assert(f_dpb_iv != NULL); struct radv_image *dpb_img = f_dpb_iv->image; - uint32_t f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; - fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb_img, f_dpb_array_idx, frame_info->pReferenceSlots[i].slotIndex); + uint32_t f_dpb_array_idx = + frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; + fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb_img, f_dpb_array_idx, + frame_info->pReferenceSlots[i].slotIndex); radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); used_slots |= 1 << frame_info->pReferenceSlots[i].slotIndex; } if (add_setup_slot) - fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb, dpb_array_idx, frame_info->pSetupReferenceSlot->slotIndex); + fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb, dpb_array_idx, + frame_info->pSetupReferenceSlot->slotIndex); if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { for (int j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) { @@ -2764,8 +2767,8 @@ get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_param static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples, - uint32_t *height_in_samples, void *it_ptr) + const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples, uint32_t *height_in_samples, + void *it_ptr) { const struct radv_physical_device *pdev = radv_device_physical(device); struct ruvd_h265 result; @@ -2963,10 +2966,9 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v break; } case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { - msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info, - &msg->body.decode.width_in_samples, - &msg->body.decode.height_in_samples, - it_ptr); + msg->body.decode.codec.h265 = + get_uvd_h265_msg(device, vid, params, frame_info, &msg->body.decode.width_in_samples, + &msg->body.decode.height_in_samples, it_ptr); if (vid->ctx.mem) msg->body.decode.dpb_reserved = vid->ctx.size; @@ -2984,10 +2986,10 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset + - dt_array_idx * luma->surface.u.gfx9.surf_slice_size; - msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset + - dt_array_idx * chroma->surface.u.gfx9.surf_slice_size; + msg->body.decode.dt_luma_top_offset = + luma->surface.u.gfx9.surf_offset + dt_array_idx * luma->surface.u.gfx9.surf_slice_size; + msg->body.decode.dt_chroma_top_offset = + chroma->surface.u.gfx9.surf_offset + dt_array_idx * chroma->surface.u.gfx9.surf_slice_size; msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; msg->body.decode.dt_surf_tile_config = 0; @@ -3234,7 +3236,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf if (vid->dpb_type == DPB_DYNAMIC_TIER_1) { size += sizeof(rvcn_dec_message_index_t); size += sizeof(rvcn_dec_message_dynamic_dpb_t); - } else if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { + } else if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { size += sizeof(rvcn_dec_message_index_t); size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t); } diff --git a/src/amd/vulkan/radv_video.h b/src/amd/vulkan/radv_video.h index dfc5979d0de..93091ea1221 100644 --- a/src/amd/vulkan/radv_video.h +++ b/src/amd/vulkan/radv_video.h @@ -11,8 +11,8 @@ #ifndef RADV_VIDEO_H #define RADV_VIDEO_H -#include "vk_video.h" #include "radv_event.h" +#include "vk_video.h" #include "ac_vcn.h" @@ -26,9 +26,9 @@ struct radv_image_create_info; #define RADV_ENC_MAX_RATE_LAYER 4 -#define RADV_BIND_SESSION_CTX 0 -#define RADV_BIND_DECODER_CTX 1 -#define RADV_BIND_INTRA_ONLY 2 +#define RADV_BIND_SESSION_CTX 0 +#define RADV_BIND_DECODER_CTX 1 +#define RADV_BIND_INTRA_ONLY 2 #define RADV_BIND_ENCODE_AV1_CDF_STORE RADV_BIND_DECODER_CTX struct radv_vid_mem { @@ -95,10 +95,8 @@ VkResult radv_video_get_encode_session_memory_requirements(struct radv_device *d uint32_t *pMemoryRequirementsCount, VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements); void radv_video_patch_encode_session_parameters(struct radv_device *device, struct vk_video_session_parameters *params); -void radv_video_get_enc_dpb_image(struct radv_device *device, - const struct VkVideoProfileListInfoKHR *profile_list, - struct radv_image *image, - struct radv_image_create_info *create_info); +void radv_video_get_enc_dpb_image(struct radv_device *device, const struct VkVideoProfileListInfoKHR *profile_list, + struct radv_image *image, struct radv_image_create_info *create_info); bool radv_video_decode_vp9_supported(const struct radv_physical_device *pdev); bool radv_video_encode_av1_supported(const struct radv_physical_device *pdev); diff --git a/src/amd/vulkan/radv_video_enc.c b/src/amd/vulkan/radv_video_enc.c index 3fce5307a89..d7c6bf12b05 100644 --- a/src/amd/vulkan/radv_video_enc.c +++ b/src/amd/vulkan/radv_video_enc.c @@ -304,7 +304,7 @@ radv_enc_code_ue(struct radv_cmd_buffer *cmd_buffer, unsigned int value) x += 1; } if (x > 1) - radv_enc_code_fixed_bits(cmd_buffer, 0, x - 1); + radv_enc_code_fixed_bits(cmd_buffer, 0, x - 1); radv_enc_code_fixed_bits(cmd_buffer, ue_code, x); } @@ -398,9 +398,7 @@ radv_enc_av1_bs_copy_end(struct radv_cmd_buffer *cmd_buffer, uint32_t bits) /* av1 bitstream instruction type */ static void -radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, - uint32_t inst, - uint32_t obu_type) +radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t inst, uint32_t obu_type) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_enc_state *enc = &cmd_buffer->video.enc; @@ -483,8 +481,7 @@ radv_enc_session_init(struct radv_cmd_buffer *cmd_buffer, const struct VkVideoEn } } - if (pdev->info.vcn_ip_version == VCN_4_0_2 || - pdev->info.vcn_ip_version == VCN_4_0_5 || + if (pdev->info.vcn_ip_version == VCN_4_0_2 || pdev->info.vcn_ip_version == VCN_4_0_5 || pdev->info.vcn_ip_version == VCN_4_0_6) vid->enc_session.WA_flags = 1; @@ -570,9 +567,9 @@ radv_enc_spec_misc_h264(struct radv_cmd_buffer *cmd_buffer, const struct VkVideo vk_video_find_h264_enc_std_pps(&cmd_buffer->video.params->vk, pic->pic_parameter_set_id); RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.spec_misc_h264); - RADEON_ENC_CS(pps->flags.constrained_intra_pred_flag); // constrained_intra_pred_flag - RADEON_ENC_CS(pps->flags.entropy_coding_mode_flag); // cabac enable - RADEON_ENC_CS(0); // cabac init idc + RADEON_ENC_CS(pps->flags.constrained_intra_pred_flag); // constrained_intra_pred_flag + RADEON_ENC_CS(pps->flags.entropy_coding_mode_flag); // cabac enable + RADEON_ENC_CS(0); // cabac init idc if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5) RADEON_ENC_CS(pps->flags.transform_8x8_mode_flag); RADEON_ENC_CS(1); // half pel enabled @@ -651,8 +648,8 @@ radv_enc_av1_get_relative_dist(uint32_t order_hint_bits_minus_1, uint32_t a, uin } static bool -radv_enc_av1_skip_mode_allowed(uint32_t order_hint_bits, uint32_t *ref_order_hint, - uint32_t curr_order_hint, uint32_t frames[2]) +radv_enc_av1_skip_mode_allowed(uint32_t order_hint_bits, uint32_t *ref_order_hint, uint32_t curr_order_hint, + uint32_t frames[2]) { int32_t forward_idx = -1, backward_idx = -1; uint32_t forward_hint = 0, backward_hint = 0; @@ -688,7 +685,8 @@ radv_enc_av1_skip_mode_allowed(uint32_t order_hint_bits, uint32_t *ref_order_hin for (uint32_t i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) { uint32_t ref_hint = ref_order_hint[i]; if (radv_enc_av1_get_relative_dist(order_hint_bits, ref_hint, forward_hint) < 0) { - if (second_forward_idx < 0 || radv_enc_av1_get_relative_dist(order_hint_bits, ref_hint, second_forward_hint) > 0) { + if (second_forward_idx < 0 || + radv_enc_av1_get_relative_dist(order_hint_bits, ref_hint, second_forward_hint) > 0) { second_forward_idx = i; second_forward_hint = ref_hint; } @@ -704,8 +702,7 @@ radv_enc_av1_skip_mode_allowed(uint32_t order_hint_bits, uint32_t *ref_order_hin } static void -radv_enc_spec_misc_av1(struct radv_cmd_buffer *cmd_buffer, - const struct VkVideoEncodeInfoKHR *enc_info) +radv_enc_spec_misc_av1(struct radv_cmd_buffer *cmd_buffer, const struct VkVideoEncodeInfoKHR *enc_info) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -723,7 +720,8 @@ radv_enc_spec_misc_av1(struct radv_cmd_buffer *cmd_buffer, if (pic->flags.force_integer_mv) precision = RENCODE_AV1_MV_PRECISION_FORCE_INTEGER_MV; - vid->skip_mode_allowed = seq->flags.enable_order_hint && + vid->skip_mode_allowed = + seq->flags.enable_order_hint && av1_picture_info->predictionMode >= VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_UNIDIRECTIONAL_COMPOUND_KHR; if (vid->skip_mode_allowed) { @@ -736,8 +734,8 @@ radv_enc_spec_misc_av1(struct radv_cmd_buffer *cmd_buffer, vid->disallow_skip_mode = !vid->skip_mode_allowed; /* Skip mode frames must match reference frames */ if (vid->skip_mode_allowed) { - vid->disallow_skip_mode = !pic->flags.skip_mode_present || - skip_frames[0] != 0 || av1_picture_info->referenceNameSlotIndices[skip_frames[1]] == -1; + vid->disallow_skip_mode = !pic->flags.skip_mode_present || skip_frames[0] != 0 || + av1_picture_info->referenceNameSlotIndices[skip_frames[1]] == -1; } } @@ -880,9 +878,8 @@ radv_enc_latency(struct radv_cmd_buffer *cmd_buffer, VkVideoEncodeTuningModeKHR { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - if (tuning_mode == VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR - || tuning_mode == VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR) - { + if (tuning_mode == VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR || + tuning_mode == VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR) { RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.enc_latency); RADEON_ENC_CS(1000); RADEON_ENC_END(); @@ -1091,8 +1088,7 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf } static unsigned int -radv_enc_hevc_st_ref_pic_set(struct radv_cmd_buffer *cmd_buffer, - const StdVideoH265SequenceParameterSet *sps, +radv_enc_hevc_st_ref_pic_set(struct radv_cmd_buffer *cmd_buffer, const StdVideoH265SequenceParameterSet *sps, const StdVideoH265ShortTermRefPicSet *rps) { const StdVideoH265ShortTermRefPicSet *ref_rps; @@ -1230,9 +1226,7 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco radv_enc_code_fixed_bits(cmd_buffer, pic->PicOrderCntVal % (1 << max_poc_bits), max_poc_bits); radv_enc_code_fixed_bits(cmd_buffer, pic->flags.short_term_ref_pic_set_sps_flag, 0x1); if (!pic->flags.short_term_ref_pic_set_sps_flag) { - num_pic_total_curr = radv_enc_hevc_st_ref_pic_set(cmd_buffer, - sps, - pic->pShortTermRefPicSet); + num_pic_total_curr = radv_enc_hevc_st_ref_pic_set(cmd_buffer, sps, pic->pShortTermRefPicSet); } else if (sps->num_short_term_ref_pic_sets > 1) { radv_enc_code_fixed_bits(cmd_buffer, pic->short_term_ref_pic_set_idx, util_logbase2_ceil(sps->num_short_term_ref_pic_sets)); @@ -1246,7 +1240,8 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco for (unsigned i = 0; i < lt->num_long_term_sps + lt->num_long_term_pics; i++) { if (i < lt->num_long_term_sps) { if (sps->num_long_term_ref_pics_sps > 1) - radv_enc_code_fixed_bits(cmd_buffer, lt->lt_idx_sps[i], util_logbase2_ceil(sps->num_long_term_ref_pics_sps)); + radv_enc_code_fixed_bits(cmd_buffer, lt->lt_idx_sps[i], + util_logbase2_ceil(sps->num_long_term_ref_pics_sps)); } else { radv_enc_code_fixed_bits(cmd_buffer, lt->poc_lsb_lt[i], sps->log2_max_pic_order_cnt_lsb_minus4 + 4); radv_enc_code_fixed_bits(cmd_buffer, lt->used_by_curr_pic_lt_flag & (1 << i), 1); @@ -1284,14 +1279,16 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco if (pps->flags.lists_modification_present_flag && num_pic_total_curr > 1) { const StdVideoEncodeH265ReferenceListsInfo *rl = pic->pRefLists; unsigned num_pic_bits = util_logbase2_ceil(num_pic_total_curr); - unsigned num_ref_l0_minus1 = slice->flags.num_ref_idx_active_override_flag ? - rl->num_ref_idx_l0_active_minus1 : pps->num_ref_idx_l0_default_active_minus1; + unsigned num_ref_l0_minus1 = slice->flags.num_ref_idx_active_override_flag + ? rl->num_ref_idx_l0_active_minus1 + : pps->num_ref_idx_l0_default_active_minus1; radv_enc_code_fixed_bits(cmd_buffer, rl->flags.ref_pic_list_modification_flag_l0, 1); for (unsigned i = 0; i <= num_ref_l0_minus1; i++) radv_enc_code_fixed_bits(cmd_buffer, rl->list_entry_l0[i], num_pic_bits); if (pic->pic_type == STD_VIDEO_H265_PICTURE_TYPE_B) { - unsigned num_ref_l1_minus1 = slice->flags.num_ref_idx_active_override_flag ? - rl->num_ref_idx_l1_active_minus1 : pps->num_ref_idx_l1_default_active_minus1; + unsigned num_ref_l1_minus1 = slice->flags.num_ref_idx_active_override_flag + ? rl->num_ref_idx_l1_active_minus1 + : pps->num_ref_idx_l1_default_active_minus1; radv_enc_code_fixed_bits(cmd_buffer, rl->flags.ref_pic_list_modification_flag_l1, 1); for (unsigned i = 0; i <= num_ref_l1_minus1; i++) radv_enc_code_fixed_bits(cmd_buffer, rl->list_entry_l1[i], num_pic_bits); @@ -1372,10 +1369,7 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco } static void -dpb_image_sizes(struct radv_image *image, - uint32_t *luma_pitch, - uint32_t *luma_size, - uint32_t *chroma_size, +dpb_image_sizes(struct radv_image *image, uint32_t *luma_pitch, uint32_t *luma_size, uint32_t *chroma_size, uint32_t *colloc_bytes) { uint32_t rec_alignment = 64; @@ -1454,8 +1448,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf unsigned colloc_buffer_offset = 0; uint32_t sdb_frame_offset = offset; - if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR && - pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3) { + if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR && pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3) { colloc_buffer_offset = offset; offset += colloc_bytes; } else if (is_av1) @@ -1609,8 +1602,9 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in RADEON_ENC_CS(fcb_va >> 32); RADEON_ENC_CS(fcb_va & 0xffffffff); RADEON_ENC_CS(RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME); // colloc/cdf offset - RADEON_ENC_CS(RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME + RENCODE_AV1_FRAME_CONTEXT_CDF_TABLE_SIZE); // cdef offset - RADEON_ENC_CS(0); // metadata offset + RADEON_ENC_CS(RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME + + RENCODE_AV1_FRAME_CONTEXT_CDF_TABLE_SIZE); // cdef offset + RADEON_ENC_CS(0); // metadata offset } // pre-encode @@ -1709,8 +1703,8 @@ radv_enc_rc_per_pic(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoK } } - uint32_t cmd = pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? - pdev->vcn_enc_cmds.rc_per_pic : pdev->vcn_enc_cmds.rc_per_pic_ex; + uint32_t cmd = + pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5 ? pdev->vcn_enc_cmds.rc_per_pic : pdev->vcn_enc_cmds.rc_per_pic_ex; RADEON_ENC_BEGIN(cmd); RADEON_ENC_CS(qp); // qp_i @@ -1793,9 +1787,8 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR * slot_idx = av1_picture_info->referenceNameSlotIndices[0]; break; } - radv_enc_layer_select(cmd_buffer, MIN2(av1_pic->pExtensionHeader ? - av1_pic->pExtensionHeader->temporal_id : 0, - max_layers)); + radv_enc_layer_select(cmd_buffer, + MIN2(av1_pic->pExtensionHeader ? av1_pic->pExtensionHeader->temporal_id : 0, max_layers)); } else { assert(0); return; @@ -1813,7 +1806,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR * RADEON_ENC_CS(src_img->planes[0].surface.u.gfx9.swizzle_mode); // swizzle mode if (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_5) - RADEON_ENC_CS(slot_idx); // ref0_idx + RADEON_ENC_CS(slot_idx); // ref0_idx if (enc_info->pSetupReferenceSlot) RADEON_ENC_CS(enc_info->pSetupReferenceSlot->slotIndex); // reconstructed picture index else @@ -1822,8 +1815,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR * } static void -radv_enc_params_h264(struct radv_cmd_buffer *cmd_buffer, - const VkVideoEncodeInfoKHR *enc_info) +radv_enc_params_h264(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -1841,16 +1833,13 @@ radv_enc_params_h264(struct radv_cmd_buffer *cmd_buffer, switch (h264_pic->primary_pic_type) { case STD_VIDEO_H264_PICTURE_TYPE_P: slot_idx_0 = enc_info->pReferenceSlots[0].slotIndex; - slot_info_0 = vk_find_struct_const(enc_info->pReferenceSlots[0].pNext, - VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); + slot_info_0 = vk_find_struct_const(enc_info->pReferenceSlots[0].pNext, VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); break; case STD_VIDEO_H264_PICTURE_TYPE_B: slot_idx_0 = enc_info->pReferenceSlots[0].slotIndex; slot_idx_1 = enc_info->pReferenceSlots[1].slotIndex; - slot_info_0 = vk_find_struct_const(enc_info->pReferenceSlots[0].pNext, - VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); - slot_info_1 = vk_find_struct_const(enc_info->pReferenceSlots[1].pNext, - VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); + slot_info_0 = vk_find_struct_const(enc_info->pReferenceSlots[0].pNext, VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); + slot_info_1 = vk_find_struct_const(enc_info->pReferenceSlots[1].pNext, VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR); break; default: break; @@ -1873,27 +1862,27 @@ radv_enc_params_h264(struct radv_cmd_buffer *cmd_buffer, RADEON_ENC_CS(RENCODE_H264_PICTURE_STRUCTURE_FRAME); RADEON_ENC_CS(slot_info_0->pStdReferenceInfo->PicOrderCnt); } else { - RADEON_ENC_CS(0); // l0 ref pic0 pic_type - RADEON_ENC_CS(0); // l0 ref pic0 is long term - RADEON_ENC_CS(0); // l0 ref pic0 picture structure - RADEON_ENC_CS(0); // l0 ref pic0 pic order cnt + RADEON_ENC_CS(0); // l0 ref pic0 pic_type + RADEON_ENC_CS(0); // l0 ref pic0 is long term + RADEON_ENC_CS(0); // l0 ref pic0 picture structure + RADEON_ENC_CS(0); // l0 ref pic0 pic order cnt } - RADEON_ENC_CS(0xffffffff); // l0 ref pic1 index - RADEON_ENC_CS(0); // l0 ref pic1 pic_type - RADEON_ENC_CS(0); // l0 ref pic1 is long term - RADEON_ENC_CS(0); // l0 ref pic1 picture structure - RADEON_ENC_CS(0); // l0 ref pic1 pic order cnt - RADEON_ENC_CS(slot_idx_1); // l1 ref pic0 index + RADEON_ENC_CS(0xffffffff); // l0 ref pic1 index + RADEON_ENC_CS(0); // l0 ref pic1 pic_type + RADEON_ENC_CS(0); // l0 ref pic1 is long term + RADEON_ENC_CS(0); // l0 ref pic1 picture structure + RADEON_ENC_CS(0); // l0 ref pic1 pic order cnt + RADEON_ENC_CS(slot_idx_1); // l1 ref pic0 index if (slot_info_1) { RADEON_ENC_CS(radv_enc_h264_pic_type(slot_info_1->pStdReferenceInfo->primary_pic_type)); RADEON_ENC_CS(slot_info_1->pStdReferenceInfo->flags.used_for_long_term_reference); RADEON_ENC_CS(RENCODE_H264_PICTURE_STRUCTURE_FRAME); RADEON_ENC_CS(slot_info_1->pStdReferenceInfo->PicOrderCnt); } else { - RADEON_ENC_CS(0); // l1 ref pic0 pic_type - RADEON_ENC_CS(0); // l1 ref pic0 is long term - RADEON_ENC_CS(0); // l1 ref pic0 picture structure - RADEON_ENC_CS(0); // l1 ref pic0 pic order cnt + RADEON_ENC_CS(0); // l1 ref pic0 pic_type + RADEON_ENC_CS(0); // l1 ref pic0 is long term + RADEON_ENC_CS(0); // l1 ref pic0 picture structure + RADEON_ENC_CS(0); // l1 ref pic0 pic order cnt } RADEON_ENC_CS(h264_pic->flags.is_reference); // is reference } else { @@ -1903,18 +1892,18 @@ radv_enc_params_h264(struct radv_cmd_buffer *cmd_buffer, RADEON_ENC_CS(h264_pic->flags.is_reference); RADEON_ENC_CS(h264_pic->flags.long_term_reference_flag); RADEON_ENC_CS(RENCODE_H264_INTERLACING_MODE_PROGRESSIVE); - RADEON_ENC_CS(slot_idx_0); // ref_list0[0] + RADEON_ENC_CS(slot_idx_0); // ref_list0[0] for (int i = 1; i < RENCODE_H264_MAX_REFERENCE_LIST_SIZE; i++) RADEON_ENC_CS(0); RADEON_ENC_CS(slot_idx_0 != 0xffffffff ? 1 : 0); // num_active_references_l0 - RADEON_ENC_CS(slot_idx_1); // ref_list1[0] + RADEON_ENC_CS(slot_idx_1); // ref_list1[0] for (int i = 1; i < RENCODE_H264_MAX_REFERENCE_LIST_SIZE; i++) RADEON_ENC_CS(0); RADEON_ENC_CS(slot_idx_1 != 0xffffffff ? 1 : 0); // num_active_references_l1 - RADEON_ENC_CS(0); // lsm_reference_pictures[0].list - RADEON_ENC_CS(0); // lsm_reference_pictures[0].list_index - RADEON_ENC_CS(1); // lsm_reference_pictures[1].list - RADEON_ENC_CS(0); // lsm_reference_pictures[0].list_index + RADEON_ENC_CS(0); // lsm_reference_pictures[0].list + RADEON_ENC_CS(0); // lsm_reference_pictures[0].list_index + RADEON_ENC_CS(1); // lsm_reference_pictures[1].list + RADEON_ENC_CS(0); // lsm_reference_pictures[0].list_index } RADEON_ENC_END(); } @@ -2080,9 +2069,9 @@ radv_enc_output_format(struct radv_cmd_buffer *cmd_buffer) RADEON_ENC_CS(0); // output color volume RADEON_ENC_CS(RENCODE_COLOR_RANGE_STUDIO); // output color range if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5) - RADEON_ENC_CS(0); // output chroma subsampling - RADEON_ENC_CS(0); // output chroma location - RADEON_ENC_CS(color_bit_depth); // output color bit depth + RADEON_ENC_CS(0); // output chroma subsampling + RADEON_ENC_CS(0); // output chroma location + RADEON_ENC_CS(color_bit_depth); // output color bit depth RADEON_ENC_END(); } @@ -2103,8 +2092,7 @@ radv_enc_headers_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf } static void -radv_enc_cdf_default_table(struct radv_cmd_buffer *cmd_buffer, - const VkVideoEncodeInfoKHR *enc_info) +radv_enc_cdf_default_table(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); @@ -2160,7 +2148,7 @@ radv_enc_params_av1(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoK slot_idx_1 = 6; /* ALTREF_FRAME */ break; default: - break; + break; } RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.enc_params_av1); @@ -2194,10 +2182,8 @@ radv_enc_av1_tile_config(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncode /* 2 cols only supported for width > 4096. */ if (w <= 4096 && av1_pic->pTileInfo->TileCols > 1) { vid->tile_config.num_tile_cols = 1; - vid->tile_config.num_tile_rows = - MIN2(av1_pic->pTileInfo->TileRows * av1_pic->pTileInfo->TileCols, sb_h); - vid->tile_config.uniform_tile_spacing = - util_is_power_of_two_or_zero(vid->tile_config.num_tile_rows); + vid->tile_config.num_tile_rows = MIN2(av1_pic->pTileInfo->TileRows * av1_pic->pTileInfo->TileCols, sb_h); + vid->tile_config.uniform_tile_spacing = util_is_power_of_two_or_zero(vid->tile_config.num_tile_rows); } else { vid->tile_config.uniform_tile_spacing = av1_pic->pTileInfo->flags.uniform_tile_spacing_flag; vid->tile_config.num_tile_cols = av1_pic->pTileInfo->TileCols; @@ -2212,15 +2198,15 @@ radv_enc_av1_tile_config(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncode } } vid->tile_config.context_update_tile_id = av1_pic->pTileInfo->context_update_tile_id; - vid->tile_config.context_update_tile_id_mode = vid->tile_config.context_update_tile_id == 0 ? - RENCODE_AV1_CONTEXT_UPDATE_TILE_ID_MODE_DEFAULT : RENCODE_AV1_CONTEXT_UPDATE_TILE_ID_MODE_CUSTOMIZED; + vid->tile_config.context_update_tile_id_mode = vid->tile_config.context_update_tile_id == 0 + ? RENCODE_AV1_CONTEXT_UPDATE_TILE_ID_MODE_DEFAULT + : RENCODE_AV1_CONTEXT_UPDATE_TILE_ID_MODE_CUSTOMIZED; } else { vid->tile_config.num_tile_cols = w > 4096 ? 2 : 1; uint32_t max_tile_width = DIV_ROUND_UP(w, vid->tile_config.num_tile_cols); uint32_t max_tile_height = (4096 * 2304) / max_tile_width; vid->tile_config.num_tile_rows = DIV_ROUND_UP(h, max_tile_height); - vid->tile_config.uniform_tile_spacing = - util_is_power_of_two_or_zero(vid->tile_config.num_tile_rows); + vid->tile_config.uniform_tile_spacing = util_is_power_of_two_or_zero(vid->tile_config.num_tile_rows); vid->tile_config.context_update_tile_id = 0; vid->tile_config.context_update_tile_id_mode = RENCODE_AV1_CONTEXT_UPDATE_TILE_ID_MODE_DEFAULT; } @@ -2298,7 +2284,7 @@ radv_enc_av1_obu_header(struct radv_cmd_buffer *cmd_buffer, uint32_t obu_type, if (ext_header) { radv_enc_code_fixed_bits(cmd_buffer, ext_header->temporal_id, 3); radv_enc_code_fixed_bits(cmd_buffer, ext_header->spatial_id, 2); - radv_enc_code_fixed_bits(cmd_buffer, 0, 3); /* reserved 3 bits */ + radv_enc_code_fixed_bits(cmd_buffer, 0, 3); /* reserved 3 bits */ } } @@ -2314,7 +2300,8 @@ static unsigned radv_enc_av1_tile_log2(unsigned blk_size, unsigned target) { unsigned k; - for (k = 0; (blk_size << k) < target; k++); + for (k = 0; (blk_size << k) < target; k++) + ; return k; } @@ -2331,8 +2318,8 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn const StdVideoAV1SequenceHeader *seq = ¶ms->vk.av1_enc.seq_hdr.base; const StdVideoEncodeAV1ExtensionHeader *ext_header = av1_picture_info->generateObuExtensionHeader ? av1_pic->pExtensionHeader : NULL; - bool frame_is_intra = av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_KEY || - av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY; + bool frame_is_intra = + av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_KEY || av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY; bool error_resilient_mode = false; radv_enc_reset(cmd_buffer); @@ -2340,8 +2327,7 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.bitstream_instruction_av1); /* OBU_FRAME_HEADER */ - radv_enc_av1_bs_instruction_type(cmd_buffer, - RENCODE_AV1_BITSTREAM_INSTRUCTION_OBU_START, + radv_enc_av1_bs_instruction_type(cmd_buffer, RENCODE_AV1_BITSTREAM_INSTRUCTION_OBU_START, RENCODE_OBU_START_TYPE_FRAME_HEADER); radv_enc_av1_bs_instruction_type(cmd_buffer, RENCODE_AV1_BITSTREAM_INSTRUCTION_COPY, 0); @@ -2378,7 +2364,7 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn if (seq->flags.reduced_still_picture_header || av1_pic->flags.allow_screen_content_tools) { /* allow_screen_content_tools */ allow_screen_content_tools = /*av1_pic->av1_spec_misc.palette_mode_enable ||*/ - av1_pic->flags.force_integer_mv; + av1_pic->flags.force_integer_mv; radv_enc_code_fixed_bits(cmd_buffer, allow_screen_content_tools ? 1 : 0, 1); } @@ -2389,8 +2375,7 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn if (seq->flags.frame_id_numbers_present_flag) /* current_frame_id */ radv_enc_code_fixed_bits(cmd_buffer, av1_pic->current_frame_id, - seq->delta_frame_id_length_minus_2 + 2 + - seq->additional_frame_id_length_minus_1 + 1); + seq->delta_frame_id_length_minus_2 + 2 + seq->additional_frame_id_length_minus_1 + 1); bool frame_size_override = false; if (av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_SWITCH) @@ -2414,8 +2399,8 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn /* refresh_frame_flags */ radv_enc_code_fixed_bits(cmd_buffer, av1_pic->refresh_frame_flags, 8); - if ((!frame_is_intra || av1_pic->refresh_frame_flags != 0xff) && - error_resilient_mode && seq->flags.enable_order_hint) { + if ((!frame_is_intra || av1_pic->refresh_frame_flags != 0xff) && error_resilient_mode && + seq->flags.enable_order_hint) { for (unsigned i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) /* ref_order_hint */ radv_enc_code_fixed_bits(cmd_buffer, av1_pic->ref_order_hint[i], seq->order_hint_bits_minus_1 + 1); @@ -2441,8 +2426,7 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn /* ref_frame_idx */ radv_enc_code_fixed_bits(cmd_buffer, av1_pic->ref_frame_idx[i], 3); if (seq->flags.frame_id_numbers_present_flag) - radv_enc_code_fixed_bits(cmd_buffer, - av1_pic->delta_frame_id_minus_1[i], + radv_enc_code_fixed_bits(cmd_buffer, av1_pic->delta_frame_id_minus_1[i], seq->delta_frame_id_length_minus_2 + 2); } @@ -2594,15 +2578,15 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn radv_enc_code_fixed_bits(cmd_buffer, 0, 1); if (!frame_is_intra) - for (uint32_t ref = STD_VIDEO_AV1_REFERENCE_NAME_LAST_FRAME; ref <= STD_VIDEO_AV1_REFERENCE_NAME_ALTREF_FRAME; ref++) + for (uint32_t ref = STD_VIDEO_AV1_REFERENCE_NAME_LAST_FRAME; ref <= STD_VIDEO_AV1_REFERENCE_NAME_ALTREF_FRAME; + ref++) /* is_global */ radv_enc_code_fixed_bits(cmd_buffer, 0, 1); radv_enc_av1_bs_instruction_type(cmd_buffer, RENCODE_AV1_BITSTREAM_INSTRUCTION_OBU_END, 0); /* OBU_TILE_GROUP */ - radv_enc_av1_bs_instruction_type(cmd_buffer, - RENCODE_AV1_BITSTREAM_INSTRUCTION_OBU_START, + radv_enc_av1_bs_instruction_type(cmd_buffer, RENCODE_AV1_BITSTREAM_INSTRUCTION_OBU_START, RENCODE_OBU_START_TYPE_TILE_GROUP); radv_enc_av1_bs_instruction_type(cmd_buffer, RENCODE_AV1_BITSTREAM_INSTRUCTION_COPY, 0); radv_enc_av1_obu_header(cmd_buffer, RENCODE_OBU_TYPE_TILE_GROUP, ext_header); @@ -2616,8 +2600,7 @@ radv_enc_av1_obu_instruction(struct radv_cmd_buffer *cmd_buffer, const VkVideoEn } static void -radv_enc_headers_av1(struct radv_cmd_buffer *cmd_buffer, - const VkVideoEncodeInfoKHR *enc_info) +radv_enc_headers_av1(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info) { radv_enc_av1_obu_instruction(cmd_buffer, enc_info); radv_enc_params(cmd_buffer, enc_info); @@ -2847,8 +2830,8 @@ radv_video_enc_control_video_coding(struct radv_cmd_buffer *cmd_buffer, const Vk (VkVideoEncodeH265RateControlInfoKHR *)vk_find_struct_const(rate_control->pNext, VIDEO_ENCODE_H265_RATE_CONTROL_INFO_KHR); const VkVideoEncodeAV1RateControlInfoKHR *av1_rate_control = - (VkVideoEncodeAV1RateControlInfoKHR *)vk_find_struct_const(rate_control->pNext, - VIDEO_ENCODE_AV1_RATE_CONTROL_INFO_KHR); + (VkVideoEncodeAV1RateControlInfoKHR *)vk_find_struct_const(rate_control->pNext, + VIDEO_ENCODE_AV1_RATE_CONTROL_INFO_KHR); uint32_t rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; @@ -2891,7 +2874,7 @@ radv_video_enc_control_video_coding(struct radv_cmd_buffer *cmd_buffer, const Vk (VkVideoEncodeH265RateControlLayerInfoKHR *)vk_find_struct_const( layer->pNext, VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_KHR); const VkVideoEncodeAV1RateControlLayerInfoKHR *av1_layer = - (VkVideoEncodeAV1RateControlLayerInfoKHR *)vk_find_struct_const( + (VkVideoEncodeAV1RateControlLayerInfoKHR *)vk_find_struct_const( layer->pNext, VIDEO_ENCODE_AV1_RATE_CONTROL_LAYER_INFO_KHR); uint32_t frame_rate_den, frame_rate_num; vid->rc_layer_init[l].target_bit_rate = layer->averageBitrate; @@ -2901,8 +2884,7 @@ radv_video_enc_control_video_coding(struct radv_cmd_buffer *cmd_buffer, const Vk radv_vcn_enc_invalid_frame_rate(&frame_rate_den, &frame_rate_num); vid->rc_layer_init[l].frame_rate_den = frame_rate_den; vid->rc_layer_init[l].frame_rate_num = frame_rate_num; - vid->rc_layer_init[l].vbv_buffer_size = - (rate_control->virtualBufferSizeInMs / 1000.) * layer->averageBitrate; + vid->rc_layer_init[l].vbv_buffer_size = (rate_control->virtualBufferSizeInMs / 1000.) * layer->averageBitrate; vid->rc_layer_init[l].avg_target_bits_per_picture = radv_vcn_per_frame_integer(layer->averageBitrate, frame_rate_den, frame_rate_num); vid->rc_layer_init[l].peak_bits_per_picture_integer = @@ -2938,8 +2920,10 @@ radv_video_enc_control_video_coding(struct radv_cmd_buffer *cmd_buffer, const Vk vid->rc_per_pic[l].max_qp_p = av1_layer->useMaxQIndex ? av1_layer->maxQIndex.predictiveQIndex : 0; vid->rc_per_pic[l].max_qp_b = av1_layer->useMaxQIndex ? av1_layer->maxQIndex.bipredictiveQIndex : 0; vid->rc_per_pic[l].max_au_size_i = av1_layer->useMaxFrameSize ? av1_layer->maxFrameSize.intraFrameSize : 0; - vid->rc_per_pic[l].max_au_size_p = av1_layer->useMaxFrameSize ? av1_layer->maxFrameSize.predictiveFrameSize : 0; - vid->rc_per_pic[l].max_au_size_b = av1_layer->useMaxFrameSize ? av1_layer->maxFrameSize.bipredictiveFrameSize : 0; + vid->rc_per_pic[l].max_au_size_p = + av1_layer->useMaxFrameSize ? av1_layer->maxFrameSize.predictiveFrameSize : 0; + vid->rc_per_pic[l].max_au_size_b = + av1_layer->useMaxFrameSize ? av1_layer->maxFrameSize.bipredictiveFrameSize : 0; } vid->rc_per_pic[l].enabled_filler_data = 1; @@ -3189,7 +3173,7 @@ radv_GetEncodedVideoSessionParametersKHR(VkDevice device, break; } case VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR: { - struct vk_video_av1_seq_hdr* seq_hdr = &templ->vk.av1_enc.seq_hdr; + struct vk_video_av1_seq_hdr *seq_hdr = &templ->vk.av1_enc.seq_hdr; if (!seq_hdr) return VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR; vk_video_encode_av1_seq_hdr(&templ->vk, size_limit, &total_size, pData); @@ -3224,7 +3208,8 @@ radv_video_get_encode_session_memory_requirements(struct radv_device *device, st } if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) { - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_ENCODE_AV1_CDF_STORE; m->memoryRequirements.size = VCN_ENC_AV1_DEFAULT_CDF_SIZE; if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_5) @@ -3234,16 +3219,14 @@ radv_video_get_encode_session_memory_requirements(struct radv_device *device, st for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++) if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) m->memoryRequirements.memoryTypeBits |= (1 << i); - } } return vk_outarray_status(&out); } -void radv_video_get_enc_dpb_image(struct radv_device *device, - const struct VkVideoProfileListInfoKHR *profile_list, - struct radv_image *image, - struct radv_image_create_info *create_info) +void +radv_video_get_enc_dpb_image(struct radv_device *device, const struct VkVideoProfileListInfoKHR *profile_list, + struct radv_image *image, struct radv_image_create_info *create_info) { const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t luma_pitch, luma_size, chroma_size, colloc_bytes; @@ -3288,13 +3271,13 @@ void radv_video_get_enc_dpb_image(struct radv_device *device, image->alignment = ENC_ALIGNMENT; } -bool radv_video_encode_av1_supported(const struct radv_physical_device *pdev) +bool +radv_video_encode_av1_supported(const struct radv_physical_device *pdev) { if (pdev->info.vcn_ip_version >= VCN_5_0_0) { return true; } else if (pdev->info.vcn_ip_version >= VCN_4_0_0) { - return pdev->info.vcn_ip_version != VCN_4_0_3 && - pdev->info.vcn_enc_minor_version >= 20; + return pdev->info.vcn_ip_version != VCN_4_0_3 && pdev->info.vcn_enc_minor_version >= 20; } else { return false; } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 9fcabebb91e..e115cf9f165 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -98,8 +98,7 @@ radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo) } static void -radv_amdgpu_log_va_op(struct radv_amdgpu_winsys *ws, - struct radv_amdgpu_winsys_bo *bo, uint64_t offset, uint64_t size, +radv_amdgpu_log_va_op(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, uint64_t offset, uint64_t size, uint64_t virtual_va) { struct radv_amdgpu_winsys_bo_log *bo_log = NULL; @@ -471,7 +470,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned * VRAM. */ if (!(ws->perftest & RADV_PERFTEST_NO_GTT_SPILL)) - request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; } if (initial_domain & RADEON_DOMAIN_GTT) @@ -1128,15 +1127,13 @@ radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file) u_rwlock_rdlock(&ws->log_bo_list_lock); LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) { if (bo_log->virtual_mapping) { - fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, mapped_to=%.16llx\n", - (long long)bo_log->timestamp, + fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, mapped_to=%.16llx\n", (long long)bo_log->timestamp, (long long)radv_amdgpu_canonicalize_va(bo_log->va), (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), (long long)radv_amdgpu_canonicalize_va(bo_log->mapped_va)); } else { fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", - (long long)bo_log->timestamp, - (long long)radv_amdgpu_canonicalize_va(bo_log->va), + (long long)bo_log->timestamp, (long long)radv_amdgpu_canonicalize_va(bo_log->va), (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual); } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index f9a627b5de6..d0d3073f922 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -241,7 +241,8 @@ radv_amdgpu_cs_domain(const struct radeon_winsys *_ws) * If there is no PCIe info, assume there is enough bandwidth. */ const uint32_t bandwidth_mbps_threshold = 8 * 0.985 * 1024; - bool enough_bandwidth = !ws->info.has_pcie_bandwidth_info || ws->info.pcie_bandwidth_mbps >= bandwidth_mbps_threshold; + bool enough_bandwidth = + !ws->info.has_pcie_bandwidth_info || ws->info.pcie_bandwidth_mbps >= bandwidth_mbps_threshold; bool use_sam = (enough_vram && enough_bandwidth && ws->info.has_dedicated_vram && !(ws->perftest & RADV_PERFTEST_NO_SAM)) || diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index e27eb6dc0c4..5149d6dea41 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -211,8 +211,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, ++ws->refcount; } - if (is_virtio && - (perftest_flags & (RADV_PERFTEST_BO_LIST | RADV_PERFTEST_LOCAL_BOS))) { + if (is_virtio && (perftest_flags & (RADV_PERFTEST_BO_LIST | RADV_PERFTEST_LOCAL_BOS))) { /* virtio doesn't support VM_ALWAYS_VALID, so disable options that requires it. */ fprintf(stderr, "localbos and bolist options are not supported values for RADV_PERFTEST with virtio.\n"); return VK_ERROR_INITIALIZATION_FAILED; diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c index 6f8c93bc4d9..a930f9a4519 100644 --- a/src/amd/vulkan/winsys/null/radv_null_winsys.c +++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c @@ -150,8 +150,7 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_i gpu_info->family == CHIP_VEGA20 || (gpu_info->family >= CHIP_MI100 && gpu_info->family != CHIP_NAVI10 && gpu_info->family != CHIP_GFX1013); - gpu_info->has_image_bvh_intersect_ray = gpu_info->gfx_level >= GFX10_3 || - gpu_info->family == CHIP_GFX1013; + gpu_info->has_image_bvh_intersect_ray = gpu_info->gfx_level >= GFX10_3 || gpu_info->family == CHIP_GFX1013; gpu_info->address32_hi = gpu_info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;