From 1e8d367537f55432054b8686452fd6604a47d2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 27 Nov 2025 15:08:11 +0100 Subject: [PATCH] amd: add and use ac_cu_info::has_vtx_format_alpha_adjust_bug Part-of: --- src/amd/common/ac_gpu_info.c | 1 + src/amd/common/ac_gpu_info.h | 2 ++ src/amd/common/ac_shader_util.c | 9 ++++----- src/amd/common/ac_shader_util.h | 4 ++-- .../instruction_selection/aco_select_nir_intrinsics.cpp | 5 ++--- .../instruction_selection/aco_select_vs_prolog.cpp | 6 ++---- src/amd/llvm/ac_llvm_build.c | 3 ++- src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c | 4 ++-- src/amd/vulkan/radv_cmd_buffer.c | 4 ++-- src/amd/vulkan/radv_pipeline_graphics.c | 6 +++--- 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 217b5336b40..b5bc3b8d148 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -322,6 +322,7 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_ cu_info->has_gfx6_mrt_export_bug = info->family == CHIP_TAHITI || info->family == CHIP_PITCAIRN || info->family == CHIP_VERDE; + cu_info->has_vtx_format_alpha_adjust_bug = info->gfx_level <= GFX8 && info->family != CHIP_STONEY; } enum ac_query_gpu_info_result diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 1db6db3d1c5..e8af2c2d0a5 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -68,6 +68,8 @@ struct ac_cu_info { bool has_image_bvh_intersect_ray : 1; /* Some GFX6 GPUs have a bug where it only looks at the x writemask component. 
*/ bool has_gfx6_mrt_export_bug : 1; + /* Pre-GFX9 except Stoney: the alpha component of 10_10_10_2 formats is always unsigned. */ + bool has_vtx_format_alpha_adjust_bug : 1; }; struct radeon_info { diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index c71227feb39..cfb9ddb0d32 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -482,20 +482,19 @@ static const struct ac_vtx_format_info vb_formats_gfx10[] = {VB_FORMATS}; static const struct ac_vtx_format_info vb_formats_gfx11[] = {VB_FORMATS}; const struct ac_vtx_format_info * -ac_get_vtx_format_info_table(enum amd_gfx_level level, enum radeon_family family) +ac_get_vtx_format_info_table(enum amd_gfx_level level, bool has_alpha_adjust_bug) { if (level >= GFX11) return vb_formats_gfx11; else if (level >= GFX10) return vb_formats_gfx10; - bool alpha_adjust = level <= GFX8 && family != CHIP_STONEY; - return alpha_adjust ? vb_formats_gfx6_alpha_adjust : vb_formats_gfx6; + return has_alpha_adjust_bug ? 
vb_formats_gfx6_alpha_adjust : vb_formats_gfx6; } const struct ac_vtx_format_info * -ac_get_vtx_format_info(enum amd_gfx_level level, enum radeon_family family, enum pipe_format fmt) +ac_get_vtx_format_info(enum amd_gfx_level level, bool has_alpha_adjust_bug, enum pipe_format fmt) { - return &ac_get_vtx_format_info_table(level, family)[fmt]; + return &ac_get_vtx_format_info_table(level, has_alpha_adjust_bug)[fmt]; } /** diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index e3a06eb2736..0a4da7518f5 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -247,10 +247,10 @@ uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum amd_gfx_level gfx_level); unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsigned nfmt); const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level, - enum radeon_family family); + bool has_alpha_adjust_bug); const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level, - enum radeon_family family, + bool has_alpha_adjust_bug, enum pipe_format fmt); unsigned ac_get_safe_fetch_size(const enum amd_gfx_level gfx_level, const struct ac_vtx_format_info* vtx_info, diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index f62ff5010c3..27fd2067c5c 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -1379,8 +1379,7 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_neede /* Determine number of fetched components. * Note, ACO IR works with GFX6-8 nfmt + dfmt fields, these are later converted for GFX10+. 
*/ - const struct ac_vtx_format_info* vtx_info = - ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, info.format); + const struct ac_vtx_format_info* vtx_info = ac_get_vtx_format_info(GFX8, true, info.format); /* The number of channels in the format determines the memory range. */ const unsigned max_components = vtx_info->num_channels; /* Calculate maximum number of components loaded according to alignment. */ @@ -2754,7 +2753,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin) if (intrin->intrinsic == nir_intrinsic_load_typed_buffer_amd) { const pipe_format format = nir_intrinsic_format(intrin); const struct ac_vtx_format_info* vtx_info = - ac_get_vtx_format_info(ctx->program->gfx_level, ctx->program->family, format); + ac_get_vtx_format_info(ctx->program->gfx_level, true, format); const struct util_format_description* f = util_format_description(format); /* Avoid splitting: diff --git a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp index f272026f618..59eba0178c6 100644 --- a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp @@ -341,8 +341,7 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index, bool is_last_attribute_large(const struct aco_vs_prolog_info* pinfo) { - const struct ac_vtx_format_info* vtx_info_table = - ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10); + const struct ac_vtx_format_info* vtx_info_table = ac_get_vtx_format_info_table(GFX8, true); unsigned last_attribute = pinfo->num_attributes - 1; if ((pinfo->misaligned_mask & (1u << last_attribute))) { @@ -436,8 +435,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh Operand::c32((unsigned)options->address32_hi)); } - const struct ac_vtx_format_info* vtx_info_table = - ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10); + const struct ac_vtx_format_info* 
vtx_info_table = ac_get_vtx_format_info_table(GFX8, true); UnalignedVsAttribLoadState unaligned_state; unaligned_state.max_vgprs = MAX2(84, num_vgprs + 8); diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 8f6871318c0..793c501c2f1 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1139,7 +1139,8 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe enum gl_access_qualifier access, bool can_speculate) { - const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(ctx->gfx_level, ctx->info->family, format); + const struct ac_vtx_format_info *vtx_info = + ac_get_vtx_format_info(ctx->gfx_level, ctx->info->cu_info.has_vtx_format_alpha_adjust_bug, format); const unsigned max_channels = vtx_info->num_channels; LLVMValueRef voffset_plus_const = LLVMBuildAdd(ctx->builder, base_voffset, LLVMConstInt(ctx->i32, const_offset, 0), ""); diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c index a2f3ec8f7fe..42e4773e4c8 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c +++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c @@ -227,8 +227,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs const uint32_t attrib_stride = s->gfx_state->vi.vertex_attribute_strides[location]; const enum pipe_format attrib_format = s->gfx_state->vi.vertex_attribute_formats[location]; const struct util_format_description *f = util_format_description(attrib_format); - const struct ac_vtx_format_info *vtx_info = - ac_get_vtx_format_info(s->gpu_info->gfx_level, s->gpu_info->family, attrib_format); + const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info( + s->gpu_info->gfx_level, s->gpu_info->cu_info.has_vtx_format_alpha_adjust_bug, attrib_format); const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? 
location : attrib_binding; const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & BITFIELD_MASK(binding_index)); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8c7110d6bf9..5004d49305a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -9218,8 +9218,8 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD vertex_input.bindings_match_attrib = true; enum amd_gfx_level chip = pdev->info.gfx_level; - enum radeon_family family = pdev->info.family; - const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(chip, family); + bool alpha_adjust = pdev->info.cu_info.has_vtx_format_alpha_adjust_bug; + const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(chip, alpha_adjust); for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription2EXT *attrib = &pVertexAttributeDescriptions[i]; diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index b8a2b24f9ac..5ec00bd66c9 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -748,8 +748,8 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct r if (vs->info.vs.use_per_attribute_vb_descs) { const enum amd_gfx_level gfx_level = pdev->info.gfx_level; - const enum radeon_family family = pdev->info.family; - const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family); + const bool alpha_adjust = pdev->info.cu_info.has_vtx_format_alpha_adjust_bug; + const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, alpha_adjust); dynamic->vertex_input.bindings_match_attrib = true; @@ -2031,7 +2031,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct } const struct ac_vtx_format_info *vtx_info = - 
ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.family, format); + ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.cu_info.has_vtx_format_alpha_adjust_bug, format); unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size; /* If offset is misaligned, then the buffer offset must be too. Just skip updating