From 1e8d367537f55432054b8686452fd6604a47d2d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Thu, 27 Nov 2025 15:08:11 +0100
Subject: [PATCH] amd: add and use ac_cu_info::has_vtx_format_alpha_adjust_bug

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
---
 src/amd/common/ac_gpu_info.c                             | 1 +
 src/amd/common/ac_gpu_info.h                             | 2 ++
 src/amd/common/ac_shader_util.c                          | 9 ++++-----
 src/amd/common/ac_shader_util.h                          | 4 ++--
 .../instruction_selection/aco_select_nir_intrinsics.cpp  | 5 ++---
 .../instruction_selection/aco_select_vs_prolog.cpp       | 6 ++----
 src/amd/llvm/ac_llvm_build.c                             | 3 ++-
 src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c            | 4 ++--
 src/amd/vulkan/radv_cmd_buffer.c                         | 4 ++--
 src/amd/vulkan/radv_pipeline_graphics.c                  | 6 +++---
 10 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 217b5336b40..b5bc3b8d148 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -322,6 +322,7 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_
 
    cu_info->has_gfx6_mrt_export_bug =
       info->family == CHIP_TAHITI || info->family == CHIP_PITCAIRN || info->family == CHIP_VERDE;
+   cu_info->has_vtx_format_alpha_adjust_bug = info->gfx_level <= GFX8 && info->family != CHIP_STONEY;
 }
 
 enum ac_query_gpu_info_result
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 1db6db3d1c5..e8af2c2d0a5 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -68,6 +68,8 @@ struct ac_cu_info {
    bool has_image_bvh_intersect_ray : 1;
    /* Some GFX6 GPUs have a bug where it only looks at the x writemask component. */
    bool has_gfx6_mrt_export_bug : 1;
+   /* GFX8 and older, except Stoney: alpha component of 10_10_10_2 formats is always unsigned. */
+   bool has_vtx_format_alpha_adjust_bug : 1;
 };
 
 struct radeon_info {
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index c71227feb39..cfb9ddb0d32 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -482,20 +482,19 @@ static const struct ac_vtx_format_info vb_formats_gfx10[] = {VB_FORMATS};
 static const struct ac_vtx_format_info vb_formats_gfx11[] = {VB_FORMATS};
 
 const struct ac_vtx_format_info *
-ac_get_vtx_format_info_table(enum amd_gfx_level level, enum radeon_family family)
+ac_get_vtx_format_info_table(enum amd_gfx_level level, bool has_alpha_adjust_bug)
 {
    if (level >= GFX11)
       return vb_formats_gfx11;
    else if (level >= GFX10)
       return vb_formats_gfx10;
-   bool alpha_adjust = level <= GFX8 && family != CHIP_STONEY;
-   return alpha_adjust ? vb_formats_gfx6_alpha_adjust : vb_formats_gfx6;
+   return has_alpha_adjust_bug ? vb_formats_gfx6_alpha_adjust : vb_formats_gfx6;
 }
 
 const struct ac_vtx_format_info *
-ac_get_vtx_format_info(enum amd_gfx_level level, enum radeon_family family, enum pipe_format fmt)
+ac_get_vtx_format_info(enum amd_gfx_level level, bool has_alpha_adjust_bug, enum pipe_format fmt)
 {
-   return &ac_get_vtx_format_info_table(level, family)[fmt];
+   return &ac_get_vtx_format_info_table(level, has_alpha_adjust_bug)[fmt];
 }
 
 /**
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index e3a06eb2736..0a4da7518f5 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -247,10 +247,10 @@ uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum amd_gfx_level gfx_level);
 unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsigned nfmt);
 
 const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level,
-                                                              enum radeon_family family);
+                                                              bool has_alpha_adjust_bug);
 
 const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level,
-                                                        enum radeon_family family,
+                                                        bool has_alpha_adjust_bug,
                                                         enum pipe_format fmt);
 
 unsigned ac_get_safe_fetch_size(const enum amd_gfx_level gfx_level, const struct ac_vtx_format_info* vtx_info,
diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index f62ff5010c3..27fd2067c5c 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -1379,8 +1379,7 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_neede
    /* Determine number of fetched components.
     * Note, ACO IR works with GFX6-8 nfmt + dfmt fields, these are later converted for GFX10+.
     */
-   const struct ac_vtx_format_info* vtx_info =
-      ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, info.format);
+   const struct ac_vtx_format_info* vtx_info = ac_get_vtx_format_info(GFX8, true, info.format);
    /* The number of channels in the format determines the memory range. */
    const unsigned max_components = vtx_info->num_channels;
    /* Calculate maximum number of components loaded according to alignment. */
@@ -2754,7 +2753,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    if (intrin->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
       const pipe_format format = nir_intrinsic_format(intrin);
       const struct ac_vtx_format_info* vtx_info =
-         ac_get_vtx_format_info(ctx->program->gfx_level, ctx->program->family, format);
+         ac_get_vtx_format_info(ctx->program->gfx_level, true, format);
       const struct util_format_description* f = util_format_description(format);
 
       /* Avoid splitting:
diff --git a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
index f272026f618..59eba0178c6 100644
--- a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
@@ -341,8 +341,7 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index,
 bool
 is_last_attribute_large(const struct aco_vs_prolog_info* pinfo)
 {
-   const struct ac_vtx_format_info* vtx_info_table =
-      ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10);
+   const struct ac_vtx_format_info* vtx_info_table = ac_get_vtx_format_info_table(GFX8, true);
    unsigned last_attribute = pinfo->num_attributes - 1;
 
    if ((pinfo->misaligned_mask & (1u << last_attribute))) {
@@ -436,8 +435,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
                Operand::c32((unsigned)options->address32_hi));
    }
 
-   const struct ac_vtx_format_info* vtx_info_table =
-      ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10);
+   const struct ac_vtx_format_info* vtx_info_table = ac_get_vtx_format_info_table(GFX8, true);
 
    UnalignedVsAttribLoadState unaligned_state;
    unaligned_state.max_vgprs = MAX2(84, num_vgprs + 8);
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 8f6871318c0..793c501c2f1 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -1139,7 +1139,8 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
                                         enum gl_access_qualifier access,
                                         bool can_speculate)
 {
-   const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(ctx->gfx_level, ctx->info->family, format);
+   const struct ac_vtx_format_info *vtx_info =
+      ac_get_vtx_format_info(ctx->gfx_level, ctx->info->cu_info.has_vtx_format_alpha_adjust_bug, format);
    const unsigned max_channels = vtx_info->num_channels;
    LLVMValueRef voffset_plus_const =
       LLVMBuildAdd(ctx->builder, base_voffset, LLVMConstInt(ctx->i32, const_offset, 0), "");
diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
index a2f3ec8f7fe..42e4773e4c8 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
@@ -227,8 +227,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
    const uint32_t attrib_stride = s->gfx_state->vi.vertex_attribute_strides[location];
    const enum pipe_format attrib_format = s->gfx_state->vi.vertex_attribute_formats[location];
    const struct util_format_description *f = util_format_description(attrib_format);
-   const struct ac_vtx_format_info *vtx_info =
-      ac_get_vtx_format_info(s->gpu_info->gfx_level, s->gpu_info->family, attrib_format);
+   const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(
+      s->gpu_info->gfx_level, s->gpu_info->cu_info.has_vtx_format_alpha_adjust_bug, attrib_format);
    const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
    const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & BITFIELD_MASK(binding_index));
 
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8c7110d6bf9..5004d49305a 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -9218,8 +9218,8 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
    vertex_input.bindings_match_attrib = true;
 
    enum amd_gfx_level chip = pdev->info.gfx_level;
-   enum radeon_family family = pdev->info.family;
-   const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(chip, family);
+   bool alpha_adjust = pdev->info.cu_info.has_vtx_format_alpha_adjust_bug;
+   const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(chip, alpha_adjust);
 
    for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
       const VkVertexInputAttributeDescription2EXT *attrib = &pVertexAttributeDescriptions[i];
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index b8a2b24f9ac..5ec00bd66c9 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -748,8 +748,8 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct r
 
    if (vs->info.vs.use_per_attribute_vb_descs) {
       const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
-      const enum radeon_family family = pdev->info.family;
-      const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
+      const bool alpha_adjust = pdev->info.cu_info.has_vtx_format_alpha_adjust_bug;
+      const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, alpha_adjust);
 
       dynamic->vertex_input.bindings_match_attrib = true;
 
@@ -2031,7 +2031,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
          }
 
          const struct ac_vtx_format_info *vtx_info =
-            ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.family, format);
+            ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.cu_info.has_vtx_format_alpha_adjust_bug, format);
          unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
 
          /* If offset is misaligned, then the buffer offset must be too. Just skip updating