freedreno: Add compute constlen quirk for X1-85

This GPU seems to have half the compute constlen of other a7xx GPUs,
because there are sporadic hangs in dEQP-VK.robustness.robustness2.* and
other tests unless we limit the constlen. This does *not* happen on
SM8550-HDK, so it does seem to be specific to the GPU in x1e laptops.

Fixes: b0d22461b9 ("freedreno: Enable the X1-85")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31764>
(cherry picked from commit 3c8190e8b2)
2026-05-07 09:18:04 +02:00 · 2024-10-17 18:30:05 -04:00 · 2024-10-17 18:30:05 -04:00 · d48aac9e19
commit d48aac9e19
parent 655cdbd649
4 changed files with 35 additions and 4 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1214,7 +1214,7 @@
        "description": "freedreno: Add compute constlen quirk for X1-85",
        "nominated": true,
        "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "b0d22461b945de597f39062a53e4f08d4b8559a2",
        "notes": null
--- a/src/freedreno/common/freedreno_dev_info.h
+++ b/src/freedreno/common/freedreno_dev_info.h
@@ -276,6 +276,9 @@ struct fd_dev_info {
      bool enable_tp_ubwc_flag_hint;

      bool storage_8bit;
+
+      /* Whether only 256 vec4 constants are available for compute */
+      bool compute_constlen_quirk;
   } a7xx;
 };

--- a/src/freedreno/common/freedreno_devices.py
+++ b/src/freedreno/common/freedreno_devices.py
@@ -863,6 +863,18 @@ a7xx_740v3 = A7XXProps(
        enable_tp_ubwc_flag_hint = True,
    )

+a7xx_x1_85 = A7XXProps(
+        stsc_duplication_quirk = True,
+        has_event_write_sample_count = True,
+        ubwc_unorm_snorm_int_compatible = True,
+        supports_ibo_ubwc = True,
+        fs_must_have_non_zero_constlen_quirk = True,
+        # Most devices with a740 have blob v6xx which doesn't have
+        # this hint set. Match them for better compatibility by default.
+        enable_tp_ubwc_flag_hint = False,
+        compute_constlen_quirk = True,
+    )
+
 a7xx_750 = A7XXProps(
        has_event_write_sample_count = True,
        load_inline_uniforms_via_preamble_ldgk = True,
@@ -1053,7 +1065,6 @@ add_gpus([
        GPUId(740), # Deprecated, used for dev kernels.
        GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data
        GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
-        GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_740],
@@ -1068,6 +1079,22 @@ add_gpus([
        raw_magic_regs = a740_raw_magic_regs,
    ))

+add_gpus([
+        GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
+    ], A6xxGPUInfo(
+        CHIP.A7XX,
+        [a7xx_base, a7xx_x1_85],
+        num_ccu = 6,
+        tile_align_w = 96,
+        tile_align_h = 32,
+        num_vsc_pipes = 32,
+        cs_shared_mem_size = 32 * 1024,
+        wave_granularity = 2,
+        fibers_per_sp = 128 * 2 * 16,
+        magic_regs = a740_magic_regs,
+        raw_magic_regs = a740_raw_magic_regs,
+    ))
+
 # Values from blob v676.0
 add_gpus([
        GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2)
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -190,11 +190,12 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,

      /* Compute shaders don't share a const file with the FS. Instead they
       * have their own file, which is smaller than the FS one. On a7xx the size
-       * was doubled.
+       * was doubled, although this doesn't work on X1-85.
       *
       * TODO: is this true on earlier gen's?
       */
-      compiler->max_const_compute = compiler->gen >= 7 ? 512 : 256;
+      compiler->max_const_compute =
+         (compiler->gen >= 7 && !dev_info->a7xx.compute_constlen_quirk) ? 512 : 256;

      /* TODO: implement clip+cull distances on earlier gen's */
      compiler->has_clip_cull = true;