From d48aac9e19b03f8c41a26b88e21fc462a786352e Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 17 Oct 2024 18:30:05 -0400 Subject: [PATCH] freedreno: Add compute constlen quirk for X1-85 This GPU seems to have half the compute constlen of other a7xx GPUs, because there are sporadic hangs in dEQP-VK.robustness.robustness2.* and other tests unless we limit the constlen. This does *not* happen on SM8550-HDK, so it does seem to be specific to the GPU in x1e laptops. Fixes: b0d22461b94 ("freedreno: Enable the X1-85") Part-of: (cherry picked from commit 3c8190e8b221a7996c6c79534e1c20929cf8d9ef) --- .pick_status.json | 2 +- src/freedreno/common/freedreno_dev_info.h | 3 +++ src/freedreno/common/freedreno_devices.py | 29 ++++++++++++++++++++++- src/freedreno/ir3/ir3_compiler.c | 5 ++-- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index ec28a2068e0..b4c04be05ad 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1214,7 +1214,7 @@ "description": "freedreno: Add compute constlen quirk for X1-85", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "b0d22461b945de597f39062a53e4f08d4b8559a2", "notes": null diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 33f30f4e330..c6d1b07e22f 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -276,6 +276,9 @@ struct fd_dev_info { bool enable_tp_ubwc_flag_hint; bool storage_8bit; + + /* Whether only 256 vec4 constants are available for compute */ + bool compute_constlen_quirk; } a7xx; }; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index c61cbd34a60..c94298a6725 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -863,6 +863,18 @@ a7xx_740v3 = A7XXProps( enable_tp_ubwc_flag_hint = True, ) +a7xx_x1_85 = A7XXProps( + stsc_duplication_quirk = True, + has_event_write_sample_count = True, + ubwc_unorm_snorm_int_compatible = True, + supports_ibo_ubwc = True, + fs_must_have_non_zero_constlen_quirk = True, + # Most devices with a740 have blob v6xx which doesn't have + # this hint set. Match them for better compatibility by default. + enable_tp_ubwc_flag_hint = False, + compute_constlen_quirk = True, + ) + a7xx_750 = A7XXProps( has_event_write_sample_count = True, load_inline_uniforms_via_preamble_ldgk = True, @@ -1053,7 +1065,6 @@ add_gpus([ GPUId(740), # Deprecated, used for dev kernels. GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback - GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"), ], A6xxGPUInfo( CHIP.A7XX, [a7xx_base, a7xx_740], @@ -1068,6 +1079,22 @@ add_gpus([ raw_magic_regs = a740_raw_magic_regs, )) +add_gpus([ + GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"), + ], A6xxGPUInfo( + CHIP.A7XX, + [a7xx_base, a7xx_x1_85], + num_ccu = 6, + tile_align_w = 96, + tile_align_h = 32, + num_vsc_pipes = 32, + cs_shared_mem_size = 32 * 1024, + wave_granularity = 2, + fibers_per_sp = 128 * 2 * 16, + magic_regs = a740_magic_regs, + raw_magic_regs = a740_raw_magic_regs, + )) + # Values from blob v676.0 add_gpus([ GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2) diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index c21b441028c..26ed75ebf9a 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -190,11 +190,12 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, /* Compute shaders don't share a const file with the FS. Instead they * have their own file, which is smaller than the FS one. On a7xx the size - * was doubled. + * was doubled, although this doesn't work on X1-85. * * TODO: is this true on earlier gen's? */ - compiler->max_const_compute = compiler->gen >= 7 ? 512 : 256; + compiler->max_const_compute = + (compiler->gen >= 7 && !dev_info->a7xx.compute_constlen_quirk) ? 512 : 256; /* TODO: implement clip+cull distances on earlier gen's */ compiler->has_clip_cull = true;