freedreno: Add compute constlen quirk for X1-85

This GPU seems to have half the compute constlen of other a7xx GPUs:
there are sporadic hangs in dEQP-VK.robustness.robustness2.* and other
tests unless we limit the constlen. This does *not* happen on
SM8550-HDK, so it does seem to be specific to the GPU in x1e laptops.

Fixes: b0d22461b9 ("freedreno: Enable the X1-85")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31764>
(cherry picked from commit 3c8190e8b2)
This commit is contained in:
Connor Abbott 2024-10-17 18:30:05 -04:00 committed by Eric Engestrom
parent 655cdbd649
commit d48aac9e19
4 changed files with 35 additions and 4 deletions

View file

@ -1214,7 +1214,7 @@
"description": "freedreno: Add compute constlen quirk for X1-85",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "b0d22461b945de597f39062a53e4f08d4b8559a2",
"notes": null

View file

@ -276,6 +276,9 @@ struct fd_dev_info {
bool enable_tp_ubwc_flag_hint;
bool storage_8bit;
/* Whether only 256 vec4 constants are available for compute */
bool compute_constlen_quirk;
} a7xx;
};

View file

@ -863,6 +863,18 @@ a7xx_740v3 = A7XXProps(
enable_tp_ubwc_flag_hint = True,
)
# Properties for the Adreno X1-85. It gets its own props object so that
# compute_constlen_quirk can be set for this GPU only.
a7xx_x1_85 = A7XXProps(
stsc_duplication_quirk = True,
has_event_write_sample_count = True,
ubwc_unorm_snorm_int_compatible = True,
supports_ibo_ubwc = True,
fs_must_have_non_zero_constlen_quirk = True,
# Most devices with a740 have blob v6xx which doesn't have
# this hint set. Match them for better compatibility by default.
enable_tp_ubwc_flag_hint = False,
# Only 256 vec4 constants are available for compute on this GPU:
# sporadic hangs were seen in dEQP-VK.robustness.robustness2.* and
# other tests unless the compute constlen is limited.
compute_constlen_quirk = True,
)
a7xx_750 = A7XXProps(
has_event_write_sample_count = True,
load_inline_uniforms_via_preamble_ldgk = True,
@ -1053,7 +1065,6 @@ add_gpus([
GPUId(740), # Deprecated, used for dev kernels.
GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data
GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
], A6xxGPUInfo(
CHIP.A7XX,
[a7xx_base, a7xx_740],
@ -1068,6 +1079,22 @@ add_gpus([
raw_magic_regs = a740_raw_magic_regs,
))
# Adreno X1-85: registered on its own with the a7xx_x1_85 props (which set
# compute_constlen_quirk), while reusing the a740 magic register tables.
add_gpus([
GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
], A6xxGPUInfo(
CHIP.A7XX,
[a7xx_base, a7xx_x1_85],
num_ccu = 6,
tile_align_w = 96,
tile_align_h = 32,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
fibers_per_sp = 128 * 2 * 16,
magic_regs = a740_magic_regs,
raw_magic_regs = a740_raw_magic_regs,
))
# Values from blob v676.0
add_gpus([
GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2)

View file

@ -190,11 +190,12 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
/* Compute shaders don't share a const file with the FS. Instead they
* have their own file, which is smaller than the FS one. On a7xx the size
* was doubled.
* was doubled, although this doesn't work on X1-85.
*
* TODO: is this true on earlier gens?
*/
compiler->max_const_compute = compiler->gen >= 7 ? 512 : 256;
compiler->max_const_compute =
(compiler->gen >= 7 && !dev_info->a7xx.compute_constlen_quirk) ? 512 : 256;
/* TODO: implement clip+cull distances on earlier gens */
compiler->has_clip_cull = true;