freedreno/devices: Separate device definition into base + gen features

After the introduction of A7XX it no longer makes sense to define base GPU properties in A6xxGPUInfo. We now move to a cleaner definition: - a6xx_base + a6xx_genX - for A6XX - a7xx_base + a7xx_xxx - for A7XX, where no sub-gens are clearly identifiable at the moment. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25939>
2026-05-02 12:18:09 +02:00 · 2023-10-26 18:48:35 +02:00 · 2023-10-26 18:48:35 +02:00 · e6bfe42b6e
commit e6bfe42b6e
parent 4382bd8a34
1 changed files with 80 additions and 53 deletions
--- a/src/freedreno/common/freedreno_devices.py
+++ b/src/freedreno/common/freedreno_devices.py
@ -143,7 +143,7 @@ class A6xxGPUInfo(GPUInfo):
    def __init__(self, chip, template, num_ccu,
                 tile_align_w, tile_align_h, num_vsc_pipes,
                 cs_shared_mem_size, wave_granularity, fibers_per_sp,
-                 magic_regs, raw_magic_regs = None, max_sets = 5):
+                 magic_regs, raw_magic_regs = None):
        super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = tile_align_w,
                         tile_align_h = tile_align_h,
@ -168,25 +168,6 @@ class A6xxGPUInfo(GPUInfo):
        if raw_magic_regs:
            self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]

-        # Things that earlier gens have and later gens remove, provide
-        # defaults here and let them be overridden by sub-gen template:
-        self.a6xx.has_cp_reg_write = True
-        self.a6xx.has_8bpp_ubwc = True
-
-        self.a6xx.has_gmem_fast_clear = True
-        self.a6xx.has_hw_multiview = True
-        self.a6xx.has_fs_tex_prefetch = True
-        self.a6xx.has_sampler_minmax = True
-
-        self.a6xx.sysmem_per_ccu_cache_size = 64 * 1024
-        self.a6xx.gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value
-
-        self.a6xx.prim_alloc_threshold = 0x7
-
-        self.a6xx.vs_max_inputs_count = 32
-
-        self.a6xx.max_sets = max_sets
-
        templates = template if type(template) is list else [template]
        for template in templates:
            template.apply_props(self)
@ -315,24 +296,36 @@ class A7XXProps(dict):
            setattr(gpu_info.a7xx, name, val)


+a6xx_base = A6XXProps(
+        has_cp_reg_write = True,
+        has_8bpp_ubwc = True,
+        has_gmem_fast_clear = True,
+        has_hw_multiview = True,
+        has_fs_tex_prefetch = True,
+        has_sampler_minmax = True,
+
+        supports_double_threadsize = True,
+
+        sysmem_per_ccu_cache_size = 64 * 1024,
+        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
+
+        prim_alloc_threshold = 0x7,
+        vs_max_inputs_count = 32,
+        max_sets = 5,
+    )
+
+
 # a6xx can be divided into distinct sub-generations, where certain device-
 # info parameters are keyed to the sub-generation.  These templates reduce
 # the copypaste

-# a615, a616, a618, a619, a620 and a630:
-a6xx_gen1 = A6XXProps(
-        reg_size_vec4 = 96,
+a6xx_gen1_low = A6XXProps(
+        reg_size_vec4 = 48,
        instr_cache_size = 64,
-        concurrent_resolve = False,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,
-        supports_double_threadsize = True,
-    )

-# a605, a608, a610, 612
-a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
        has_gmem_fast_clear = False,
-        reg_size_vec4 = 48,
        has_hw_multiview = False,
        has_sampler_minmax = False,
        has_fs_tex_prefetch = False,
@ -340,9 +333,15 @@ a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
        vs_max_inputs_count = 16,
        supports_double_threadsize = False,
-)})
+    )
+
+a6xx_gen1 = A6XXProps(
+        reg_size_vec4 = 96,
+        instr_cache_size = 64,
+        indirect_draw_wfm_quirk = True,
+        depth_bounds_require_depth_test_quirk = True,
+    )

-# a640, a680:
 a6xx_gen2 = A6XXProps(
        reg_size_vec4 = 96,
        instr_cache_size = 64, # TODO
@ -352,10 +351,8 @@ a6xx_gen2 = A6XXProps(
        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
        has_dp2acc = False, # TODO: check if true
        has_8bpp_ubwc = False,
-        supports_double_threadsize = True,
    )

-# a650:
 a6xx_gen3 = A6XXProps(
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
@ -373,10 +370,8 @@ a6xx_gen3 = A6XXProps(
        enable_lrz_fast_clear = True,
        lrz_track_quirk = True,
        has_per_view_viewport = True,
-        supports_double_threadsize = True,
    )

-# a635, a660:
 a6xx_gen4 = A6XXProps(
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
@ -398,7 +393,6 @@ a6xx_gen4 = A6XXProps(
        enable_lrz_fast_clear = True,
        has_lrz_dir_tracking = True,
        has_per_view_viewport = True,
-        supports_double_threadsize = True,
    )

 add_gpus([
@ -408,7 +402,7 @@ add_gpus([
        GPUId(612), # TODO: Test it, based only on libwrapfake dumps
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen1_low,
+        [a6xx_base, a6xx_gen1_low],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 16,
@ -440,7 +434,7 @@ add_gpus([
        GPUId(619),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 32,
@ -469,7 +463,7 @@ add_gpus([
        GPUId(620),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 16,
@ -498,7 +492,7 @@ add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
@ -527,7 +521,7 @@ add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen2,
+        [a6xx_base, a6xx_gen2],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
@ -556,7 +550,7 @@ add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen2,
+        [a6xx_base, a6xx_gen2],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
@ -585,7 +579,7 @@ add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen3,
+        [a6xx_base, a6xx_gen3],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 16,
@ -620,7 +614,7 @@ add_gpus([
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
@ -649,7 +643,7 @@ add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 16,
@ -679,7 +673,7 @@ add_gpus([
        GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
        num_ccu = 8,
        tile_align_w = 64,
        tile_align_h = 32,
@ -704,9 +698,45 @@ add_gpus([
        )
    ))

+# Based on a6xx_base + a6xx_gen4
+a7xx_base = A6XXProps(
+        has_gmem_fast_clear = True,
+        has_hw_multiview = True,
+        has_fs_tex_prefetch = True,
+        has_sampler_minmax = True,
+
+        supports_double_threadsize = True,
+
+        sysmem_per_ccu_cache_size = 64 * 1024,
+        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
+
+        prim_alloc_threshold = 0x7,
+        vs_max_inputs_count = 32,
+        max_sets = 8,
+
+        reg_size_vec4 = 64,
+        # Blob limits it to 128 but we hang with 128
+        instr_cache_size = 127,
+        supports_multiview_mask = True,
+        has_z24uint_s8uint = True,
+        tess_use_shared = True,
+        storage_16bit = True,
+        has_tex_filter_cubic = True,
+        has_separate_chroma_filter = True,
+        has_sample_locations = True,
+        has_lpac = True,
+        has_shading_rate = True,
+        has_getfiberid = True,
+        has_dp2acc = True,
+        has_dp4acc = True,
+        enable_lrz_fast_clear = True,
+        has_lrz_dir_tracking = True,
+        has_per_view_viewport = True,
+    )
+
 a7xx_725 = A7XXProps(
        cmdbuf_start_a725_quirk = True,
-)
+    )

 a7xx_730 = A7XXProps()

@ -761,7 +791,7 @@ add_gpus([
        GPUId(chip_id=0xffff07030002, name="FD725"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
-        [a6xx_gen4, a7xx_725],
+        [a7xx_base, a7xx_725],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
@ -771,7 +801,6 @@ add_gpus([
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = a730_magic_regs,
        raw_magic_regs = a730_raw_magic_regs,
-        max_sets = 8,
    ))

 add_gpus([
@ -779,7 +808,7 @@ add_gpus([
        GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A7XX,
-        [a6xx_gen4, a7xx_730],
+        [a7xx_base, a7xx_730],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
@ -789,7 +818,6 @@ add_gpus([
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = a730_magic_regs,
        raw_magic_regs = a730_raw_magic_regs,
-        max_sets = 8,
    ))

 add_gpus([
@ -798,7 +826,7 @@ add_gpus([
        GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A7XX,
-        [a6xx_gen4, a7xx_740],
+        [a7xx_base, a7xx_740],
        num_ccu = 6,
        tile_align_w = 64,
        tile_align_h = 32,
@ -853,7 +881,6 @@ add_gpus([
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
        ],
-        max_sets = 8,
    ))

 template = """\