mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 12:18:09 +02:00
freedreno/devices: Separate device definition into base + gen features
After the introduction of A7XX it no longer makes sense to define base GPU
properties in A6xxGPUInfo. Now we move to a cleaner definition:
- a6xx_base + a6xx_genX - for A6XX
- a7xx_base + a7xx_xxx - for A7XX; there are no clearly identifiable
sub-gens at the moment.
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25939>
This commit is contained in:
parent
4382bd8a34
commit
e6bfe42b6e
1 changed file with 80 additions and 53 deletions
|
|
@ -143,7 +143,7 @@ class A6xxGPUInfo(GPUInfo):
|
|||
def __init__(self, chip, template, num_ccu,
|
||||
tile_align_w, tile_align_h, num_vsc_pipes,
|
||||
cs_shared_mem_size, wave_granularity, fibers_per_sp,
|
||||
magic_regs, raw_magic_regs = None, max_sets = 5):
|
||||
magic_regs, raw_magic_regs = None):
|
||||
super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
|
||||
tile_align_w = tile_align_w,
|
||||
tile_align_h = tile_align_h,
|
||||
|
|
@ -168,25 +168,6 @@ class A6xxGPUInfo(GPUInfo):
|
|||
if raw_magic_regs:
|
||||
self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]
|
||||
|
||||
# Things that earlier gens have and later gens remove, provide
|
||||
# defaults here and let them be overridden by sub-gen template:
|
||||
self.a6xx.has_cp_reg_write = True
|
||||
self.a6xx.has_8bpp_ubwc = True
|
||||
|
||||
self.a6xx.has_gmem_fast_clear = True
|
||||
self.a6xx.has_hw_multiview = True
|
||||
self.a6xx.has_fs_tex_prefetch = True
|
||||
self.a6xx.has_sampler_minmax = True
|
||||
|
||||
self.a6xx.sysmem_per_ccu_cache_size = 64 * 1024
|
||||
self.a6xx.gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value
|
||||
|
||||
self.a6xx.prim_alloc_threshold = 0x7
|
||||
|
||||
self.a6xx.vs_max_inputs_count = 32
|
||||
|
||||
self.a6xx.max_sets = max_sets
|
||||
|
||||
templates = template if type(template) is list else [template]
|
||||
for template in templates:
|
||||
template.apply_props(self)
|
||||
|
|
@ -315,24 +296,36 @@ class A7XXProps(dict):
|
|||
setattr(gpu_info.a7xx, name, val)
|
||||
|
||||
|
||||
a6xx_base = A6XXProps(
|
||||
has_cp_reg_write = True,
|
||||
has_8bpp_ubwc = True,
|
||||
has_gmem_fast_clear = True,
|
||||
has_hw_multiview = True,
|
||||
has_fs_tex_prefetch = True,
|
||||
has_sampler_minmax = True,
|
||||
|
||||
supports_double_threadsize = True,
|
||||
|
||||
sysmem_per_ccu_cache_size = 64 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
|
||||
|
||||
prim_alloc_threshold = 0x7,
|
||||
vs_max_inputs_count = 32,
|
||||
max_sets = 5,
|
||||
)
|
||||
|
||||
|
||||
# a6xx can be divided into distinct sub-generations, where certain device-
|
||||
# info parameters are keyed to the sub-generation. These templates reduce
|
||||
# the copypaste
|
||||
|
||||
# a615, a616, a618, a619, a620 and a630:
|
||||
a6xx_gen1 = A6XXProps(
|
||||
reg_size_vec4 = 96,
|
||||
a6xx_gen1_low = A6XXProps(
|
||||
reg_size_vec4 = 48,
|
||||
instr_cache_size = 64,
|
||||
concurrent_resolve = False,
|
||||
indirect_draw_wfm_quirk = True,
|
||||
depth_bounds_require_depth_test_quirk = True,
|
||||
supports_double_threadsize = True,
|
||||
)
|
||||
|
||||
# a605, a608, a610, a612
|
||||
a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
|
||||
has_gmem_fast_clear = False,
|
||||
reg_size_vec4 = 48,
|
||||
has_hw_multiview = False,
|
||||
has_sampler_minmax = False,
|
||||
has_fs_tex_prefetch = False,
|
||||
|
|
@ -340,9 +333,15 @@ a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
|
|||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
|
||||
vs_max_inputs_count = 16,
|
||||
supports_double_threadsize = False,
|
||||
)})
|
||||
)
|
||||
|
||||
a6xx_gen1 = A6XXProps(
|
||||
reg_size_vec4 = 96,
|
||||
instr_cache_size = 64,
|
||||
indirect_draw_wfm_quirk = True,
|
||||
depth_bounds_require_depth_test_quirk = True,
|
||||
)
|
||||
|
||||
# a640, a680:
|
||||
a6xx_gen2 = A6XXProps(
|
||||
reg_size_vec4 = 96,
|
||||
instr_cache_size = 64, # TODO
|
||||
|
|
@ -352,10 +351,8 @@ a6xx_gen2 = A6XXProps(
|
|||
depth_bounds_require_depth_test_quirk = True, # TODO: check if true
|
||||
has_dp2acc = False, # TODO: check if true
|
||||
has_8bpp_ubwc = False,
|
||||
supports_double_threadsize = True,
|
||||
)
|
||||
|
||||
# a650:
|
||||
a6xx_gen3 = A6XXProps(
|
||||
reg_size_vec4 = 64,
|
||||
# Blob limits it to 128 but we hang with 128
|
||||
|
|
@ -373,10 +370,8 @@ a6xx_gen3 = A6XXProps(
|
|||
enable_lrz_fast_clear = True,
|
||||
lrz_track_quirk = True,
|
||||
has_per_view_viewport = True,
|
||||
supports_double_threadsize = True,
|
||||
)
|
||||
|
||||
# a635, a660:
|
||||
a6xx_gen4 = A6XXProps(
|
||||
reg_size_vec4 = 64,
|
||||
# Blob limits it to 128 but we hang with 128
|
||||
|
|
@ -398,7 +393,6 @@ a6xx_gen4 = A6XXProps(
|
|||
enable_lrz_fast_clear = True,
|
||||
has_lrz_dir_tracking = True,
|
||||
has_per_view_viewport = True,
|
||||
supports_double_threadsize = True,
|
||||
)
|
||||
|
||||
add_gpus([
|
||||
|
|
@ -408,7 +402,7 @@ add_gpus([
|
|||
GPUId(612), # TODO: Test it, based only on libwrapfake dumps
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen1_low,
|
||||
[a6xx_base, a6xx_gen1_low],
|
||||
num_ccu = 1,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -440,7 +434,7 @@ add_gpus([
|
|||
GPUId(619),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen1,
|
||||
[a6xx_base, a6xx_gen1],
|
||||
num_ccu = 1,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -469,7 +463,7 @@ add_gpus([
|
|||
GPUId(620),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen1,
|
||||
[a6xx_base, a6xx_gen1],
|
||||
num_ccu = 1,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -498,7 +492,7 @@ add_gpus([
|
|||
GPUId(630),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen1,
|
||||
[a6xx_base, a6xx_gen1],
|
||||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -527,7 +521,7 @@ add_gpus([
|
|||
GPUId(640),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen2,
|
||||
[a6xx_base, a6xx_gen2],
|
||||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -556,7 +550,7 @@ add_gpus([
|
|||
GPUId(680),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen2,
|
||||
[a6xx_base, a6xx_gen2],
|
||||
num_ccu = 4,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -585,7 +579,7 @@ add_gpus([
|
|||
GPUId(650),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen3,
|
||||
[a6xx_base, a6xx_gen3],
|
||||
num_ccu = 3,
|
||||
tile_align_w = 96,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -620,7 +614,7 @@ add_gpus([
|
|||
GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen4,
|
||||
[a6xx_base, a6xx_gen4],
|
||||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -649,7 +643,7 @@ add_gpus([
|
|||
GPUId(660),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen4,
|
||||
[a6xx_base, a6xx_gen4],
|
||||
num_ccu = 3,
|
||||
tile_align_w = 96,
|
||||
tile_align_h = 16,
|
||||
|
|
@ -679,7 +673,7 @@ add_gpus([
|
|||
GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A6XX,
|
||||
a6xx_gen4,
|
||||
[a6xx_base, a6xx_gen4],
|
||||
num_ccu = 8,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -704,9 +698,45 @@ add_gpus([
|
|||
)
|
||||
))
|
||||
|
||||
# Based on a6xx_base + a6xx_gen4
|
||||
a7xx_base = A6XXProps(
|
||||
has_gmem_fast_clear = True,
|
||||
has_hw_multiview = True,
|
||||
has_fs_tex_prefetch = True,
|
||||
has_sampler_minmax = True,
|
||||
|
||||
supports_double_threadsize = True,
|
||||
|
||||
sysmem_per_ccu_cache_size = 64 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
|
||||
|
||||
prim_alloc_threshold = 0x7,
|
||||
vs_max_inputs_count = 32,
|
||||
max_sets = 8,
|
||||
|
||||
reg_size_vec4 = 64,
|
||||
# Blob limits it to 128 but we hang with 128
|
||||
instr_cache_size = 127,
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
tess_use_shared = True,
|
||||
storage_16bit = True,
|
||||
has_tex_filter_cubic = True,
|
||||
has_separate_chroma_filter = True,
|
||||
has_sample_locations = True,
|
||||
has_lpac = True,
|
||||
has_shading_rate = True,
|
||||
has_getfiberid = True,
|
||||
has_dp2acc = True,
|
||||
has_dp4acc = True,
|
||||
enable_lrz_fast_clear = True,
|
||||
has_lrz_dir_tracking = True,
|
||||
has_per_view_viewport = True,
|
||||
)
|
||||
|
||||
a7xx_725 = A7XXProps(
|
||||
cmdbuf_start_a725_quirk = True,
|
||||
)
|
||||
)
|
||||
|
||||
a7xx_730 = A7XXProps()
|
||||
|
||||
|
|
@ -761,7 +791,7 @@ add_gpus([
|
|||
GPUId(chip_id=0xffff07030002, name="FD725"),
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A7XX,
|
||||
[a6xx_gen4, a7xx_725],
|
||||
[a7xx_base, a7xx_725],
|
||||
num_ccu = 4,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -771,7 +801,6 @@ add_gpus([
|
|||
fibers_per_sp = 128 * 2 * 16,
|
||||
magic_regs = a730_magic_regs,
|
||||
raw_magic_regs = a730_raw_magic_regs,
|
||||
max_sets = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
|
|
@ -779,7 +808,7 @@ add_gpus([
|
|||
GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A7XX,
|
||||
[a6xx_gen4, a7xx_730],
|
||||
[a7xx_base, a7xx_730],
|
||||
num_ccu = 4,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -789,7 +818,6 @@ add_gpus([
|
|||
fibers_per_sp = 128 * 2 * 16,
|
||||
magic_regs = a730_magic_regs,
|
||||
raw_magic_regs = a730_raw_magic_regs,
|
||||
max_sets = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
|
|
@ -798,7 +826,7 @@ add_gpus([
|
|||
GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
|
||||
], A6xxGPUInfo(
|
||||
CHIP.A7XX,
|
||||
[a6xx_gen4, a7xx_740],
|
||||
[a7xx_base, a7xx_740],
|
||||
num_ccu = 6,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
|
|
@ -853,7 +881,6 @@ add_gpus([
|
|||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
|
||||
],
|
||||
max_sets = 8,
|
||||
))
|
||||
|
||||
template = """\
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue