freedreno: Make it possible to specify A7XX feature flags

Previously the idea was for each generation to have a unique list
of feature flags; now it makes more sense for a new generation
to define only its new flags and "inherit" the older generation's flags.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25086>
This commit is contained in:
Danylo Piliaiev 2023-09-05 15:44:49 +02:00 committed by Marge Bot
parent 823b3bfeea
commit e39b6e2b9b
2 changed files with 160 additions and 138 deletions

View file

@ -62,7 +62,6 @@ struct fd_dev_info {
uint32_t num_ccu; uint32_t num_ccu;
}; };
union {
struct { struct {
uint32_t reg_size_vec4; uint32_t reg_size_vec4;
@ -190,7 +189,9 @@ struct fd_dev_info {
uint32_t value; uint32_t value;
} magic_raw[32]; } magic_raw[32];
} a6xx; } a6xx;
};
struct {
} a7xx;
}; };
struct fd_dev_id { struct fd_dev_id {

View file

@ -158,6 +158,8 @@ class A6xxGPUInfo(GPUInfo):
self.num_ccu = num_ccu self.num_ccu = num_ccu
self.a6xx = Struct() self.a6xx = Struct()
self.a7xx = Struct()
self.a6xx.magic = Struct() self.a6xx.magic = Struct()
for name, val in magic_regs.items(): for name, val in magic_regs.items():
@ -183,10 +185,10 @@ class A6xxGPUInfo(GPUInfo):
self.a6xx.vs_max_inputs_count = 32 self.a6xx.vs_max_inputs_count = 32
for name, val in template.items(): templates = template if type(template) is list else [template]
if name == "magic": # handled above for template in templates:
continue template.apply_props(self)
setattr(self.a6xx, name, val)
def __str__(self): def __str__(self):
return super(A6xxGPUInfo, self).__str__().replace('[', '{').replace("]", "}") return super(A6xxGPUInfo, self).__str__().replace('[', '{').replace("]", "}")
@ -296,12 +298,27 @@ add_gpus([
fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
)) ))
class A6XXProps(dict):
def apply_props(self, gpu_info):
for name, val in self.items():
if name == "magic":
continue
setattr(gpu_info.a6xx, name, val)
class A7XXProps(dict):
def apply_props(self, gpu_info):
for name, val in self.items():
setattr(gpu_info.a7xx, name, val)
# a6xx can be divided into distinct sub-generations, where certain device- # a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation. These templates reduce # info parameters are keyed to the sub-generation. These templates reduce
# the copypaste # the copypaste
# a615, a616, a618, a619, a620 and a630: # a615, a616, a618, a619, a620 and a630:
a6xx_gen1 = dict( a6xx_gen1 = A6XXProps(
reg_size_vec4 = 96, reg_size_vec4 = 96,
instr_cache_size = 64, instr_cache_size = 64,
concurrent_resolve = False, concurrent_resolve = False,
@ -311,7 +328,7 @@ a6xx_gen1 = dict(
) )
# a605, a608, a610, 612 # a605, a608, a610, 612
a6xx_gen1_low = {**a6xx_gen1, **dict( a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
has_gmem_fast_clear = False, has_gmem_fast_clear = False,
reg_size_vec4 = 48, reg_size_vec4 = 48,
has_hw_multiview = False, has_hw_multiview = False,
@ -321,10 +338,10 @@ a6xx_gen1_low = {**a6xx_gen1, **dict(
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value, gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
vs_max_inputs_count = 16, vs_max_inputs_count = 16,
supports_double_threadsize = False, supports_double_threadsize = False,
)} )})
# a640, a680: # a640, a680:
a6xx_gen2 = dict( a6xx_gen2 = A6XXProps(
reg_size_vec4 = 96, reg_size_vec4 = 96,
instr_cache_size = 64, # TODO instr_cache_size = 64, # TODO
supports_multiview_mask = True, supports_multiview_mask = True,
@ -337,7 +354,7 @@ a6xx_gen2 = dict(
) )
# a650: # a650:
a6xx_gen3 = dict( a6xx_gen3 = A6XXProps(
reg_size_vec4 = 64, reg_size_vec4 = 64,
# Blob limits it to 128 but we hang with 128 # Blob limits it to 128 but we hang with 128
instr_cache_size = 127, instr_cache_size = 127,
@ -358,7 +375,7 @@ a6xx_gen3 = dict(
) )
# a635, a660: # a635, a660:
a6xx_gen4 = dict( a6xx_gen4 = A6XXProps(
reg_size_vec4 = 64, reg_size_vec4 = 64,
# Blob limits it to 128 but we hang with 128 # Blob limits it to 128 but we hang with 128
instr_cache_size = 127, instr_cache_size = 127,
@ -685,12 +702,16 @@ add_gpus([
) )
)) ))
a7xx_730 = A7XXProps()
a7xx_740 = A7XXProps()
add_gpus([ add_gpus([
GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data
GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback
], A6xxGPUInfo( ], A6xxGPUInfo(
CHIP.A7XX, CHIP.A7XX,
a6xx_gen4, [a6xx_gen4, a7xx_730],
num_ccu = 4, num_ccu = 4,
tile_align_w = 64, tile_align_w = 64,
tile_align_h = 32, tile_align_h = 32,
@ -746,7 +767,7 @@ add_gpus([
GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
], A6xxGPUInfo( ], A6xxGPUInfo(
CHIP.A7XX, CHIP.A7XX,
a6xx_gen4, [a6xx_gen4, a7xx_740],
num_ccu = 6, num_ccu = 6,
tile_align_w = 64, tile_align_w = 64,
tile_align_h = 32, tile_align_h = 32,