diff --git a/src/freedreno/common/freedreno_dev_info.c b/src/freedreno/common/freedreno_dev_info.c index fdc17860a35..6e04d7ebde4 100644 --- a/src/freedreno/common/freedreno_dev_info.c +++ b/src/freedreno/common/freedreno_dev_info.c @@ -25,106 +25,20 @@ #include "freedreno_dev_info.h" #include "util/macros.h" -static inline unsigned -max_bitfield_val(unsigned high, unsigned low, unsigned shift) -{ - return BITFIELD_MASK(high - low) << shift; -} +extern const struct fd_dev_id fd_dev_ids[]; +extern const unsigned fd_dev_ids_count; -void -fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id) +/** + * Return the device-info entry matching gpu_id from fd_dev_ids[], or + * NULL if the GPU is not in the table. + */ +const struct fd_dev_info * +fd_dev_info(uint32_t gpu_id) { - if (gpu_id >= 600) { - info->gmem_align_w = 16; - info->gmem_align_h = 4; - info->tile_align_w = gpu_id == 650 ? 96 : 32; - info->tile_align_h = 32; - /* based on GRAS_BIN_CONTROL: */ - info->tile_max_w = 1024; /* max_bitfield_val(5, 0, 5) */ - info->tile_max_h = max_bitfield_val(14, 8, 4); - info->num_vsc_pipes = 32; - - switch (gpu_id) { - case 615: - case 618: - info->num_sp_cores = 1; - info->fibers_per_sp = 128 * 16; - info->a6xx.ccu_cntl_gmem_unk2 = true; - info->a6xx.supports_multiview_mask = false; - info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000; - info->a6xx.magic.PC_UNKNOWN_9805 = 0; - info->a6xx.magic.SP_UNKNOWN_A0F8 = 0; - break; - case 630: - info->num_sp_cores = 2; - info->fibers_per_sp = 128 * 16; - info->a6xx.ccu_cntl_gmem_unk2 = true; - info->a6xx.supports_multiview_mask = false; - info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000; - info->a6xx.magic.PC_UNKNOWN_9805 = 1; - info->a6xx.magic.SP_UNKNOWN_A0F8 = 1; - break; - case 640: - info->num_sp_cores = 2; - /* The wavefront ID returned by the getwid instruction has a - * maximum of 3 * 10 - 1, or so it seems. However the swizzled - * index used in the mem offset calcuation is - * "(wid / 3) | ((wid % 3) << 4)", so that the actual max is - * around 3 * 16. 
Furthermore, with the per-fiber layout, the HW - * swizzles the wavefront index and fiber index itself, and it - * pads the number of wavefronts to 4 * 16 to make the swizzling - * simpler, so we have to bump the number of wavefronts to 4 * 16 - * for the per-fiber layout. We could theoretically reduce it for - * the per-wave layout though. - */ - info->fibers_per_sp = 128 * 4 * 16; - info->a6xx.supports_multiview_mask = true; - info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000; - info->a6xx.magic.PC_UNKNOWN_9805 = 1; - info->a6xx.magic.SP_UNKNOWN_A0F8 = 1; - info->a6xx.has_z24uint_s8uint = true; - break; - case 650: - info->num_sp_cores = 3; - info->fibers_per_sp = 128 * 2 * 16; - info->a6xx.supports_multiview_mask = true; - info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000; - info->a6xx.magic.PC_UNKNOWN_9805 = 2; - info->a6xx.magic.SP_UNKNOWN_A0F8 = 2; - info->a6xx.has_z24uint_s8uint = true; - break; - default: - /* Drivers should be doing their own version filtering, so we - * should never get here. 
- */ - unreachable("missing a6xx config"); + for (unsigned i = 0; i < fd_dev_ids_count; i++) { + if (gpu_id == fd_dev_ids[i].gpu_id) { + return fd_dev_ids[i].info; } - } else if (gpu_id >= 500) { - info->gmem_align_w = info->tile_align_w = 64; - info->gmem_align_h = info->tile_align_h = 32; - /* based on VSC_BIN_SIZE: */ - info->tile_max_w = 1024; /* max_bitfield_val(7, 0, 5) */ - info->tile_max_h = max_bitfield_val(16, 9, 5); - info->num_vsc_pipes = 16; - } else if (gpu_id >= 400) { - info->gmem_align_w = info->tile_align_w = 32; - info->gmem_align_h = info->tile_align_h = 32; - /* based on VSC_BIN_SIZE: */ - info->tile_max_w = 1024; /* max_bitfield_val(4, 0, 5) */ - info->tile_max_h = max_bitfield_val(9, 5, 5); - info->num_vsc_pipes = 8; - } else if (gpu_id >= 300) { - info->gmem_align_w = info->tile_align_w = 32; - info->gmem_align_h = info->tile_align_h = 32; - /* based on VSC_BIN_SIZE: */ - info->tile_max_w = 992; /* max_bitfield_val(4, 0, 5) */ - info->tile_max_h = max_bitfield_val(9, 5, 5); - info->num_vsc_pipes = 8; - } else { - info->gmem_align_w = info->tile_align_w = 32; - info->gmem_align_h = info->tile_align_h = 32; - info->tile_max_w = 512; - info->tile_max_h = ~0; /* TODO */ - info->num_vsc_pipes = 8; } + return NULL; } diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index f787e9a6ddb..8d6fbe287ee 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -51,11 +51,12 @@ struct fd_dev_info { uint32_t num_sp_cores; uint32_t num_ccu; }; - /* Information for private memory calculations */ - uint32_t fibers_per_sp; union { struct { + /* Information for private memory calculations */ + uint32_t fibers_per_sp; + /* Whether the PC_MULTIVIEW_MASK register exists. 
*/ bool supports_multiview_mask; @@ -72,6 +73,11 @@ struct fd_dev_info { }; }; +struct fd_dev_id { + uint32_t gpu_id; + const struct fd_dev_info *info; +}; + /* per CCU GMEM amount reserved for depth cache for direct rendering */ #define A6XX_CCU_DEPTH_SIZE (64 * 1024) /* per CCU GMEM amount reserved for color cache used by GMEM resolves @@ -83,7 +89,7 @@ struct fd_dev_info { */ #define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024) -void fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id); +const struct fd_dev_info * fd_dev_info(uint32_t gpu_id); #ifdef __cplusplus } /* end of extern "C" */ diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py new file mode 100644 index 00000000000..248d1d32791 --- /dev/null +++ b/src/freedreno/common/freedreno_devices.py @@ -0,0 +1,281 @@ +# +# Copyright © 2021 Google, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +from mako.template import Template +import sys + +def max_bitfield_val(high, low, shift): + return ((1 << (high - low)) - 1) << shift + +class State(object): + def __init__(self): + # List of unique device-info structs, multiple different GPU ids + # can map to a single info struct in cases where the differences + # are not sw visible, or the only differences are parameters + # queried from the kernel (like GMEM size) + self.gpu_infos = [] + + # Table mapping GPU id to device-info struct + self.gpus = {} + + def info_index(self, gpu_info): + i = 0 + for info in self.gpu_infos: + if gpu_info == info: + return i + i += 1 + raise ValueError("invalid info") + +s = State() + +def add_gpus(ids, info): + for id in ids: + s.gpus[id] = info + +class Struct(object): + """A helper class that stringifies itself to a 'C' struct initializer + """ + def __str__(self): + s = "{" + for name, value in vars(self).items(): + s += "." + name + "=" + str(value) + "," + return s + "}" + +class GPUInfo(Struct): + """Base class for any generation of adreno, consists of GMEM layout + related parameters + + Note that tile_max_h is normally only constrained by corresponding + bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w + tends to have lower limits, in which case a comment will describe + the bitfield size/shift + """ + def __init__(self, gmem_align_w, gmem_align_h, + tile_align_w, tile_align_h, + tile_max_w, tile_max_h, num_vsc_pipes): + self.gmem_align_w = gmem_align_w + self.gmem_align_h = gmem_align_h + self.tile_align_w = tile_align_w + self.tile_align_h = tile_align_h + self.tile_max_w = tile_max_w + self.tile_max_h = tile_max_h + self.num_vsc_pipes = num_vsc_pipes + + s.gpu_infos.append(self) + + +class A6xxGPUInfo(GPUInfo): + """The a6xx generation has a lot more parameters, and is broken down + into distinct sub-generations. The template parameter avoids + duplication of parameters that are unique to the sub-generation. 
+ """ + def __init__(self, template, num_sp_cores, num_ccu, + RB_UNKNOWN_8E04_blit, PC_UNKNOWN_9805, + SP_UNKNOWN_A0F8): + super().__init__(gmem_align_w = 16, gmem_align_h = 4, + tile_align_w = 32, tile_align_h = 32, + tile_max_w = 1024, # max_bitfield_val(5, 0, 5) + tile_max_h = max_bitfield_val(14, 8, 4), + num_vsc_pipes = 32) + assert(num_sp_cores == num_ccu) + + self.num_sp_cores = num_sp_cores + + # 96 tile alignment seems correlated to 3 CCU + if num_ccu == 3: + self.tile_align_w = 96 + + self.a6xx = Struct() + self.a6xx.magic = Struct() + + # Various "magic" register values: + self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit + self.a6xx.magic.PC_UNKNOWN_9805 = PC_UNKNOWN_9805 + self.a6xx.magic.SP_UNKNOWN_A0F8 = SP_UNKNOWN_A0F8 + + for name, val in template.items(): + setattr(self.a6xx, name, val) + +# a2xx is really two sub-generations, a20x and a22x, but we don't currently +# capture that in the device-info tables +add_gpus([ + 200, + 201, + 205, + 220, + ], GPUInfo( + gmem_align_w = 32, gmem_align_h = 32, + tile_align_w = 32, tile_align_h = 32, + tile_max_w = 512, + tile_max_h = ~0, # TODO + num_vsc_pipes = 8, + )) + +add_gpus([ + 305, + 307, + 320, + 330, + ], GPUInfo( + gmem_align_w = 32, gmem_align_h = 32, + tile_align_w = 32, tile_align_h = 32, + tile_max_w = 992, # max_bitfield_val(4, 0, 5) + tile_max_h = max_bitfield_val(9, 5, 5), + num_vsc_pipes = 8, + )) + +add_gpus([ + 405, + 420, + 430, + ], GPUInfo( + gmem_align_w = 32, gmem_align_h = 32, + tile_align_w = 32, tile_align_h = 32, + tile_max_w = 1024, # max_bitfield_val(4, 0, 5) + tile_max_h = max_bitfield_val(9, 5, 5), + num_vsc_pipes = 8, + )) + +add_gpus([ + 510, + 530, + 540, + ], GPUInfo( + gmem_align_w = 64, gmem_align_h = 32, + tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, # max_bitfield_val(7, 0, 5) + tile_max_h = max_bitfield_val(16, 9, 5), + num_vsc_pipes = 16, + )) + +# a6xx can be divided into distinct sub-generations, where certain device- +# info parameters are 
keyed to the sub-generation. These templates reduce +# the copypaste + +# a615, a618, a630: +a6xx_gen1 = dict( + fibers_per_sp = 128 * 16, + ccu_cntl_gmem_unk2 = True, + ) + +# a640, a680: +a6xx_gen2 = dict( + fibers_per_sp = 128 * 4 * 16, + supports_multiview_mask = True, + has_z24uint_s8uint = True, + ) + +# a650: +a6xx_gen3 = dict( + fibers_per_sp = 128 * 2 * 16, + supports_multiview_mask = True, + has_z24uint_s8uint = True, + ) + +add_gpus([ + 615, + 618, + ], A6xxGPUInfo( + a6xx_gen1, + num_sp_cores = 1, + num_ccu = 1, + RB_UNKNOWN_8E04_blit = 0x00100000, + PC_UNKNOWN_9805 = 0, + SP_UNKNOWN_A0F8 = 0, + )) + +add_gpus([ + 630, + ], A6xxGPUInfo( + a6xx_gen1, + num_sp_cores = 2, + num_ccu = 2, + RB_UNKNOWN_8E04_blit = 0x01000000, + PC_UNKNOWN_9805 = 1, + SP_UNKNOWN_A0F8 = 1, + )) + +add_gpus([ + 640, + ], A6xxGPUInfo( + a6xx_gen2, + num_sp_cores = 2, + num_ccu = 2, + RB_UNKNOWN_8E04_blit = 0x00100000, + PC_UNKNOWN_9805 = 1, + SP_UNKNOWN_A0F8 = 1, + )) + +add_gpus([ + 650, + ], A6xxGPUInfo( + a6xx_gen3, + num_sp_cores = 3, + num_ccu = 3, + RB_UNKNOWN_8E04_blit = 0x04100000, + PC_UNKNOWN_9805 = 2, + SP_UNKNOWN_A0F8 = 2, + )) + +template = """\ +/* Copyright (C) 2021 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "freedreno_dev_info.h" + +/* Map python to C: */ +#define True true +#define False false + +%for info in s.gpu_infos: +static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)}; +%endfor + +const struct fd_dev_id fd_dev_ids[] = { +%for id, info in s.gpus.items(): + { ${id}, &__info${s.info_index(info)} }, +%endfor +}; +const unsigned fd_dev_ids_count = ${len(s.gpus)}; +""" + +print(Template(template).render(s=s)) + diff --git a/src/freedreno/common/meson.build b/src/freedreno/common/meson.build index 4e449e40f4e..30a7f955baf 100644 --- a/src/freedreno/common/meson.build +++ b/src/freedreno/common/meson.build @@ -18,6 +18,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+freedreno_devices_c = custom_target( + 'freedreno_devices.c', + input: 'freedreno_devices.py', + output: 'freedreno_devices.c', + command: [prog_python, '@INPUT@'], + capture: true, +) + libfreedreno_common = static_library( 'freedreno_common', [ @@ -28,6 +36,7 @@ libfreedreno_common = static_library( 'freedreno_uuid.c', 'freedreno_uuid.h', 'freedreno_guardband.h', + freedreno_devices_c, sha1_h, ], include_directories : [inc_freedreno, inc_include, inc_src, inc_gallium], diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc index 7c7adec623a..349a7a8ce4e 100644 --- a/src/freedreno/ds/fd_pps_driver.cc +++ b/src/freedreno/ds/fd_pps_driver.cc @@ -78,7 +78,7 @@ FreedrenoDriver::setup_a6xx_counters() ); counter("Shader Core Utilization", Counter::Units::Percent, [=]() { - return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores); + return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores); } ); @@ -177,7 +177,7 @@ FreedrenoDriver::init_perfcnt() for (auto countable : countables) countable.resolve(); - fd_dev_info_init(&info, gpu_id); + info = fd_dev_info(gpu_id); io = fd_dt_find_io(); if (!io) { diff --git a/src/freedreno/ds/fd_pps_driver.h b/src/freedreno/ds/fd_pps_driver.h index e8f5c246b6f..28144be56c4 100644 --- a/src/freedreno/ds/fd_pps_driver.h +++ b/src/freedreno/ds/fd_pps_driver.h @@ -42,7 +42,7 @@ private: bool has_suspend_count; uint32_t suspend_count; - struct fd_dev_info info; + const struct fd_dev_info *info; /** * The memory mapped i/o space for counter readback: diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index cb35f71f954..58aca80820b 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -195,13 +195,16 @@ tu_physical_device_init(struct tu_physical_device *device, memset(device->name, 0, sizeof(device->name)); sprintf(device->name, "FD%d", device->gpu_id); - switch (device->gpu_id) { - case 615: - case 618: - case 
630: - case 640: - case 650: - fd_dev_info_init(&device->info, device->gpu_id); + const struct fd_dev_info *info = fd_dev_info(device->gpu_id); + if (!info) { + result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "device %s is unsupported", device->name); + return result; + } + switch (device->gpu_id / 100) { + case 6: + // TODO convert to pointer: + device->info = *info; device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE; device->ccu_offset_gmem = (device->gmem_size - device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 9b4f03436fb..6dc075d220d 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -2064,7 +2064,7 @@ calc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config, { uint32_t per_fiber_size = ALIGN(pvtmem_bytes, 512); uint32_t per_sp_size = - ALIGN(per_fiber_size * dev->physical_device->info.fibers_per_sp, 1 << 12); + ALIGN(per_fiber_size * dev->physical_device->info.a6xx.fibers_per_sp, 1 << 12); if (config) { config->per_fiber_size = per_fiber_size; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 38ce74bbcd8..d9df55e1a98 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -103,7 +103,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_emit_string5(ring, name, strlen(name)); #endif - uint32_t fibers_per_sp = ctx->screen->info.fibers_per_sp; + uint32_t fibers_per_sp = ctx->screen->info.a6xx.fibers_per_sp; uint32_t num_sp_cores = ctx->screen->info.num_sp_cores; uint32_t per_fiber_size = ALIGN(so->pvtmem_size, 512); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 7fa9ceb874e..6cd35b95e62 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ 
b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -1020,6 +1020,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro) DBG(" Chip-id: 0x%08x", screen->chip_id); DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes); + const struct fd_dev_info *info = fd_dev_info(screen->gpu_id); + if (!info) { + mesa_loge("unsupported GPU: a%03d", screen->gpu_id); + goto fail; + } + /* explicitly checking for GPU revisions that are known to work. This * may be overly conservative for a3xx, where spoofing the gpu_id with * the blob driver seems to generate identical cmdstream dumps. But @@ -1031,33 +1037,20 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro) * of the cases below and see what happens. And if it works, please * send a patch ;-) */ - switch (screen->gpu_id) { - case 200: - case 201: - case 205: - case 220: + switch (screen->gpu_id / 100) { + case 2: fd2_screen_init(pscreen); break; - case 305: - case 307: - case 320: - case 330: + case 3: fd3_screen_init(pscreen); break; - case 405: - case 420: - case 430: + case 4: fd4_screen_init(pscreen); break; - case 510: - case 530: - case 540: + case 5: fd5_screen_init(pscreen); break; - case 618: - case 630: - case 640: - case 650: + case 6: fd6_screen_init(pscreen); break; default: @@ -1065,7 +1058,8 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro) goto fail; } - fd_dev_info_init(&screen->info, screen->gpu_id); + // TODO change to pointer: + screen->info = *info; if (is_a6xx(screen)) { screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE; diff --git a/src/gallium/drivers/freedreno/gmemtool.c b/src/gallium/drivers/freedreno/gmemtool.c index c39af1702d4..38a45b81bc7 100644 --- a/src/gallium/drivers/freedreno/gmemtool.c +++ b/src/gallium/drivers/freedreno/gmemtool.c @@ -167,7 +167,8 @@ main(int argc, char **argv) .gmemsize_bytes = gpu_info->gmemsize_bytes, }; - fd_dev_info_init(&screen.info, gpu_info->gpu_id); + // TODO change to pointer: + screen.info = 
*fd_dev_info(gpu_info->gpu_id); /* And finally run thru all the GMEM keys: */ for (int i = 0; i < ARRAY_SIZE(keys); i++) {