From 96d65b47c715155bb0403f3e0b5555b348d68539 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 7 Jul 2022 18:39:17 -0400 Subject: [PATCH] panfrost: Use implementation-specific tile size The physical tile buffer size (and hence the maximum available tilebuffer size) is implementation-defined. Track this information on the device so we can correctly select tile sizes, instead of hardcoding the value for Midgard. Implementation values are pulled from the "Tile bits/pixel" row of the public Mali data sheet [1]. That row lists the maximum number of bits available for a pixel given the maximum tile size and pipelining. For currently supported hardware (v9 and older), that maximum tile size is 16x16. So those values should be multiplied by (16 * 16 * 2) / 8 to get the physical size in bytes. This may improve Bifrost/Valhall performance on workloads using multiple render targets. It also gets us ready for the dazzling array of tile sizes available with v10. [1] https://developer.arm.com/documentation/102849/latest/ Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/lib/pan_cs.c | 5 ++-- src/panfrost/lib/pan_device.h | 6 +++++ src/panfrost/lib/pan_props.c | 51 ++++++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/src/panfrost/lib/pan_cs.c b/src/panfrost/lib/pan_cs.c index dbec8c4a3fc..233af107256 100644 --- a/src/panfrost/lib/pan_cs.c +++ b/src/panfrost/lib/pan_cs.c @@ -709,7 +709,8 @@ GENX(pan_emit_fbd)(const struct panfrost_device *dev, #endif unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb); - unsigned tile_size = pan_select_max_tile_size(4096, bytes_per_pixel); + unsigned tile_size = pan_select_max_tile_size(dev->optimal_tib_size, + bytes_per_pixel); /* Clamp tile size to hardware limits */ tile_size = MIN2(tile_size, 16 * 16); @@ -717,7 +718,7 @@ GENX(pan_emit_fbd)(const struct panfrost_device *dev, /* Colour buffer allocations must be 1K aligned. 
*/ unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024); - assert(cbuf_allocation <= 4096 && "tile too big"); + assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big"); int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size); bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0); diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index 0cbebe09820..ad18d154a2c 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -165,6 +165,9 @@ struct panfrost_model { */ uint32_t min_rev_anisotropic; + /* Default tilebuffer size in bytes for the model. */ + unsigned tilebuffer_size; + struct { /* The GPU lacks the capability for hierarchical tiling, without * an "Advanced Tiling Unit", instead requiring a single bin @@ -193,6 +196,9 @@ struct panfrost_device { */ unsigned core_id_range; + /* Maximum tilebuffer size in bytes for optimal performance. */ + unsigned optimal_tib_size; + unsigned thread_tls_alloc; struct panfrost_tiler_features tiler_features; const struct panfrost_model *model; diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index 1627e55e4b8..2c00b1b06a3 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -42,32 +42,33 @@ #define NO_ANISO (~0) #define HAS_ANISO (0) -#define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, quirks_) \ +#define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, tib_size_, quirks_) \ { \ .gpu_id = gpu_id_, \ .name = "Mali-" shortname " (Panfrost)", \ .performance_counters = counters_, \ .min_rev_anisotropic = min_rev_anisotropic_, \ + .tilebuffer_size = tib_size_, \ .quirks = quirks_, \ } /* Table of supported Mali GPUs */ const struct panfrost_model panfrost_model_list[] = { - MODEL(0x720, "T720", "T72x", NO_ANISO, { .no_hierarchical_tiling = true }), - MODEL(0x750, "T760", "T76x", NO_ANISO, {}), - MODEL(0x820, "T820", "T82x", NO_ANISO, { .no_hierarchical_tiling = true }), 
- MODEL(0x830, "T830", "T83x", NO_ANISO, { .no_hierarchical_tiling = true }), - MODEL(0x860, "T860", "T86x", NO_ANISO, {}), - MODEL(0x880, "T880", "T88x", NO_ANISO, {}), + MODEL(0x720, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x750, "T760", "T76x", NO_ANISO, 8192, {}), + MODEL(0x820, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x830, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x860, "T860", "T86x", NO_ANISO, 8192, {}), + MODEL(0x880, "T880", "T88x", NO_ANISO, 8192, {}), - MODEL(0x6000, "G71", "TMIx", NO_ANISO, {}), - MODEL(0x6221, "G72", "THEx", 0x0030 /* r0p3 */, {}), - MODEL(0x7090, "G51", "TSIx", 0x1010 /* r1p1 */, {}), - MODEL(0x7093, "G31", "TDVx", HAS_ANISO, {}), - MODEL(0x7211, "G76", "TNOx", HAS_ANISO, {}), - MODEL(0x7212, "G52", "TGOx", HAS_ANISO, {}), - MODEL(0x7402, "G52 r1", "TGOx", HAS_ANISO, {}), - MODEL(0x9093, "G57", "TNAx", HAS_ANISO, {}), + MODEL(0x6000, "G71", "TMIx", NO_ANISO, 8192, {}), + MODEL(0x6221, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}), + MODEL(0x7090, "G51", "TSIx", 0x1010 /* r1p1 */, 16384, {}), + MODEL(0x7093, "G31", "TDVx", HAS_ANISO, 16384, {}), + MODEL(0x7211, "G76", "TNOx", HAS_ANISO, 16384, {}), + MODEL(0x7212, "G52", "TGOx", HAS_ANISO, 16384, {}), + MODEL(0x7402, "G52 r1", "TGOx", HAS_ANISO, 16384, {}), + MODEL(0x9093, "G57", "TNAx", HAS_ANISO, 16384, {}), }; #undef NO_ANISO @@ -257,6 +258,25 @@ panfrost_query_afbc(int fd, unsigned arch) return (arch >= 5) && (reg == 0); } +/* + * To pipeline multiple tiles, a given tile may use at most half of the tile + * buffer. This function returns the optimal size (assuming pipelining). + * + * For Mali-G510 and Mali-G310, we will need extra logic to query the tilebuffer + * size for the particular variant. The CORE_FEATURES register might help. 
+ */ +static unsigned +panfrost_query_optimal_tib_size(const struct panfrost_device *dev) +{ + /* Preconditions ensure the returned value is a multiple of 1 KiB, the + * granularity of the colour buffer allocation field. + */ + assert(dev->model->tilebuffer_size >= 2048); + assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size)); + + return dev->model->tilebuffer_size / 2; +} + void panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) { @@ -269,6 +289,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) dev->kernel_version = drmGetVersion(fd); dev->revision = panfrost_query_gpu_revision(fd); dev->model = panfrost_get_model(dev->gpu_id); + dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev); dev->compressed_formats = panfrost_query_compressed_formats(fd); dev->tiler_features = panfrost_query_tiler_features(fd); dev->has_afbc = panfrost_query_afbc(fd, dev->arch);