mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-21 21:58:10 +02:00
276 lines
12 KiB
C
276 lines
12 KiB
C
|
|
/*
|
||
|
|
* Copyright © 2023 Intel Corporation
|
||
|
|
*
|
||
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||
|
|
* copy of this software and associated documentation files (the "Software"),
|
||
|
|
* to deal in the Software without restriction, including without limitation
|
||
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||
|
|
* Software is furnished to do so, subject to the following conditions:
|
||
|
|
*
|
||
|
|
* The above copyright notice and this permission notice (including the next
|
||
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
||
|
|
* Software.
|
||
|
|
*
|
||
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||
|
|
* IN THE SOFTWARE.
|
||
|
|
*/
|
||
|
|
#ifndef INTEL_TILED_RENDER_H
|
||
|
|
#define INTEL_TILED_RENDER_H
|
||
|
|
|
||
|
|
#include "intel/common/intel_l3_config.h"
|
||
|
|
#include "intel/dev/intel_device_info.h"
|
||
|
|
#include "intel/isl/isl.h"
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Return the tile cache space used as target by the tiling parameter
|
||
|
|
* calculation algorithm below. Cache space units are in bits.
|
||
|
|
*
|
||
|
|
* \sa intel_calculate_tile_dimensions()
|
||
|
|
*/
|
||
|
|
UNUSED static unsigned
|
||
|
|
intel_calculate_tile_cache_size(const struct intel_device_info *devinfo,
|
||
|
|
const struct intel_l3_config *cfg)
|
||
|
|
{
|
||
|
|
const unsigned tc_l3_partition_size = 1024 * 8 *
|
||
|
|
intel_get_l3_partition_size(devinfo, cfg, INTEL_L3P_TC);
|
||
|
|
const unsigned all_l3_partition_size = 1024 * 8 *
|
||
|
|
intel_get_l3_partition_size(devinfo, cfg, INTEL_L3P_ALL);
|
||
|
|
/* Target half of the total L3 space as simple heuristic, could be
|
||
|
|
* improved by adjusting the target dynamically.
|
||
|
|
*/
|
||
|
|
const unsigned target_all_l3_partition_size = all_l3_partition_size / 2;
|
||
|
|
/* If there's a tile cache partition on the L3, use its size as
|
||
|
|
* target, otherwise (e.g. in unified L3 cache mode) use a fraction
|
||
|
|
* of the total L3 available.
|
||
|
|
*
|
||
|
|
* XXX - Note that this assumes TBIMR in pixel hashing mode is in use.
|
||
|
|
*/
|
||
|
|
const unsigned tile_cache_size = tc_l3_partition_size ? tc_l3_partition_size :
|
||
|
|
target_all_l3_partition_size;
|
||
|
|
assert(tile_cache_size > 0);
|
||
|
|
return tile_cache_size;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Return the amount of bits per pixel used to store an ISL surface in
|
||
|
|
* memory. This can be used as helper to estimate the value of the \p
|
||
|
|
* pixel_size argument of intel_calculate_tile_dimensions() below.
|
||
|
|
*/
|
||
|
|
UNUSED static unsigned
|
||
|
|
intel_calculate_surface_pixel_size(const struct isl_surf *surf)
|
||
|
|
{
|
||
|
|
const struct isl_format_layout *layout = isl_format_get_layout(surf->format);
|
||
|
|
const unsigned num_samples = MAX2(1, surf->samples);
|
||
|
|
if (surf->size_B > 0)
|
||
|
|
return DIV_ROUND_UP(layout->bpb * num_samples,
|
||
|
|
layout->bw * layout->bh * layout->bd);
|
||
|
|
else
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Estimate tiling parameters that yield a reasonable balance between
|
||
|
|
* tile cache utilization and avoidance of thrashing, based on the
|
||
|
|
* device's current caching configuration, the framebuffer dimensions
|
||
|
|
* and an estimate of the tile cache footprint per fragment in bits (\p
|
||
|
|
* pixel_size).
|
||
|
|
*
|
||
|
|
* The calculated tile dimensions are guaranteed to be a multiple of
|
||
|
|
* the block dimensions \p block_width and \p block_height, which for
|
||
|
|
* TBIMR in pixel hashing mode must be equal to the pixel hashing
|
||
|
|
* block size, typically 16x16 or 32x32.
|
||
|
|
*/
|
||
|
|
UNUSED static void
|
||
|
|
intel_calculate_tile_dimensions(const struct intel_device_info *devinfo,
|
||
|
|
const struct intel_l3_config *cfg,
|
||
|
|
unsigned block_width, unsigned block_height,
|
||
|
|
unsigned fb_width, unsigned fb_height,
|
||
|
|
unsigned pixel_size,
|
||
|
|
unsigned *tile_width, unsigned *tile_height)
|
||
|
|
{
|
||
|
|
/* Maximum number of tiles supported by the TBIMR tile sequencing
|
||
|
|
* hardware.
|
||
|
|
*/
|
||
|
|
const unsigned max_horiz_tiles = 32;
|
||
|
|
const unsigned max_vert_tiles = 32;
|
||
|
|
|
||
|
|
/* Represent dimensions in hashing block units, which guarantees
|
||
|
|
* that the resulting tile dimensions are a multiple of the hashing
|
||
|
|
* block dimensions, a requirement of TBIMR in pixel hashing mode.
|
||
|
|
*/
|
||
|
|
const unsigned fb_block_width = DIV_ROUND_UP(fb_width, block_width);
|
||
|
|
const unsigned fb_block_height = DIV_ROUND_UP(fb_height, block_height);
|
||
|
|
|
||
|
|
/* Amount of tile cache space for the workload to target. */
|
||
|
|
const unsigned tile_cache_size = intel_calculate_tile_cache_size(devinfo, cfg);
|
||
|
|
/* Cache footprint of a single hashing block worth of threads. */
|
||
|
|
const unsigned block_size = MAX2(1, pixel_size * block_width * block_height);
|
||
|
|
/* Calculate the desired tile surface (in block units) that fully
|
||
|
|
* utilizes the target portion of the tile cache, which in an ideal
|
||
|
|
* world where an oracle has given us the tile cache footprint per
|
||
|
|
* block is just the ratio of the two.
|
||
|
|
*/
|
||
|
|
const unsigned desired_tile_surf = MAX2(1, tile_cache_size / block_size);
|
||
|
|
/* Clamp the desired tile surface to be between the surface of the
|
||
|
|
* whole framebuffer and the surface of the smallest tile possible
|
||
|
|
* at the maximum suported tile count.
|
||
|
|
*/
|
||
|
|
const unsigned tile_surf = CLAMP(desired_tile_surf,
|
||
|
|
(DIV_ROUND_UP(fb_block_width, max_horiz_tiles) *
|
||
|
|
DIV_ROUND_UP(fb_block_height, max_vert_tiles)),
|
||
|
|
fb_block_width * fb_block_height);
|
||
|
|
/* XXX - If the tile_surf calculated above is smaller than the
|
||
|
|
* number of pixel pipes on the GPU, the pipeline is so
|
||
|
|
* cache-heavy that the parallelism of the GPU will have to
|
||
|
|
* be constrained in order to avoid thrashing the tile cache.
|
||
|
|
* Possibly emit a performance warning, or better, return an
|
||
|
|
* error indicating that the pixel pipe hashing config needs
|
||
|
|
* to be adjusted to use a finer hashing mode in order to
|
||
|
|
* spread out the workload evenly across the available slices.
|
||
|
|
*/
|
||
|
|
|
||
|
|
/* Select the tile aspect ratio that minimizes the number of passes
|
||
|
|
* required to render the whole framebuffer. The search starts at
|
||
|
|
* an approximately square tile size of the desired surface and
|
||
|
|
* increases the ratio between its major and minor axes in a
|
||
|
|
* sequence of finite increments.
|
||
|
|
*
|
||
|
|
* The algorithm is biased in favor of the squarest possible tiling
|
||
|
|
* config since it starts with a tile shape closest to a square and
|
||
|
|
* early-exits when a global minimum is detected. This bias is
|
||
|
|
* intentional since cache locality may suffer at high tile aspect
|
||
|
|
* ratios.
|
||
|
|
*/
|
||
|
|
const float base_major = sqrtf(tile_surf);
|
||
|
|
/* Make sure that the minimum major axis where the search starts
|
||
|
|
* isn't so small (due to a small framebuffer or rounding) that the
|
||
|
|
* tile would have to be larger than the framebuffer in the
|
||
|
|
* opposite "minor" direction.
|
||
|
|
*/
|
||
|
|
const unsigned min_major = MAX3(1, floorf(base_major),
|
||
|
|
tile_surf / MIN2(fb_block_width, fb_block_height));
|
||
|
|
/* Stop search at a an aspect ratio of approximately 2 (A major
|
||
|
|
* axis equal to 'base_major * M_SQRT2' would give an aspect ratio
|
||
|
|
* of exactly 2 if it was a valid integer number). Aspect ratios
|
||
|
|
* higher than 2 could technically be useful, the upper bound is
|
||
|
|
* intended as a heuristic in order to set a low limit to the
|
||
|
|
* number of iterations the loop below may execute.
|
||
|
|
*/
|
||
|
|
const unsigned max_major = ceilf(MAX2(base_major, min_major) * M_SQRT2);
|
||
|
|
assert(max_major < INT_MAX);
|
||
|
|
|
||
|
|
/* Best tile dimensions found so far. */
|
||
|
|
unsigned best_count = UINT_MAX;
|
||
|
|
unsigned best_block_width = 0;
|
||
|
|
unsigned best_block_height = 0;
|
||
|
|
|
||
|
|
for (unsigned major = min_major; major <= max_major;) {
|
||
|
|
/* Minor axis that yields the desired tile surface for the
|
||
|
|
* present major parameter.
|
||
|
|
*/
|
||
|
|
const unsigned minor = MAX2(1, tile_surf / major);
|
||
|
|
|
||
|
|
/* Calculate the total number of tiles if this aspect ratio is
|
||
|
|
* used in the X-major orientation.
|
||
|
|
*/
|
||
|
|
const unsigned horiz_tiles_x = DIV_ROUND_UP(fb_block_width, major);
|
||
|
|
const unsigned vert_tiles_x = DIV_ROUND_UP(fb_block_height, minor);
|
||
|
|
const unsigned count_x = horiz_tiles_x * vert_tiles_x;
|
||
|
|
|
||
|
|
/* Calculate the number of blocks we need to add to the major
|
||
|
|
* axis for the number of X-major tile columns (horiz_tiles_x)
|
||
|
|
* to drop by one. This avoids many useless iterations relative
|
||
|
|
* to exhaustive search, since an increase in major can only
|
||
|
|
* decrease the total tile count if it decreases horiz_tiles_x
|
||
|
|
* as well, vert_tiles_x is monotonically increasing with major.
|
||
|
|
*
|
||
|
|
* If the number of tile columns is already 1 the X-major
|
||
|
|
* solution cannot be improved further, use "infinity" so the
|
||
|
|
* increment for the next iteration is only determined by the
|
||
|
|
* Y-major search -- If the Y-major solution cannot be improved
|
||
|
|
* either the search will be terminated.
|
||
|
|
*/
|
||
|
|
const unsigned delta_x = horiz_tiles_x == 1 ? INT_MAX :
|
||
|
|
DIV_ROUND_UP(fb_block_width - major * (horiz_tiles_x - 1),
|
||
|
|
horiz_tiles_x - 1);
|
||
|
|
|
||
|
|
/* Update the best known solution with the present X-major one
|
||
|
|
* if it's allowed by the hardware and requires a lower total
|
||
|
|
* number of tiles to cover the whole framebuffer.
|
||
|
|
*/
|
||
|
|
if (horiz_tiles_x <= max_horiz_tiles && vert_tiles_x <= max_vert_tiles &&
|
||
|
|
count_x < best_count) {
|
||
|
|
best_count = count_x;
|
||
|
|
best_block_width = major;
|
||
|
|
best_block_height = minor;
|
||
|
|
|
||
|
|
/* The array of tiles is fully covered by the framebuffer, a
|
||
|
|
* global minimum has been found, terminate the search.
|
||
|
|
*/
|
||
|
|
if (count_x * tile_surf == fb_block_width * fb_block_height)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Calculate the total number of tiles if this aspect ratio is
|
||
|
|
* used in the Y-major orientation.
|
||
|
|
*/
|
||
|
|
const unsigned horiz_tiles_y = DIV_ROUND_UP(fb_block_width, minor);
|
||
|
|
const unsigned vert_tiles_y = DIV_ROUND_UP(fb_block_height, major);
|
||
|
|
const unsigned count_y = horiz_tiles_y * vert_tiles_y;
|
||
|
|
|
||
|
|
/* Calculate the number of blocks we need to add to the major
|
||
|
|
* axis for the number of Y-major tile rows (vert_tiles_y) to
|
||
|
|
* drop by one. Analogous to the delta_x described above after
|
||
|
|
* a flip of the X and Y axes.
|
||
|
|
*/
|
||
|
|
const unsigned delta_y = vert_tiles_y == 1 ? INT_MAX :
|
||
|
|
DIV_ROUND_UP(fb_block_height - major * (vert_tiles_y - 1),
|
||
|
|
vert_tiles_y - 1);
|
||
|
|
|
||
|
|
/* Update the best known solution with the present Y-major one
|
||
|
|
* if it's allowed by the hardware and requires a lower total
|
||
|
|
* number of tiles to cover the whole framebuffer.
|
||
|
|
*/
|
||
|
|
if (horiz_tiles_y <= max_horiz_tiles && vert_tiles_y <= max_vert_tiles &&
|
||
|
|
count_y < best_count) {
|
||
|
|
best_count = count_y;
|
||
|
|
best_block_width = minor;
|
||
|
|
best_block_height = major;
|
||
|
|
|
||
|
|
/* The array of tiles is fully covered by the framebuffer, a
|
||
|
|
* global minimum has been found, terminate the search.
|
||
|
|
*/
|
||
|
|
if (count_y * tile_surf == fb_block_width * fb_block_height)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Use the smallest of the computed major increments in order to
|
||
|
|
* visit the closest subsequent solution candidate. If both the
|
||
|
|
* X-major and Y-major searches have terminated major will be
|
||
|
|
* pushed above the upper bound of the search, causing immediate
|
||
|
|
* termination.
|
||
|
|
*/
|
||
|
|
const unsigned delta = MIN2(delta_x, delta_y);
|
||
|
|
assert(major + delta > major);
|
||
|
|
major += delta;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Sanity-check and return the result, scaling it back to pixel
|
||
|
|
* units.
|
||
|
|
*/
|
||
|
|
assert(best_block_width > 0 && best_block_height > 0);
|
||
|
|
assert(DIV_ROUND_UP(fb_block_width, best_block_width) <= max_horiz_tiles);
|
||
|
|
assert(DIV_ROUND_UP(fb_block_height, best_block_height) <= max_vert_tiles);
|
||
|
|
|
||
|
|
*tile_width = best_block_width * block_width;
|
||
|
|
*tile_height = best_block_height * block_height;
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif
|