mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 05:20:09 +01:00
intel/blorp: Redescribe gfx12.5 surfaces for CCS fast clears
According to HSD 1407682962 and the associated simulator code, fast-clear performance can be affected by: image alignment, tiling, dimensionality, and row pitch. Redescribe surfaces in order to avoid fast-clearing at a slower rate. Also, benchmarking the main patch in the performance CI (hw=A750) shows that some traces are helped significantly: * TotalWarWarhammer3 +5.58% (n=2) * Factorio +3.75% (n=1) * TerminatorResistance +3.3% (n=2) * Borderlands3 +3.23% (n=2) We could additionally increase the alignment requirements of surfaces in order to deterministically increase fast-clear performance. That's left out of this patch in order to avoid any functional pitfalls that can arise with increased memory consumption. As a result, performance will continue to be affected by how ISL/drivers/apps configure main surface memory alignments (directly or indirectly). Thanks to Lionel Landwerlin for pointing me to the relevant simulator code. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11168 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11418 Reviewed-by: Rohan Garg <rohan.garg@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33776>
This commit is contained in:
parent
169e22f962
commit
312952048b
2 changed files with 126 additions and 11 deletions
|
|
@ -164,7 +164,7 @@ blorp_surface_info_init(struct blorp_batch *batch,
|
|||
info->addr = surf->addr;
|
||||
|
||||
info->aux_usage = surf->aux_usage;
|
||||
if (info->aux_usage != ISL_AUX_USAGE_NONE) {
|
||||
if (!blorp_address_is_null(surf->aux_addr)) {
|
||||
info->aux_surf = *surf->aux_surf;
|
||||
info->aux_addr = surf->aux_addr;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -446,22 +446,21 @@ convert_rt_from_3d_to_2d(const struct isl_device *isl_dev,
|
|||
info->surf.size_B = size_B;
|
||||
}
|
||||
|
||||
void
|
||||
blorp_fast_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
enum isl_format format, struct isl_swizzle swizzle,
|
||||
uint32_t level, uint32_t start_layer, uint32_t num_layers,
|
||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
|
||||
static void
|
||||
fast_clear_surf(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
enum isl_format format, struct isl_swizzle swizzle,
|
||||
uint32_t level, uint32_t start_layer, uint32_t num_layers)
|
||||
{
|
||||
struct blorp_params params;
|
||||
blorp_params_init(&params);
|
||||
params.num_layers = num_layers;
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
params.x0 = x0;
|
||||
params.y0 = y0;
|
||||
params.x1 = x1;
|
||||
params.y1 = y1;
|
||||
params.x0 = 0;
|
||||
params.y0 = 0;
|
||||
params.x1 = u_minify(surf->surf->logical_level0_px.w, level);
|
||||
params.y1 = u_minify(surf->surf->logical_level0_px.h, level);
|
||||
|
||||
if (batch->blorp->isl_dev->info->ver >= 20) {
|
||||
union isl_color_value clear_color =
|
||||
|
|
@ -522,6 +521,122 @@ blorp_fast_clear(struct blorp_batch *batch,
|
|||
batch->blorp->exec(batch, &params);
|
||||
}
|
||||
|
||||
void
|
||||
blorp_fast_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
enum isl_format format, struct isl_swizzle swizzle,
|
||||
uint32_t level, uint32_t start_layer, uint32_t num_layers,
|
||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
|
||||
{
|
||||
assert(x0 == 0);
|
||||
assert(y0 == 0);
|
||||
assert(x1 == u_minify(surf->surf->logical_level0_px.w, level));
|
||||
assert(y1 == u_minify(surf->surf->logical_level0_px.h, level));
|
||||
|
||||
/* We may want to perform a virtual address-based clear. Collect the memory
|
||||
* range information to do that.
|
||||
*/
|
||||
int64_t size_B = 0;
|
||||
int unaligned_height = 0;
|
||||
struct blorp_address addr = surf->addr;
|
||||
if (surf->surf->samples == 1) {
|
||||
uint64_t start_tile_B, end_tile_B;
|
||||
if (isl_surf_image_has_unique_tiles(surf->surf, level,
|
||||
start_layer, num_layers,
|
||||
&start_tile_B, &end_tile_B)) {
|
||||
size_B = end_tile_B - start_tile_B;
|
||||
addr.offset += start_tile_B;
|
||||
} else if (level == 0 && start_layer == 0 && num_layers == 1) {
|
||||
assert(surf->surf->tiling == ISL_TILING_4 ||
|
||||
surf->surf->tiling == ISL_TILING_Y0);
|
||||
assert(surf->surf->levels > 1 ||
|
||||
surf->surf->logical_level0_px.d > 1 ||
|
||||
surf->surf->logical_level0_px.a > 1);
|
||||
const int phys_height0 = ALIGN(surf->surf->logical_level0_px.h,
|
||||
surf->surf->image_alignment_el.h);
|
||||
unaligned_height = phys_height0 % 32;
|
||||
size_B = surf->surf->row_pitch_B * (phys_height0 - unaligned_height);
|
||||
}
|
||||
}
|
||||
|
||||
if (ISL_GFX_VERX10(batch->blorp->isl_dev) == 125 && size_B > 0) {
|
||||
/* According to HSD 1407682962 and its simulator implementation, CCS
|
||||
* fast-clears will operate at a slower rate if any of the following are
|
||||
* true:
|
||||
*
|
||||
* 1) The clear rectangle covers less than 16KB of main surface data
|
||||
* (i.e., less than 64B of CCS data).
|
||||
* 2) The surface type is SURFTYPE_3D.
|
||||
* 3) The surface tiling is Tile4 and either a) the base address is
|
||||
* not aligned to 64KB OR b) the pitch is not aligned to 16-tiles.
|
||||
*
|
||||
* This slow-down can also occur on subrectangles within a larger clear
|
||||
* rectangle. Redescribe this memory range to reduce the chance of
|
||||
* slow-downs.
|
||||
*/
|
||||
const int _16k = 16 * 1024;
|
||||
const int _64k = 64 * 1024;
|
||||
struct isl_surf isl_surf;
|
||||
struct blorp_surf mem_surf = {
|
||||
.surf = &isl_surf,
|
||||
.addr = addr,
|
||||
.clear_color_addr = surf->clear_color_addr,
|
||||
.aux_usage = surf->aux_usage,
|
||||
};
|
||||
|
||||
do {
|
||||
if (mem_surf.addr.offset % _64k == 0) {
|
||||
if (size_B <= _16k * 16 * 32) {
|
||||
/* The size fits within a single row of tiles. So, we can align
|
||||
* the pitch as needed.
|
||||
*/
|
||||
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
|
||||
mem_surf.addr.offset, size_B, ISL_TILING_4);
|
||||
assert(isl_surf.logical_level0_px.h == 32);
|
||||
assert(isl_surf.logical_level0_px.a == 1);
|
||||
isl_surf.row_pitch_B = ALIGN(isl_surf.row_pitch_B, 16 * 128);
|
||||
} else {
|
||||
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
|
||||
mem_surf.addr.offset, size_B, ISL_TILING_64);
|
||||
}
|
||||
} else {
|
||||
int size_to_64k_alignment =
|
||||
align64(mem_surf.addr.offset, _64k) - mem_surf.addr.offset;
|
||||
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
|
||||
mem_surf.addr.offset,
|
||||
size_B - size_to_64k_alignment < _16k ?
|
||||
size_B : size_to_64k_alignment, ISL_TILING_4);
|
||||
}
|
||||
|
||||
assert(isl_surf.dim == ISL_SURF_DIM_2D);
|
||||
fast_clear_surf(batch, &mem_surf, isl_surf.format, swizzle,
|
||||
0, 0, isl_surf.logical_level0_px.a);
|
||||
|
||||
size_B -= isl_surf.size_B;
|
||||
mem_surf.addr.offset += isl_surf.size_B;
|
||||
} while (size_B != 0);
|
||||
|
||||
/* Use coordinate-based clears to clear the area that is not aligned to
|
||||
* a tile.
|
||||
*/
|
||||
if (unaligned_height > 0) {
|
||||
assert(level == 0 && start_layer == 0 && num_layers == 1);
|
||||
assert(surf->surf->tiling == ISL_TILING_4);
|
||||
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
|
||||
mem_surf.addr.offset, surf->surf->row_pitch_B * 32,
|
||||
ISL_TILING_4);
|
||||
assert(isl_surf.logical_level0_px.h == 32);
|
||||
isl_surf.logical_level0_px.h = unaligned_height;
|
||||
isl_surf.phys_level0_sa.h = unaligned_height;
|
||||
fast_clear_surf(batch, &mem_surf, isl_surf.format, swizzle,
|
||||
0, 0, isl_surf.logical_level0_px.a);
|
||||
}
|
||||
} else {
|
||||
fast_clear_surf(batch, surf, format, swizzle,
|
||||
level, start_layer, num_layers);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
blorp_clear_supports_blitter(struct blorp_context *blorp,
|
||||
const struct blorp_surf *surf,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue