diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index f5a75354aea..1a12460f314 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -164,7 +164,7 @@ blorp_surface_info_init(struct blorp_batch *batch, info->addr = surf->addr; info->aux_usage = surf->aux_usage; - if (info->aux_usage != ISL_AUX_USAGE_NONE) { + if (!blorp_address_is_null(surf->aux_addr)) { info->aux_surf = *surf->aux_surf; info->aux_addr = surf->aux_addr; } diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index cf51ff9c87f..4659cb362f7 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -446,22 +446,21 @@ convert_rt_from_3d_to_2d(const struct isl_device *isl_dev, info->surf.size_B = size_B; } -void -blorp_fast_clear(struct blorp_batch *batch, - const struct blorp_surf *surf, - enum isl_format format, struct isl_swizzle swizzle, - uint32_t level, uint32_t start_layer, uint32_t num_layers, - uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) +static void +fast_clear_surf(struct blorp_batch *batch, + const struct blorp_surf *surf, + enum isl_format format, struct isl_swizzle swizzle, + uint32_t level, uint32_t start_layer, uint32_t num_layers) { struct blorp_params params; blorp_params_init(¶ms); params.num_layers = num_layers; assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); - params.x0 = x0; - params.y0 = y0; - params.x1 = x1; - params.y1 = y1; + params.x0 = 0; + params.y0 = 0; + params.x1 = u_minify(surf->surf->logical_level0_px.w, level); + params.y1 = u_minify(surf->surf->logical_level0_px.h, level); if (batch->blorp->isl_dev->info->ver >= 20) { union isl_color_value clear_color = @@ -522,6 +521,122 @@ blorp_fast_clear(struct blorp_batch *batch, batch->blorp->exec(batch, ¶ms); } +void +blorp_fast_clear(struct blorp_batch *batch, + const struct blorp_surf *surf, + enum isl_format format, struct isl_swizzle swizzle, + uint32_t level, uint32_t start_layer, uint32_t num_layers, + uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) +{ + assert(x0 == 0); + assert(y0 == 0); + assert(x1 == u_minify(surf->surf->logical_level0_px.w, level)); + assert(y1 == u_minify(surf->surf->logical_level0_px.h, level)); + + /* We may want to perform a virtual address-based clear. Collect the memory + * range information to do that. + */ + int64_t size_B = 0; + int unaligned_height = 0; + struct blorp_address addr = surf->addr; + if (surf->surf->samples == 1) { + uint64_t start_tile_B, end_tile_B; + if (isl_surf_image_has_unique_tiles(surf->surf, level, + start_layer, num_layers, + &start_tile_B, &end_tile_B)) { + size_B = end_tile_B - start_tile_B; + addr.offset += start_tile_B; + } else if (level == 0 && start_layer == 0 && num_layers == 1) { + assert(surf->surf->tiling == ISL_TILING_4 || + surf->surf->tiling == ISL_TILING_Y0); + assert(surf->surf->levels > 1 || + surf->surf->logical_level0_px.d > 1 || + surf->surf->logical_level0_px.a > 1); + const int phys_height0 = ALIGN(surf->surf->logical_level0_px.h, + surf->surf->image_alignment_el.h); + unaligned_height = phys_height0 % 32; + size_B = surf->surf->row_pitch_B * (phys_height0 - unaligned_height); + } + } + + if (ISL_GFX_VERX10(batch->blorp->isl_dev) == 125 && size_B > 0) { + /* According to HSD 1407682962 and its simulator implementation, CCS + * fast-clears will operate at a slower rate if any of the following are + * true: + * + * 1) The clear rectangle covers less than 16KB of main surface data + * (i.e., less than 64B of CCS data). + * 2) The surface type is SURFTYPE_3D. + * 3) The surface tiling is Tile4 and either a) the base address is + * not aligned to 64KB OR b) the pitch is not aligned to 16-tiles. + * + * This slow-down can also occur on subrectangles within a larger clear + * rectangle. Redescribe this memory range to reduce the chance of + * slow-downs. + */ + const int _16k = 16 * 1024; + const int _64k = 64 * 1024; + struct isl_surf isl_surf; + struct blorp_surf mem_surf = { + .surf = &isl_surf, + .addr = addr, + .clear_color_addr = surf->clear_color_addr, + .aux_usage = surf->aux_usage, + }; + + do { + if (mem_surf.addr.offset % _64k == 0) { + if (size_B <= _16k * 16 * 32) { + /* The size fits within a single row of tiles. So, we can align + * the pitch as needed. + */ + isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf, + mem_surf.addr.offset, size_B, ISL_TILING_4); + assert(isl_surf.logical_level0_px.h == 32); + assert(isl_surf.logical_level0_px.a == 1); + isl_surf.row_pitch_B = ALIGN(isl_surf.row_pitch_B, 16 * 128); + } else { + isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf, + mem_surf.addr.offset, size_B, ISL_TILING_64); + } + } else { + int size_to_64k_alignment = + align64(mem_surf.addr.offset, _64k) - mem_surf.addr.offset; + isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf, + mem_surf.addr.offset, + size_B - size_to_64k_alignment < _16k ? + size_B : size_to_64k_alignment, ISL_TILING_4); + } + + assert(isl_surf.dim == ISL_SURF_DIM_2D); + fast_clear_surf(batch, &mem_surf, isl_surf.format, swizzle, + 0, 0, isl_surf.logical_level0_px.a); + + size_B -= isl_surf.size_B; + mem_surf.addr.offset += isl_surf.size_B; + } while (size_B != 0); + + /* Use coordinate-based clears to clear the area that is not aligned to + * a tile. + */ + if (unaligned_height > 0) { + assert(level == 0 && start_layer == 0 && num_layers == 1); + assert(surf->surf->tiling == ISL_TILING_4); + isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf, + mem_surf.addr.offset, surf->surf->row_pitch_B * 32, + ISL_TILING_4); + assert(isl_surf.logical_level0_px.h == 32); + isl_surf.logical_level0_px.h = unaligned_height; + isl_surf.phys_level0_sa.h = unaligned_height; + fast_clear_surf(batch, &mem_surf, isl_surf.format, swizzle, + 0, 0, isl_surf.logical_level0_px.a); + } + } else { + fast_clear_surf(batch, surf, format, swizzle, + level, start_layer, num_layers); + } +} + bool blorp_clear_supports_blitter(struct blorp_context *blorp, const struct blorp_surf *surf,