mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
isl: Add additional alignment/padding requirements to prevent overfetch
BSpec 58779 describes various cases where additional padding is required on the bottom and right sides of a sampling engine surface to avoid page faults. Since we don't want to disturb the other drivers that also use ISL, there is now a requires_padding boolean in struct isl_device that enables or disables the extra padding calculations per device and driver. The extra padding can also be disabled per surface by adding the usage flag ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT, for example when a specific row pitch is needed.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40149>
This commit is contained in:
parent
aee9602fea
commit
8d13628f7f
4 changed files with 173 additions and 9 deletions
|
|
@ -2480,6 +2480,8 @@ shrink_surface_params(const struct isl_device *dev,
|
||||||
size = MIN2((uint32_t)ceil(*y1), info->surf.logical_level0_px.height);
|
size = MIN2((uint32_t)ceil(*y1), info->surf.logical_level0_px.height);
|
||||||
info->surf.logical_level0_px.height = size;
|
info->surf.logical_level0_px.height = size;
|
||||||
info->surf.phys_level0_sa.height = size * px_size_sa.h;
|
info->surf.phys_level0_sa.height = size * px_size_sa.h;
|
||||||
|
|
||||||
|
info->surf.usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
|
|
@ -2957,7 +2957,7 @@ isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
|
||||||
* can be 128B), so align the row pitch to the alignment.
|
* can be 128B), so align the row pitch to the alignment.
|
||||||
*/
|
*/
|
||||||
assert(alignment_B >= tile_info->phys_extent_B.width);
|
assert(alignment_B >= tile_info->phys_extent_B.width);
|
||||||
return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
|
return isl_align_npot(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
|
|
@ -3160,25 +3160,165 @@ isl_calc_row_pitch(const struct isl_device *dev,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
isl_calc_sampler_padding_rows(const struct isl_device *dev,
|
||||||
|
const struct isl_surf_init_info *info,
|
||||||
|
const struct isl_extent3d *image_align_el,
|
||||||
|
uint32_t *phys_total_h_el)
|
||||||
|
{
|
||||||
|
if (!dev->requires_padding ||
|
||||||
|
!(info->usage & ISL_SURF_USAGE_TEXTURE_BIT) ||
|
||||||
|
(info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const uint32_t original_total_h_el = *phys_total_h_el;
|
||||||
|
uint32_t total_h_el = original_total_h_el;
|
||||||
|
|
||||||
|
if (isl_format_is_compressed(info->format)) {
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "For compressed textures (BC*, FXT1, ETC*, and EAC* surface formats),
|
||||||
|
* padding at the bottom of the surface is to an even compressed row.
|
||||||
|
* This is equivalent to a multiple of 2q, where q is the compression
|
||||||
|
* block height in texels."
|
||||||
|
*/
|
||||||
|
total_h_el = MAX2(total_h_el, isl_align(original_total_h_el, 2));
|
||||||
|
} else {
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "To determine the necessary padding on the bottom and right side of
|
||||||
|
* the surface, refer to the table in Alignment Unit Size section for
|
||||||
|
* the i and j parameters for the surface format in use."
|
||||||
|
*
|
||||||
|
* The height of the surface needs to be aligned to VAlign to accommodate
|
||||||
|
* the overfetch we get when SurfaceArray is enabled.
|
||||||
|
*/
|
||||||
|
total_h_el = MAX2(total_h_el,
|
||||||
|
isl_align(original_total_h_el, image_align_el->h));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "For cube surfaces, an additional two rows of padding are required at
|
||||||
|
* the bottom of the surface."
|
||||||
|
*/
|
||||||
|
if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
|
||||||
|
total_h_el = MAX2(total_h_el, original_total_h_el + 2);
|
||||||
|
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
|
||||||
|
* additional padding is required. These surfaces require an extra
|
||||||
|
* row plus 16 bytes of padding at the bottom in addition to the
|
||||||
|
* general padding requirements."
|
||||||
|
*
|
||||||
|
* This is to handle the extra row.
|
||||||
|
*/
|
||||||
|
if (isl_format_get_layout(info->format)->bpb % 3 == 0 ||
|
||||||
|
isl_format_is_yuv(info->format))
|
||||||
|
++total_h_el;
|
||||||
|
|
||||||
|
*phys_total_h_el = total_h_el;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
isl_calc_sampler_padding_last_row(const struct isl_device *dev,
|
||||||
|
const struct isl_surf_init_info *info,
|
||||||
|
const struct isl_tile_info *tile_info,
|
||||||
|
const struct isl_extent3d *image_align_el,
|
||||||
|
uint32_t row_pitch_B,
|
||||||
|
uint64_t *out_size_B)
|
||||||
|
{
|
||||||
|
if (!dev->requires_padding ||
|
||||||
|
!(info->usage & ISL_SURF_USAGE_TEXTURE_BIT) ||
|
||||||
|
(info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* The total size should make sense with the tiling */
|
||||||
|
assert(!(*out_size_B % (tile_info->phys_extent_B.width *
|
||||||
|
tile_info->phys_extent_B.height)));
|
||||||
|
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "It is possible that a cache line will straddle a page boundary if
|
||||||
|
* the base address or pitch is not aligned. [...] The surface must
|
||||||
|
* then be extended to the next multiple of the alignment unit size
|
||||||
|
* in each dimension"
|
||||||
|
*
|
||||||
|
* They appear to be telling us to align the row pitch to the horizontal
|
||||||
|
* image alignment parameter. However, empirical testing has shown that the
|
||||||
|
* overfetch for every row of the image appears to be relative to the start
|
||||||
|
* of the row, so we can just extend the last row of the image to whatever
|
||||||
|
* alignment is needed, and leave the rest as-is to save memory.
|
||||||
|
*/
|
||||||
|
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
|
||||||
|
uint32_t row_alignment_B = image_align_el->w * fmtl->bpb / 8;
|
||||||
|
uint64_t padding_B = isl_align_npot(row_pitch_B, row_alignment_B)
|
||||||
|
- row_pitch_B;
|
||||||
|
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
|
||||||
|
* additional padding is required. These surfaces require an extra
|
||||||
|
* row plus 16 bytes of padding at the bottom in addition to the
|
||||||
|
* general padding requirements."
|
||||||
|
*
|
||||||
|
* This is to handle the extra 16 bytes after we already added the extra
|
||||||
|
* row in isl_calc_padding_rows.
|
||||||
|
*/
|
||||||
|
if (isl_format_get_layout(info->format)->bpb % 3 == 0 ||
|
||||||
|
isl_format_is_yuv(info->format))
|
||||||
|
padding_B += 16;
|
||||||
|
|
||||||
|
/* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements:
|
||||||
|
* BSpec 58780:
|
||||||
|
*
|
||||||
|
* "For linear surfaces, additional padding of 64 bytes is required at
|
||||||
|
* the bottom of the surface. This is in addition to the padding
|
||||||
|
* required above."
|
||||||
|
*/
|
||||||
|
if (tile_info->tiling == ISL_TILING_LINEAR)
|
||||||
|
padding_B += 64;
|
||||||
|
|
||||||
|
/* Add the required padding to the total image size, we also have to round
|
||||||
|
* it up to the tile size since the padding bytes may be swizzled.
|
||||||
|
*/
|
||||||
|
*out_size_B += isl_align_npot(padding_B, tile_info->phys_extent_B.width *
|
||||||
|
tile_info->phys_extent_B.height);
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
isl_calc_size(const struct isl_device *dev,
|
isl_calc_size(const struct isl_device *dev,
|
||||||
const struct isl_surf_init_info *info,
|
const struct isl_surf_init_info *info,
|
||||||
const struct isl_tile_info *tile_info,
|
const struct isl_tile_info *tile_info,
|
||||||
const struct isl_extent4d *phys_total_el,
|
const struct isl_extent4d *phys_total_el,
|
||||||
|
const struct isl_extent3d *image_align_el,
|
||||||
uint32_t array_pitch_el_rows,
|
uint32_t array_pitch_el_rows,
|
||||||
uint32_t row_pitch_B,
|
uint32_t row_pitch_B,
|
||||||
uint64_t *out_size_B)
|
uint64_t *out_size_B)
|
||||||
{
|
{
|
||||||
|
uint32_t phys_total_h_el = phys_total_el->h;
|
||||||
|
isl_calc_sampler_padding_rows(dev, info, image_align_el, &phys_total_h_el);
|
||||||
|
|
||||||
uint64_t size_B;
|
uint64_t size_B;
|
||||||
if (tile_info->tiling == ISL_TILING_LINEAR) {
|
if (tile_info->tiling == ISL_TILING_LINEAR) {
|
||||||
/* LINEAR tiling has no concept of intra-tile arrays */
|
/* LINEAR tiling has no concept of intra-tile arrays */
|
||||||
assert(phys_total_el->d == 1 && phys_total_el->a == 1);
|
assert(phys_total_el->d == 1 && phys_total_el->a == 1);
|
||||||
|
|
||||||
size_B = (uint64_t) row_pitch_B * phys_total_el->h;
|
size_B = (uint64_t) row_pitch_B * phys_total_h_el;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* Pitches must make sense with the tiling */
|
/* Pitches must make sense with the tiling */
|
||||||
assert(row_pitch_B % tile_info->phys_extent_B.width == 0);
|
assert(row_pitch_B % tile_info->phys_extent_B.width == 0);
|
||||||
|
/* Tile size should already be a multiple of VAlign */
|
||||||
|
assert(!dev->requires_padding ||
|
||||||
|
tile_info->phys_extent_B.height % image_align_el->h == 0);
|
||||||
|
|
||||||
uint32_t array_slices, array_pitch_tl_rows;
|
uint32_t array_slices, array_pitch_tl_rows;
|
||||||
if (phys_total_el->d > 1) {
|
if (phys_total_el->d > 1) {
|
||||||
|
|
@ -3201,7 +3341,7 @@ isl_calc_size(const struct isl_device *dev,
|
||||||
|
|
||||||
const uint32_t total_h_tl =
|
const uint32_t total_h_tl =
|
||||||
(array_slices - 1) * array_pitch_tl_rows +
|
(array_slices - 1) * array_pitch_tl_rows +
|
||||||
isl_align_div(phys_total_el->h, tile_info->logical_extent_el.height);
|
isl_align_div(phys_total_h_el, tile_info->logical_extent_el.height);
|
||||||
|
|
||||||
size_B = (uint64_t) total_h_tl * tile_info->phys_extent_B.height *
|
size_B = (uint64_t) total_h_tl * tile_info->phys_extent_B.height *
|
||||||
row_pitch_B;
|
row_pitch_B;
|
||||||
|
|
@ -3221,6 +3361,9 @@ isl_calc_size(const struct isl_device *dev,
|
||||||
size_B += 4096;
|
size_B += 4096;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isl_calc_sampler_padding_last_row(dev, info, tile_info, image_align_el,
|
||||||
|
row_pitch_B, &size_B);
|
||||||
|
|
||||||
/* If for some reason we can't support the appropriate tiling format and
|
/* If for some reason we can't support the appropriate tiling format and
|
||||||
* end up falling to linear or some other format, make sure the image size
|
* end up falling to linear or some other format, make sure the image size
|
||||||
* and alignment are aligned to the expected block size so we can at least
|
* and alignment are aligned to the expected block size so we can at least
|
||||||
|
|
@ -3288,8 +3431,11 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
||||||
*
|
*
|
||||||
* "For Linear memory, this field specifies the stride in chunks of
|
* "For Linear memory, this field specifies the stride in chunks of
|
||||||
* 64 bytes (1 cache line)."
|
* 64 bytes (1 cache line)."
|
||||||
*
|
*/
|
||||||
* From the ATSM PRM Vol 2d,
|
if (isl_surf_usage_is_display(info->usage))
|
||||||
|
base_alignment_B = MAX(base_alignment_B, 64);
|
||||||
|
|
||||||
|
/* From the ATSM PRM Vol 2d,
|
||||||
* MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
|
* MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
|
||||||
*
|
*
|
||||||
* "Specifies the 64 byte aligned reference frame buffer addresses"
|
* "Specifies the 64 byte aligned reference frame buffer addresses"
|
||||||
|
|
@ -3300,8 +3446,7 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
||||||
*
|
*
|
||||||
* "Format: SplitBaseAddress64ByteAligned"
|
* "Format: SplitBaseAddress64ByteAligned"
|
||||||
*/
|
*/
|
||||||
if (isl_surf_usage_is_display(info->usage) ||
|
if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)
|
||||||
(info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT))
|
|
||||||
base_alignment_B = MAX(base_alignment_B, 64);
|
base_alignment_B = MAX(base_alignment_B, 64);
|
||||||
} else {
|
} else {
|
||||||
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
|
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
|
||||||
|
|
@ -3435,7 +3580,8 @@ isl_surf_init_s_with_tiling(const struct isl_device *dev,
|
||||||
|
|
||||||
uint64_t size_B;
|
uint64_t size_B;
|
||||||
if (!isl_calc_size(dev, info, &tile_info, &phys_total_el,
|
if (!isl_calc_size(dev, info, &tile_info, &phys_total_el,
|
||||||
array_pitch_el_rows, row_pitch_B, &size_B))
|
&image_align_el, array_pitch_el_rows,
|
||||||
|
row_pitch_B, &size_B))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const uint32_t base_alignment_B =
|
const uint32_t base_alignment_B =
|
||||||
|
|
@ -3768,7 +3914,8 @@ isl_surf_from_mem(const struct isl_device *isl_dev,
|
||||||
/* Create the surface. */
|
/* Create the surface. */
|
||||||
isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT |
|
isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT |
|
||||||
ISL_SURF_USAGE_RENDER_TARGET_BIT |
|
ISL_SURF_USAGE_RENDER_TARGET_BIT |
|
||||||
ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT;
|
ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT |
|
||||||
|
ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
ASSERTED bool ok = isl_surf_init(isl_dev, surf,
|
ASSERTED bool ok = isl_surf_init(isl_dev, surf,
|
||||||
.dim = ISL_SURF_DIM_2D,
|
.dim = ISL_SURF_DIM_2D,
|
||||||
.format = fmtl->format,
|
.format = fmtl->format,
|
||||||
|
|
@ -4808,6 +4955,8 @@ isl_surf_get_image_surf(const struct isl_device *dev,
|
||||||
usage &= ~ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT;
|
usage &= ~ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
|
|
||||||
bool ok UNUSED;
|
bool ok UNUSED;
|
||||||
ok = isl_surf_init(dev, image_surf,
|
ok = isl_surf_init(dev, image_surf,
|
||||||
.dim = ISL_SURF_DIM_2D,
|
.dim = ISL_SURF_DIM_2D,
|
||||||
|
|
@ -4922,6 +5071,8 @@ isl_surf_get_uncompressed_surf(const struct isl_device *dev,
|
||||||
usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
|
usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
|
|
||||||
bool ok UNUSED;
|
bool ok UNUSED;
|
||||||
ok = isl_surf_init(dev, ucompr_surf,
|
ok = isl_surf_init(dev, ucompr_surf,
|
||||||
.dim = surf->dim,
|
.dim = surf->dim,
|
||||||
|
|
@ -5036,6 +5187,8 @@ isl_surf_get_uncompressed_surf(const struct isl_device *dev,
|
||||||
usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
|
usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
|
|
||||||
bool ok UNUSED;
|
bool ok UNUSED;
|
||||||
ok = isl_surf_init(dev, ucompr_surf,
|
ok = isl_surf_init(dev, ucompr_surf,
|
||||||
.dim = ISL_SURF_DIM_2D,
|
.dim = ISL_SURF_DIM_2D,
|
||||||
|
|
|
||||||
|
|
@ -1173,6 +1173,7 @@ typedef uint64_t isl_surf_usage_flags_t;
|
||||||
#define ISL_SURF_USAGE_SOFTWARE_DETILING (1u << 26)
|
#define ISL_SURF_USAGE_SOFTWARE_DETILING (1u << 26)
|
||||||
#define ISL_SURF_USAGE_PREFER_4K_ALIGNMENT (1u << 27)
|
#define ISL_SURF_USAGE_PREFER_4K_ALIGNMENT (1u << 27)
|
||||||
#define ISL_SURF_USAGE_NO_ARRAY_OVERFETCH_BIT (1u << 28)
|
#define ISL_SURF_USAGE_NO_ARRAY_OVERFETCH_BIT (1u << 28)
|
||||||
|
#define ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT (1u << 29)
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1386,6 +1387,12 @@ struct isl_device {
|
||||||
*/
|
*/
|
||||||
bool buffer_length_in_aux_addr;
|
bool buffer_length_in_aux_addr;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if the driver is running with scratch page disabled and requires
|
||||||
|
* extra padding on some surfaces to avoid page faults.
|
||||||
|
*/
|
||||||
|
bool requires_padding;
|
||||||
|
|
||||||
uint64_t dummy_aux_address;
|
uint64_t dummy_aux_address;
|
||||||
|
|
||||||
void (*surf_fill_state_s)(const struct isl_device *dev, void *state,
|
void (*surf_fill_state_s)(const struct isl_device *dev, void *state,
|
||||||
|
|
|
||||||
|
|
@ -245,6 +245,8 @@ get_blorp_surf_for_anv_address(struct anv_cmd_buffer *cmd_buffer,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
|
||||||
|
|
||||||
ok = isl_surf_init(&cmd_buffer->device->isl_dev, isl_surf,
|
ok = isl_surf_init(&cmd_buffer->device->isl_dev, isl_surf,
|
||||||
.dim = ISL_SURF_DIM_2D,
|
.dim = ISL_SURF_DIM_2D,
|
||||||
.format = format,
|
.format = format,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue