From 8d13628f7f4a7ae139fc91521c2bb7be1571004e Mon Sep 17 00:00:00 2001 From: Calder Young Date: Tue, 17 Feb 2026 17:23:22 -0800 Subject: [PATCH] isl: Add additional alignment/padding requirements to prevent overfetch Bspec 58779 describes various cases where additional padding is required on the bottom and right sides of a sampling engine surface to avoid page faults. Since we don't want to mess up the other drivers that also use ISL, there's now a requires_padding boolean in isl_dev that can be used to enable/disable the extra padding calculations per device and driver. The extra padding can also be disabled per-surface by adding the usage flag ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT, like when a specific row pitch is needed. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/blorp/blorp_blit.c | 2 + src/intel/isl/isl.c | 171 +++++++++++++++++++++++++++++++++-- src/intel/isl/isl.h | 7 ++ src/intel/vulkan/anv_blorp.c | 2 + 4 files changed, 173 insertions(+), 9 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 9bad4128900..d152d94149a 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2480,6 +2480,8 @@ shrink_surface_params(const struct isl_device *dev, size = MIN2((uint32_t)ceil(*y1), info->surf.logical_level0_px.height); info->surf.logical_level0_px.height = size; info->surf.phys_level0_sa.height = size * px_size_sa.h; + + info->surf.usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; } static void diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 96742c86e3f..e6197b83d8a 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -2957,7 +2957,7 @@ isl_calc_tiled_min_row_pitch(const struct isl_device *dev, * can be 128B), so align the row pitch to the alignment. 
*/ assert(alignment_B >= tile_info->phys_extent_B.width); - return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B); + return isl_align_npot(total_w_tl * tile_info->phys_extent_B.width, alignment_B); } static uint32_t @@ -3160,25 +3160,165 @@ isl_calc_row_pitch(const struct isl_device *dev, return true; } +static void +isl_calc_sampler_padding_rows(const struct isl_device *dev, + const struct isl_surf_init_info *info, + const struct isl_extent3d *image_align_el, + uint32_t *phys_total_h_el) +{ + if (!dev->requires_padding || + !(info->usage & ISL_SURF_USAGE_TEXTURE_BIT) || + (info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT)) + return; + + const uint32_t original_total_h_el = *phys_total_h_el; + uint32_t total_h_el = original_total_h_el; + + if (isl_format_is_compressed(info->format)) { + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "For compressed textures (BC*, FXT1, ETC*, and EAC* surface formats), + * padding at the bottom of the surface is to an even compressed row. + * This is equivalent to a multiple of 2q, where q is the compression + * block height in texels." + */ + total_h_el = MAX2(total_h_el, isl_align(original_total_h_el, 2)); + } else { + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "To determine the necessary padding on the bottom and right side of + * the surface, refer to the table in Alignment Unit Size section for + * the i and j parameters for the surface format in use." + * + * The height of the surface needs to be aligned to VAlign to accommodate + * the overfetch we get when SurfaceArray is enabled. + */ + total_h_el = MAX2(total_h_el, + isl_align(original_total_h_el, image_align_el->h)); + } + + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "For cube surfaces, an additional two rows of padding are required at + * the bottom of the surface." 
+ */ + if (info->usage & ISL_SURF_USAGE_CUBE_BIT) + total_h_el = MAX2(total_h_el, original_total_h_el + 2); + + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra + * row plus 16 bytes of padding at the bottom in addition to the + * general padding requirements." + * + * This is to handle the extra row. + */ + if (isl_format_get_layout(info->format)->bpb % 3 == 0 || + isl_format_is_yuv(info->format)) + ++total_h_el; + + *phys_total_h_el = total_h_el; +} + +static void +isl_calc_sampler_padding_last_row(const struct isl_device *dev, + const struct isl_surf_init_info *info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_el, + uint32_t row_pitch_B, + uint64_t *out_size_B) +{ + if (!dev->requires_padding || + !(info->usage & ISL_SURF_USAGE_TEXTURE_BIT) || + (info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT)) + return; + + /* The total size should make sense with the tiling */ + assert(!(*out_size_B % (tile_info->phys_extent_B.width * + tile_info->phys_extent_B.height))); + + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "It is possible that a cache line will straddle a page boundary if + * the base address or pitch is not aligned. [...] The surface must + * then be extended to the next multiple of the alignment unit size + * in each dimension" + * + * They appear to be telling us to align the row pitch to the horizontal + * image alignment parameter. However, empirical testing has shown that the + * overfetch for every row of the image appears to be relative to the start + * of the row, so we can just extend the last row of the image to whatever + * alignment is needed, and leave the rest as-is to save memory. 
+ */ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + uint32_t row_alignment_B = image_align_el->w * fmtl->bpb / 8; + uint64_t padding_B = isl_align_npot(row_pitch_B, row_alignment_B) + - row_pitch_B; + + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra + * row plus 16 bytes of padding at the bottom in addition to the + * general padding requirements." + * + * This is to handle the extra 16 bytes after we already added the extra + * row in isl_calc_sampler_padding_rows. + */ + if (isl_format_get_layout(info->format)->bpb % 3 == 0 || + isl_format_is_yuv(info->format)) + padding_B += 16; + + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "For linear surfaces, additional padding of 64 bytes is required at + * the bottom of the surface. This is in addition to the padding + * required above." + */ + if (tile_info->tiling == ISL_TILING_LINEAR) + padding_B += 64; + + /* Add the required padding to the total image size; we also have to round + * it up to the tile size since the padding bytes may be swizzled. 
+ */ + *out_size_B += isl_align_npot(padding_B, tile_info->phys_extent_B.width * + tile_info->phys_extent_B.height); +} + static bool isl_calc_size(const struct isl_device *dev, const struct isl_surf_init_info *info, const struct isl_tile_info *tile_info, const struct isl_extent4d *phys_total_el, + const struct isl_extent3d *image_align_el, uint32_t array_pitch_el_rows, uint32_t row_pitch_B, uint64_t *out_size_B) { + uint32_t phys_total_h_el = phys_total_el->h; + isl_calc_sampler_padding_rows(dev, info, image_align_el, &phys_total_h_el); + uint64_t size_B; if (tile_info->tiling == ISL_TILING_LINEAR) { /* LINEAR tiling has no concept of intra-tile arrays */ assert(phys_total_el->d == 1 && phys_total_el->a == 1); - size_B = (uint64_t) row_pitch_B * phys_total_el->h; + size_B = (uint64_t) row_pitch_B * phys_total_h_el; } else { /* Pitches must make sense with the tiling */ assert(row_pitch_B % tile_info->phys_extent_B.width == 0); + /* Tile size should already be a multiple of VAlign */ + assert(!dev->requires_padding || + tile_info->phys_extent_B.height % image_align_el->h == 0); uint32_t array_slices, array_pitch_tl_rows; if (phys_total_el->d > 1) { @@ -3201,7 +3341,7 @@ isl_calc_size(const struct isl_device *dev, const uint32_t total_h_tl = (array_slices - 1) * array_pitch_tl_rows + - isl_align_div(phys_total_el->h, tile_info->logical_extent_el.height); + isl_align_div(phys_total_h_el, tile_info->logical_extent_el.height); size_B = (uint64_t) total_h_tl * tile_info->phys_extent_B.height * row_pitch_B; @@ -3221,6 +3361,9 @@ isl_calc_size(const struct isl_device *dev, size_B += 4096; } + isl_calc_sampler_padding_last_row(dev, info, tile_info, image_align_el, + row_pitch_B, &size_B); + /* If for some reason we can't support the appropriate tiling format and * end up falling to linear or some other format, make sure the image size * and alignment are aligned to the expected block size so we can at least @@ -3288,8 +3431,11 @@ isl_calc_base_alignment(const struct 
isl_device *dev, * * "For Linear memory, this field specifies the stride in chunks of * 64 bytes (1 cache line)." - * - * From the ATSM PRM Vol 2d, + */ + if (isl_surf_usage_is_display(info->usage)) + base_alignment_B = MAX(base_alignment_B, 64); + + /* From the ATSM PRM Vol 2d, * MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress: * * "Specifies the 64 byte aligned reference frame buffer addresses" @@ -3300,8 +3446,7 @@ isl_calc_base_alignment(const struct isl_device *dev, * * "Format: SplitBaseAddress64ByteAligned" */ - if (isl_surf_usage_is_display(info->usage) || - (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)) + if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) base_alignment_B = MAX(base_alignment_B, 64); } else { const uint32_t tile_size_B = tile_info->phys_extent_B.width * @@ -3435,7 +3580,8 @@ isl_surf_init_s_with_tiling(const struct isl_device *dev, uint64_t size_B; if (!isl_calc_size(dev, info, &tile_info, &phys_total_el, - array_pitch_el_rows, row_pitch_B, &size_B)) + &image_align_el, array_pitch_el_rows, + row_pitch_B, &size_B)) return false; const uint32_t base_alignment_B = @@ -3768,7 +3914,8 @@ isl_surf_from_mem(const struct isl_device *isl_dev, /* Create the surface. 
*/ isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT | ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT; + ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT | + ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; ASSERTED bool ok = isl_surf_init(isl_dev, surf, .dim = ISL_SURF_DIM_2D, .format = fmtl->format, @@ -4808,6 +4955,8 @@ isl_surf_get_image_surf(const struct isl_device *dev, usage &= ~ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT; } + usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; + bool ok UNUSED; ok = isl_surf_init(dev, image_surf, .dim = ISL_SURF_DIM_2D, @@ -4922,6 +5071,8 @@ isl_surf_get_uncompressed_surf(const struct isl_device *dev, usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; } + usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; + bool ok UNUSED; ok = isl_surf_init(dev, ucompr_surf, .dim = surf->dim, @@ -5036,6 +5187,8 @@ isl_surf_get_uncompressed_surf(const struct isl_device *dev, usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; } + usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; + bool ok UNUSED; ok = isl_surf_init(dev, ucompr_surf, .dim = ISL_SURF_DIM_2D, diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 2c40c5f7fc9..2cd2ebd0399 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1173,6 +1173,7 @@ typedef uint64_t isl_surf_usage_flags_t; #define ISL_SURF_USAGE_SOFTWARE_DETILING (1u << 26) #define ISL_SURF_USAGE_PREFER_4K_ALIGNMENT (1u << 27) #define ISL_SURF_USAGE_NO_ARRAY_OVERFETCH_BIT (1u << 28) +#define ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT (1u << 29) /** @} */ /** @@ -1386,6 +1387,12 @@ struct isl_device { */ bool buffer_length_in_aux_addr; + /** + * True if the driver is running with scratch page disabled and requires + * extra padding on some surfaces to avoid page faults. 
+ */ + bool requires_padding; + uint64_t dummy_aux_address; void (*surf_fill_state_s)(const struct isl_device *dev, void *state, diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index fcca0dd8d14..b8f1d2f4ef4 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -245,6 +245,8 @@ get_blorp_surf_for_anv_address(struct anv_cmd_buffer *cmd_buffer, }, }; + usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; + ok = isl_surf_init(&cmd_buffer->device->isl_dev, isl_surf, .dim = ISL_SURF_DIM_2D, .format = format,