From 3cd9b14c804352de7c319bdc9f4862939b072ed0 Mon Sep 17 00:00:00 2001 From: Calder Young Date: Wed, 29 Apr 2026 15:48:42 -0700 Subject: [PATCH] isl: Optimize the sampler cache to overlap as few 64B cachelines as possible Since we now have an ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT flag to turn extra padding calculations on and off, we can align the row pitch of linear surfaces that are accessed through the sampler to minimize the number of L3 cachelines that each sampler cacheline overlaps for added efficiency. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/blorp/blorp_blit.c | 4 ++++ src/intel/isl/isl.c | 39 +++++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index d152d94149a..e81b7411a92 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2482,6 +2482,10 @@ shrink_surface_params(const struct isl_device *dev, info->surf.phys_level0_sa.height = size * px_size_sa.h; info->surf.usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; + + /* Stomp the 64B alignment because we set NO_OVERFETCH_PADDING_BIT */ + if (info->surf.tiling == ISL_TILING_LINEAR) + info->surf.alignment_B = 1; } static void diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index e6197b83d8a..d16beaf3740 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -2824,7 +2824,8 @@ isl_calc_phys_total_extent_el(const struct isl_device *dev, static uint32_t isl_calc_row_pitch_alignment(const struct isl_device *dev, const struct isl_surf_init_info *surf_info, - const struct isl_tile_info *tile_info) + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_el) { if (tile_info->tiling != ISL_TILING_LINEAR) { @@ -2921,6 +2922,27 @@ isl_calc_row_pitch_alignment(const struct isl_device *dev, alignment = isl_align(alignment, 64); } + /* SKL PRMs, Volume 5: Memory Views, Buffer Padding Requirements: + * BSpec 58780: + * + * "It is possible 
that a cache line will straddle a page boundary if + * the base address or pitch is not aligned" + * + * The row pitch of the surface needs to be aligned to HAlign if we want to + * avoid having the sampler cache straddling extra cachelines/pages. + * + * Empirical testing has shown that the straddle of each row is just + * relative to the start of the row, so we can take care of the necessary + * padding in isl_calc_sampler_padding_last_row to avoid page faults, and + * then just choose the minimum of either the horizontal alignment or 64B + * for the row pitch alignment as an extra optimization to minimize the + * number of total 64B cachelines in L3 that a sampler cacheline overlaps. + */ + if (dev->requires_padding && surf_info->row_pitch_B == 0 && + (surf_info->usage & ISL_SURF_USAGE_TEXTURE_BIT) && + !(surf_info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT)) + alignment = isl_lcm_u32(alignment, MIN(bs * image_align_el->w, 64)); + return alignment; } @@ -3066,10 +3088,11 @@ isl_calc_row_pitch(const struct isl_device *dev, const struct isl_tile_info *tile_info, enum isl_dim_layout dim_layout, const struct isl_extent4d *phys_total_el, + const struct isl_extent3d *image_align_el, uint32_t *out_row_pitch_B) { uint32_t alignment_B = - isl_calc_row_pitch_alignment(dev, surf_info, tile_info); + isl_calc_row_pitch_alignment(dev, surf_info, tile_info, image_align_el); const uint32_t min_row_pitch_B = isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, @@ -3448,6 +3471,15 @@ isl_calc_base_alignment(const struct isl_device *dev, */ if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) base_alignment_B = MAX(base_alignment_B, 64); + + /* Even though the sampler requirement is 1B, we should request at + * least 64B of alignment so that we don't end up straddling more + * cachelines/pages than needed in the next level. 
+ */ + if (dev->requires_padding && + (info->usage & ISL_SURF_USAGE_TEXTURE_BIT) && + !(info->usage & ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT)) + base_alignment_B = MAX(base_alignment_B, 64); } else { const uint32_t tile_size_B = tile_info->phys_extent_B.width * tile_info->phys_extent_B.height; @@ -3575,7 +3607,8 @@ isl_surf_init_s_with_tiling(const struct isl_device *dev, uint32_t row_pitch_B; if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout, - &phys_total_el, &row_pitch_B)) + &phys_total_el, &image_align_el, + &row_pitch_B)) return false; uint64_t size_B;