From fcdae4d4c09cfe3f8f38011a6e2b533cbb5e04f7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sun, 2 Mar 2025 10:44:35 -0800 Subject: [PATCH] intel: Add and use isl_surf_from_mem() Unify code which creates surfaces from buffers. The behavior is slightly changed to use array layers to enable arrayed buffer clears (as needed). Reviewed-by: Rohan Garg Part-of: --- src/intel/blorp/blorp_blit.c | 130 +++++------------------------ src/intel/isl/isl.c | 99 ++++++++++++++++++++++ src/intel/isl/isl.h | 8 ++ src/intel/vulkan/anv_blorp.c | 120 ++++++-------------------- src/intel/vulkan_hasvk/anv_blorp.c | 90 +++++--------------- 5 files changed, 175 insertions(+), 272 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index a8a648a0a1e..cc3c59b0ead 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -3081,122 +3081,38 @@ blorp_copy(struct blorp_batch *batch, do_blorp_blit(batch, &params, &key, &coords); } -static enum isl_format -isl_format_for_size(unsigned size_B) -{ - switch (size_B) { - case 1: return ISL_FORMAT_R8_UINT; - case 2: return ISL_FORMAT_R8G8_UINT; - case 4: return ISL_FORMAT_R8G8B8A8_UINT; - case 8: return ISL_FORMAT_R16G16B16A16_UINT; - case 16: return ISL_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Not a power-of-two format size"); - } -} - -/** - * Returns the greatest common divisor of a and b that is a power of two. - */ -static uint64_t -gcd_pow2_u64(uint64_t a, uint64_t b) -{ - assert(a > 0 || b > 0); - - unsigned a_log2 = ffsll(a) - 1; - unsigned b_log2 = ffsll(b) - 1; - - /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which - * case, the MIN2() will take the other one. If both are 0 then we will - * hit the assert above. 
- */ - return 1 << MIN2(a_log2, b_log2); -} - -static void -do_buffer_copy(struct blorp_batch *batch, - struct blorp_address *src, - struct blorp_address *dst, - int width, int height, int block_size) -{ - /* The actual format we pick doesn't matter as blorp will throw it away. - * The only thing that actually matters is the size. - */ - enum isl_format format = isl_format_for_size(block_size); - - UNUSED bool ok; - struct isl_surf surf; - ok = isl_surf_init(batch->blorp->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = format, - .width = width, - .height = height, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .row_pitch_B = width * block_size, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_RENDER_TARGET_BIT, - .tiling_flags = ISL_TILING_LINEAR_BIT); - assert(ok); - - struct blorp_surf src_blorp_surf = { - .surf = &surf, - .addr = *src, - }; - - struct blorp_surf dst_blorp_surf = { - .surf = &surf, - .addr = *dst, - }; - - blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, - 0, 0, 0, 0, width, height); -} - void blorp_buffer_copy(struct blorp_batch *batch, struct blorp_address src, struct blorp_address dst, uint64_t size) { - const struct intel_device_info *devinfo = batch->blorp->isl_dev->info; - uint64_t copy_size = size; + struct isl_surf surf; + struct blorp_surf src_blorp_surf = { + .surf = &surf, + .addr = src, + }; - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << (devinfo->ver >= 7 ? 14 : 13); + struct blorp_surf dst_blorp_surf = { + .surf = &surf, + .addr = dst, + }; - /* First, we compute the biggest format that can be used with the - * given offsets and size. 
- */ - int bs = 16; - bs = gcd_pow2_u64(bs, src.offset); - bs = gcd_pow2_u64(bs, dst.offset); - bs = gcd_pow2_u64(bs, size); + while (size != 0) { + isl_surf_from_mem(batch->blorp->isl_dev, &surf, + src_blorp_surf.addr.offset | + dst_blorp_surf.addr.offset, size, ISL_TILING_LINEAR); - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(batch, &src, &dst, max_surface_dim, max_surface_dim, bs); - copy_size -= max_copy_size; - src.offset += max_copy_size; - dst.offset += max_copy_size; - } + for (int i = 0; i < surf.logical_level0_px.a; i++) { + blorp_copy(batch, + &src_blorp_surf, 0, i, + &dst_blorp_surf, 0, i, 0, 0, 0, 0, + surf.logical_level0_px.w, + surf.logical_level0_px.h); + } - /* Now make a max-width copy */ - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(batch, &src, &dst, max_surface_dim, height, bs); - copy_size -= rect_copy_size; - src.offset += rect_copy_size; - dst.offset += rect_copy_size; - } - - /* Finally, make a small copy to finish it off */ - if (copy_size != 0) { - do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs); + size -= surf.size_B; + src_blorp_surf.addr.offset += surf.size_B; + dst_blorp_surf.addr.offset += surf.size_B; } } diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 06e1351cc3e..6ba343dda81 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -3468,6 +3468,105 @@ isl_surf_init_s(const struct isl_device *dev, return true; } +/* Returns divisor+1 if divisor >= num. */ +static int64_t +find_next_divisor(int64_t divisor, int64_t num) +{ + if (divisor >= num) { + return divisor + 1; + } else { + while (num % ++divisor != 0); + return divisor; + } +} + +/* Return an extent which holds at most the given number of tiles and has a + * minimum array length. 
+ */ +static struct isl_extent4d +get_2d_array_extent(const struct isl_device *isl_dev, + const struct isl_tile_info *tile_info, int64_t max_tiles) +{ + int max_surface_dim = 1 << (ISL_GFX_VER(isl_dev) >= 7 ? 14 : 13); + int max_array_len = 2048; + + for (int64_t tiles = max_tiles; tiles > 0; tiles--) { + for (int array_len = 1; array_len <= MIN2(tiles, max_array_len); + array_len = find_next_divisor(array_len, tiles)) { + int64_t layer_tiles = tiles / array_len; + for (int64_t h_tl = 1; h_tl <= layer_tiles; + h_tl = find_next_divisor(h_tl, layer_tiles)) { + int64_t w_tl = layer_tiles / h_tl; + int64_t w_el = w_tl * tile_info->logical_extent_el.w; + int64_t h_el = h_tl * tile_info->logical_extent_el.h; + + if (w_el > max_surface_dim) + continue; + + if (h_el > max_surface_dim) + continue; + + /* SurfaceQPitch must be multiple of 4. */ + if (array_len > 1 && h_el % 4 != 0) + continue; + + return isl_extent4d(w_el, h_el, 1, array_len); + } + } + } + + unreachable("extent not found for given number of tiles."); +} + +void +isl_surf_from_mem(const struct isl_device *isl_dev, + struct isl_surf *surf, + int64_t offset, + int64_t mem_size_B, + enum isl_tiling tiling) +{ + /* Get the surface format. */ + const struct isl_format_layout *fmtl; + switch (ffs(offset | mem_size_B)) { + default: fmtl = isl_format_get_layout(ISL_FORMAT_R32G32B32A32_UINT); break; + case 4: fmtl = isl_format_get_layout(ISL_FORMAT_R32G32_UINT); break; + case 3: fmtl = isl_format_get_layout(ISL_FORMAT_R32_UINT); break; + case 2: fmtl = isl_format_get_layout(ISL_FORMAT_R16_UINT); break; + case 1: fmtl = isl_format_get_layout(ISL_FORMAT_R8_UINT); break; + } + + /* Get the surface extent. 
*/ + struct isl_tile_info tile_info; + isl_tiling_get_info(tiling, ISL_SURF_DIM_2D, ISL_MSAA_LAYOUT_NONE, + fmtl->bpb, 1 /* samples */, &tile_info); + int tile_size_B = tile_info.phys_extent_B.w * tile_info.phys_extent_B.h; + int64_t max_tiles = mem_size_B / tile_size_B; + struct isl_extent4d extent = + get_2d_array_extent(isl_dev, &tile_info, max_tiles); + + /* Create the surface. */ + isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_RENDER_TARGET_BIT | + ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT; + ASSERTED bool ok = isl_surf_init(isl_dev, surf, + .dim = ISL_SURF_DIM_2D, + .format = fmtl->format, + .width = extent.w, + .height = extent.h, + .depth = extent.d, + .levels = 1, + .array_len = extent.a, + .samples = 1, + .row_pitch_B = extent.w * fmtl->bpb / 8, + .usage = usage, + .tiling_flags = 1 << tiling); + assert(ok); + if (extent.a > 1) + assert(surf->array_pitch_el_rows == extent.h); + assert(surf->size_B == surf->row_pitch_B * extent.h * extent.a); + assert(surf->size_B <= max_tiles * tile_size_B); +} + void isl_surf_get_tile_info(const struct isl_surf *surf, struct isl_tile_info *tile_info) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 2d5c8754dcc..c83b28cd412 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -2732,6 +2732,14 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_surf *surf, const struct isl_surf_init_info *restrict info); +/* Return the largest surface possible for the specified memory range. 
*/ +void +isl_surf_from_mem(const struct isl_device *isl_dev, + struct isl_surf *surf, + int64_t offset, + int64_t mem_size_B, + enum isl_tiling tiling); + void isl_surf_get_tile_info(const struct isl_surf *surf, struct isl_tile_info *tile_info); diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index e66fa0b4b18..b35f56c1703 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -723,24 +723,6 @@ void anv_CmdCopyImage2( end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done); } -static enum isl_format -isl_format_for_size(unsigned size_B) -{ - /* Prefer 32-bit per component formats for CmdFillBuffer */ - switch (size_B) { - case 1: return ISL_FORMAT_R8_UINT; - case 2: return ISL_FORMAT_R16_UINT; - case 3: return ISL_FORMAT_R8G8B8_UINT; - case 4: return ISL_FORMAT_R32_UINT; - case 6: return ISL_FORMAT_R16G16B16_UINT; - case 8: return ISL_FORMAT_R32G32_UINT; - case 12: return ISL_FORMAT_R32G32B32_UINT; - case 16: return ISL_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Unknown format size"); - } -} - static void copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, struct blorp_batch *batch, @@ -1150,24 +1132,6 @@ void anv_CmdBlitImage2( anv_blorp_batch_finish(&batch); } -/** - * Returns the greatest common divisor of a and b that is a power of two. - */ -static uint64_t -gcd_pow2_u64(uint64_t a, uint64_t b) -{ - assert(a > 0 || b > 0); - - unsigned a_log2 = ffsll(a) - 1; - unsigned b_log2 = ffsll(b) - 1; - - /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which - * case, the MIN2() will take the other one. If both are 0 then we will - * hit the assert above. 
- */ - return 1 << MIN2(a_log2, b_log2); -} - /* This is maximum possible width/height our HW can handle */ #define MAX_SURFACE_DIM (1ull << 14) @@ -1335,79 +1299,43 @@ anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer, VkDeviceSize size, uint32_t data) { - struct blorp_surf surf; - struct isl_surf isl_surf; - struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, cmd_buffer->state.current_pipeline == cmd_buffer->device->physical->gpgpu_pipeline_value ? BLORP_BATCH_USE_COMPUTE : 0); - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int bs = 16; - uint64_t offset = address.offset; - bs = gcd_pow2_u64(bs, offset); - bs = gcd_pow2_u64(bs, size); - enum isl_format isl_format = isl_format_for_size(bs); - union isl_color_value color = { .u32 = { data, data, data, data }, }; - const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; - while (size >= max_fill_size) { - get_blorp_surf_for_anv_address(cmd_buffer, - (struct anv_address) { - .bo = address.bo, .offset = offset, - }, - MAX_SURFACE_DIM, MAX_SURFACE_DIM, - MAX_SURFACE_DIM * bs, isl_format, - true /* is_dest */, - &surf, &isl_surf); + isl_surf_usage_flags_t usage = + get_usage_flag_for_cmd_buffer(cmd_buffer, true /* is_dest */, + false /* is_depth */, address.protected); - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, + struct isl_surf isl_surf; + struct blorp_surf surf = { + .addr = { + .buffer = address.bo, + .offset = address.offset, + .mocs = anv_mocs(cmd_buffer->device, address.bo, usage), + }, + .surf = &isl_surf, + }; + + do { + isl_surf_from_mem(&cmd_buffer->device->isl_dev, &isl_surf, + surf.addr.offset, size, ISL_TILING_LINEAR); + + blorp_clear(&batch, &surf, isl_surf.format, ISL_SWIZZLE_IDENTITY, 0, 0, + isl_surf.logical_level0_px.a, 0, 0, + isl_surf.logical_level0_px.w, + isl_surf.logical_level0_px.h, color, 0 /* color_write_disable */); - size -= 
max_fill_size; - offset += max_fill_size; - } - uint64_t height = size / (MAX_SURFACE_DIM * bs); - assert(height < MAX_SURFACE_DIM); - if (height != 0) { - const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs; - get_blorp_surf_for_anv_address(cmd_buffer, - (struct anv_address) { - .bo = address.bo, .offset = offset, - }, - MAX_SURFACE_DIM, height, - MAX_SURFACE_DIM * bs, isl_format, - true /* is_dest */, - &surf, &isl_surf); - - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height, - color, 0 /* color_write_disable */); - size -= rect_fill_size; - offset += rect_fill_size; - } - - if (size != 0) { - const uint32_t width = size / bs; - get_blorp_surf_for_anv_address(cmd_buffer, - (struct anv_address) { - .bo = address.bo, .offset = offset, - }, - width, 1, width * bs, isl_format, - true /* is_dest */, &surf, &isl_surf); - - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, width, 1, - color, 0 /* color_write_disable */); - } + size -= isl_surf.size_B; + surf.addr.offset += isl_surf.size_B; + } while (size != 0); anv_blorp_batch_finish(&batch); } diff --git a/src/intel/vulkan_hasvk/anv_blorp.c b/src/intel/vulkan_hasvk/anv_blorp.c index 50b262ad991..0554521e671 100644 --- a/src/intel/vulkan_hasvk/anv_blorp.c +++ b/src/intel/vulkan_hasvk/anv_blorp.c @@ -762,24 +762,6 @@ void anv_CmdBlitImage2( anv_blorp_batch_finish(&batch); } -/** - * Returns the greatest common divisor of a and b that is a power of two. - */ -static uint64_t -gcd_pow2_u64(uint64_t a, uint64_t b) -{ - assert(a > 0 || b > 0); - - unsigned a_log2 = ffsll(a) - 1; - unsigned b_log2 = ffsll(b) - 1; - - /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which - * case, the MIN2() will take the other one. If both are 0 then we will - * hit the assert above. 
- */ - return 1 << MIN2(a_log2, b_log2); -} - /* This is maximum possible width/height our HW can handle */ #define MAX_SURFACE_DIM (1ull << 14) @@ -898,8 +880,6 @@ void anv_CmdFillBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct blorp_surf surf; - struct isl_surf isl_surf; struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, 0); @@ -916,62 +896,34 @@ void anv_CmdFillBuffer( */ fillSize &= ~3ull; - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int bs = 16; - bs = gcd_pow2_u64(bs, dstOffset); - bs = gcd_pow2_u64(bs, fillSize); - enum isl_format isl_format = isl_format_for_size(bs); - union isl_color_value color = { .u32 = { data, data, data, data }, }; - const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; - while (fillSize >= max_fill_size) { - get_blorp_surf_for_anv_buffer(cmd_buffer->device, - dst_buffer, dstOffset, - MAX_SURFACE_DIM, MAX_SURFACE_DIM, - MAX_SURFACE_DIM * bs, isl_format, true, - &surf, &isl_surf); + struct isl_surf isl_surf; + struct blorp_surf surf = { + .addr = { + .buffer = dst_buffer->address.bo, + .offset = dst_buffer->address.offset + dstOffset, + .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo, + ISL_SURF_USAGE_RENDER_TARGET_BIT), + }, + .surf = &isl_surf, + }; - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, + do { + isl_surf_from_mem(&cmd_buffer->device->isl_dev, &isl_surf, + surf.addr.offset, fillSize, ISL_TILING_LINEAR); + + blorp_clear(&batch, &surf, isl_surf.format, ISL_SWIZZLE_IDENTITY, 0, 0, + isl_surf.logical_level0_px.a, 0, 0, + isl_surf.logical_level0_px.w, + isl_surf.logical_level0_px.h, color, 0 /* color_write_disable */); - fillSize -= max_fill_size; - dstOffset += max_fill_size; - } - uint64_t height = fillSize / (MAX_SURFACE_DIM * bs); - assert(height < MAX_SURFACE_DIM); - if (height 
!= 0) { - const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs; - get_blorp_surf_for_anv_buffer(cmd_buffer->device, - dst_buffer, dstOffset, - MAX_SURFACE_DIM, height, - MAX_SURFACE_DIM * bs, isl_format, true, - &surf, &isl_surf); - - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height, - color, 0 /* color_write_disable */); - fillSize -= rect_fill_size; - dstOffset += rect_fill_size; - } - - if (fillSize != 0) { - const uint32_t width = fillSize / bs; - get_blorp_surf_for_anv_buffer(cmd_buffer->device, - dst_buffer, dstOffset, - width, 1, - width * bs, isl_format, true, - &surf, &isl_surf); - - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - 0, 0, 1, 0, 0, width, 1, - color, 0 /* color_write_disable */); - } + fillSize -= isl_surf.size_B; + surf.addr.offset += isl_surf.size_B; + } while (fillSize != 0); anv_blorp_batch_finish(&batch);