intel: Add and use isl_surf_from_mem()

Unify code which creates surfaces from buffers. The behavior is slightly
changed to use array layers to enable arrayed buffer clears (as needed).

Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33776>
This commit is contained in:
Nanley Chery 2025-03-02 10:44:35 -08:00 committed by Marge Bot
parent 9a21ac2730
commit fcdae4d4c0
5 changed files with 175 additions and 272 deletions

View file

@ -3081,122 +3081,38 @@ blorp_copy(struct blorp_batch *batch,
do_blorp_blit(batch, &params, &key, &coords); do_blorp_blit(batch, &params, &key, &coords);
} }
static enum isl_format
isl_format_for_size(unsigned size_B)
{
switch (size_B) {
case 1: return ISL_FORMAT_R8_UINT;
case 2: return ISL_FORMAT_R8G8_UINT;
case 4: return ISL_FORMAT_R8G8B8A8_UINT;
case 8: return ISL_FORMAT_R16G16B16A16_UINT;
case 16: return ISL_FORMAT_R32G32B32A32_UINT;
default:
unreachable("Not a power-of-two format size");
}
}
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero.  A zero argument places no
 * constraint on the result, so gcd_pow2_u64(x, 0) is the largest power of
 * two dividing x.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The largest power of two dividing both values is the lowest bit set in
    * either of them, i.e. the lowest set bit of (a | b).  A zero operand
    * contributes no bits and so drops out naturally.
    *
    * Isolating that bit with x & -x keeps the whole computation in 64-bit
    * unsigned arithmetic; shifting the int literal 1 would be undefined for
    * results of 2^31 and above.
    */
   const uint64_t bits = a | b;
   return bits & (~bits + 1);
}
static void
do_buffer_copy(struct blorp_batch *batch,
struct blorp_address *src,
struct blorp_address *dst,
int width, int height, int block_size)
{
/* The actual format we pick doesn't matter as blorp will throw it away.
* The only thing that actually matters is the size.
*/
enum isl_format format = isl_format_for_size(block_size);
UNUSED bool ok;
struct isl_surf surf;
ok = isl_surf_init(batch->blorp->isl_dev, &surf,
.dim = ISL_SURF_DIM_2D,
.format = format,
.width = width,
.height = height,
.depth = 1,
.levels = 1,
.array_len = 1,
.samples = 1,
.row_pitch_B = width * block_size,
.usage = ISL_SURF_USAGE_TEXTURE_BIT |
ISL_SURF_USAGE_RENDER_TARGET_BIT,
.tiling_flags = ISL_TILING_LINEAR_BIT);
assert(ok);
struct blorp_surf src_blorp_surf = {
.surf = &surf,
.addr = *src,
};
struct blorp_surf dst_blorp_surf = {
.surf = &surf,
.addr = *dst,
};
blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
0, 0, 0, 0, width, height);
}
void void
blorp_buffer_copy(struct blorp_batch *batch, blorp_buffer_copy(struct blorp_batch *batch,
struct blorp_address src, struct blorp_address src,
struct blorp_address dst, struct blorp_address dst,
uint64_t size) uint64_t size)
{ {
const struct intel_device_info *devinfo = batch->blorp->isl_dev->info; struct isl_surf surf;
uint64_t copy_size = size; struct blorp_surf src_blorp_surf = {
.surf = &surf,
.addr = src,
};
/* This is maximum possible width/height our HW can handle */ struct blorp_surf dst_blorp_surf = {
uint64_t max_surface_dim = 1 << (devinfo->ver >= 7 ? 14 : 13); .surf = &surf,
.addr = dst,
};
/* First, we compute the biggest format that can be used with the while (size != 0) {
* given offsets and size. isl_surf_from_mem(batch->blorp->isl_dev, &surf,
*/ src_blorp_surf.addr.offset |
int bs = 16; dst_blorp_surf.addr.offset, size, ISL_TILING_LINEAR);
bs = gcd_pow2_u64(bs, src.offset);
bs = gcd_pow2_u64(bs, dst.offset);
bs = gcd_pow2_u64(bs, size);
/* First, we make a bunch of max-sized copies */ for (int i = 0; i < surf.logical_level0_px.a; i++) {
uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; blorp_copy(batch,
while (copy_size >= max_copy_size) { &src_blorp_surf, 0, i,
do_buffer_copy(batch, &src, &dst, max_surface_dim, max_surface_dim, bs); &dst_blorp_surf, 0, i, 0, 0, 0, 0,
copy_size -= max_copy_size; surf.logical_level0_px.w,
src.offset += max_copy_size; surf.logical_level0_px.h);
dst.offset += max_copy_size; }
}
/* Now make a max-width copy */ size -= surf.size_B;
uint64_t height = copy_size / (max_surface_dim * bs); src_blorp_surf.addr.offset += surf.size_B;
assert(height < max_surface_dim); dst_blorp_surf.addr.offset += surf.size_B;
if (height != 0) {
uint64_t rect_copy_size = height * max_surface_dim * bs;
do_buffer_copy(batch, &src, &dst, max_surface_dim, height, bs);
copy_size -= rect_copy_size;
src.offset += rect_copy_size;
dst.offset += rect_copy_size;
}
/* Finally, make a small copy to finish it off */
if (copy_size != 0) {
do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs);
} }
} }

View file

@ -3468,6 +3468,105 @@ isl_surf_init_s(const struct isl_device *dev,
return true; return true;
} }
/* Return the smallest divisor of num that is strictly greater than divisor.
 *
 * When divisor >= num there is no such value, so divisor + 1 is returned
 * instead; this lets a caller looping "while divisor <= num" terminate.
 */
static int64_t
find_next_divisor(int64_t divisor, int64_t num)
{
   if (divisor >= num)
      return divisor + 1;

   /* Linear scan upwards; num itself is the last candidate that can match. */
   int64_t candidate = divisor + 1;
   while (num % candidate != 0)
      candidate++;

   return candidate;
}
/* Find a 2D-array extent, in elements, that covers as many whole tiles as
 * possible without exceeding max_tiles.
 *
 * Candidates are tried in this order: largest tile count first, then the
 * smallest array length dividing that count, then the smallest per-layer
 * height (in tiles) dividing the layer's tile count.  The first candidate
 * that satisfies the dimension and QPitch restrictions is returned.
 */
static struct isl_extent4d
get_2d_array_extent(const struct isl_device *isl_dev,
                    const struct isl_tile_info *tile_info, int64_t max_tiles)
{
   int max_array_len = 2048;
   int max_surface_dim = 1 << (ISL_GFX_VER(isl_dev) >= 7 ? 14 : 13);

   for (int64_t tile_count = max_tiles; tile_count > 0; tile_count--) {
      for (int layers = 1; layers <= MIN2(tile_count, max_array_len);
           layers = find_next_divisor(layers, tile_count)) {
         int64_t tiles_per_layer = tile_count / layers;

         for (int64_t rows_tl = 1; rows_tl <= tiles_per_layer;
              rows_tl = find_next_divisor(rows_tl, tiles_per_layer)) {
            int64_t cols_tl = tiles_per_layer / rows_tl;
            int64_t width_el = cols_tl * tile_info->logical_extent_el.w;
            int64_t height_el = rows_tl * tile_info->logical_extent_el.h;

            /* Both dimensions must stay within the surface size limit. */
            bool fits = width_el <= max_surface_dim &&
                        height_el <= max_surface_dim;

            /* SurfaceQPitch must be multiple of 4. */
            bool qpitch_ok = layers <= 1 || height_el % 4 == 0;

            if (fits && qpitch_ok)
               return isl_extent4d(width_el, height_el, 1, layers);
         }
      }
   }

   unreachable("extent not found for given number of tiles.");
}
/* Initialize *surf as a 2D (possibly arrayed), single-level, single-sample
 * surface describing memory that starts at offset and spans up to
 * mem_size_B bytes, using the requested tiling.
 *
 * The surface may cover less than mem_size_B; surf->size_B reports how many
 * bytes it actually spans.
 */
void
isl_surf_from_mem(const struct isl_device *isl_dev,
                  struct isl_surf *surf,
                  int64_t offset,
                  int64_t mem_size_B,
                  enum isl_tiling tiling)
{
   /* Choose the widest UINT format compatible with the alignment of both
    * the offset and the size: the lowest set bit of (offset | mem_size_B)
    * selects a 1, 2, 4 or 8 byte element, and everything else (including
    * ffs() returning 0) takes the 16-byte format.
    */
   const struct isl_format_layout *fmtl;
   switch (ffs(offset | mem_size_B)) {
   case 1:
      fmtl = isl_format_get_layout(ISL_FORMAT_R8_UINT);
      break;
   case 2:
      fmtl = isl_format_get_layout(ISL_FORMAT_R16_UINT);
      break;
   case 3:
      fmtl = isl_format_get_layout(ISL_FORMAT_R32_UINT);
      break;
   case 4:
      fmtl = isl_format_get_layout(ISL_FORMAT_R32G32_UINT);
      break;
   default:
      fmtl = isl_format_get_layout(ISL_FORMAT_R32G32B32A32_UINT);
      break;
   }

   /* Work out how many whole tiles fit in the range and pick an extent. */
   struct isl_tile_info tile_info;
   isl_tiling_get_info(tiling, ISL_SURF_DIM_2D, ISL_MSAA_LAYOUT_NONE,
                       fmtl->bpb, 1 /* samples */, &tile_info);

   int tile_size_B = tile_info.phys_extent_B.w * tile_info.phys_extent_B.h;
   int64_t max_tiles = mem_size_B / tile_size_B;
   struct isl_extent4d extent =
      get_2d_array_extent(isl_dev, &tile_info, max_tiles);

   /* Build the surface with a tightly packed row pitch. */
   isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT |
                                  ISL_SURF_USAGE_RENDER_TARGET_BIT |
                                  ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT;

   ASSERTED bool ok = isl_surf_init(isl_dev, surf,
                                    .dim = ISL_SURF_DIM_2D,
                                    .format = fmtl->format,
                                    .width = extent.w,
                                    .height = extent.h,
                                    .depth = extent.d,
                                    .levels = 1,
                                    .array_len = extent.a,
                                    .samples = 1,
                                    .row_pitch_B = extent.w * fmtl->bpb / 8,
                                    .usage = usage,
                                    .tiling_flags = 1 << tiling);
   assert(ok);

   /* Sanity checks: layers are packed back to back with no padding rows,
    * and the surface stays within the whole-tile budget.
    */
   assert(extent.a <= 1 || surf->array_pitch_el_rows == extent.h);
   assert(surf->size_B == surf->row_pitch_B * extent.h * extent.a);
   assert(surf->size_B <= max_tiles * tile_size_B);
}
void void
isl_surf_get_tile_info(const struct isl_surf *surf, isl_surf_get_tile_info(const struct isl_surf *surf,
struct isl_tile_info *tile_info) struct isl_tile_info *tile_info)

View file

@ -2732,6 +2732,14 @@ isl_surf_init_s(const struct isl_device *dev,
struct isl_surf *surf, struct isl_surf *surf,
const struct isl_surf_init_info *restrict info); const struct isl_surf_init_info *restrict info);
/* Initialize *surf as the largest surface possible for the specified memory
 * range.
 *
 * The range starts at offset and is mem_size_B bytes long.  The surface is
 * 2D, single-level and single-sample, may use array layers, and gets an
 * unsigned-integer format of 1 to 16 bytes per element chosen from the
 * alignment of offset and mem_size_B.
 *
 * The resulting surf->size_B never exceeds mem_size_B but may be smaller, so
 * a caller that needs to cover the whole range calls this repeatedly,
 * advancing the offset and shrinking the size by surf->size_B each time.
 */
void
isl_surf_from_mem(const struct isl_device *isl_dev,
struct isl_surf *surf,
int64_t offset,
int64_t mem_size_B,
enum isl_tiling tiling);
void void
isl_surf_get_tile_info(const struct isl_surf *surf, isl_surf_get_tile_info(const struct isl_surf *surf,
struct isl_tile_info *tile_info); struct isl_tile_info *tile_info);

View file

@ -723,24 +723,6 @@ void anv_CmdCopyImage2(
end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done); end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
} }
static enum isl_format
isl_format_for_size(unsigned size_B)
{
/* Prefer 32-bit per component formats for CmdFillBuffer */
switch (size_B) {
case 1: return ISL_FORMAT_R8_UINT;
case 2: return ISL_FORMAT_R16_UINT;
case 3: return ISL_FORMAT_R8G8B8_UINT;
case 4: return ISL_FORMAT_R32_UINT;
case 6: return ISL_FORMAT_R16G16B16_UINT;
case 8: return ISL_FORMAT_R32G32_UINT;
case 12: return ISL_FORMAT_R32G32B32_UINT;
case 16: return ISL_FORMAT_R32G32B32A32_UINT;
default:
unreachable("Unknown format size");
}
}
static void static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
struct blorp_batch *batch, struct blorp_batch *batch,
@ -1150,24 +1132,6 @@ void anv_CmdBlitImage2(
anv_blorp_batch_finish(&batch); anv_blorp_batch_finish(&batch);
} }
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero.  A zero argument places no
 * constraint on the result, so gcd_pow2_u64(x, 0) is the largest power of
 * two dividing x.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The largest power of two dividing both values is the lowest bit set in
    * either of them, i.e. the lowest set bit of (a | b).  A zero operand
    * contributes no bits and so drops out naturally.
    *
    * Isolating that bit with x & -x keeps the whole computation in 64-bit
    * unsigned arithmetic; shifting the int literal 1 would be undefined for
    * results of 2^31 and above.
    */
   const uint64_t bits = a | b;
   return bits & (~bits + 1);
}
/* This is maximum possible width/height our HW can handle */ /* This is maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14) #define MAX_SURFACE_DIM (1ull << 14)
@ -1335,79 +1299,43 @@ anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
VkDeviceSize size, VkDeviceSize size,
uint32_t data) uint32_t data)
{ {
struct blorp_surf surf;
struct isl_surf isl_surf;
struct blorp_batch batch; struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline == cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ? cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0); BLORP_BATCH_USE_COMPUTE : 0);
/* First, we compute the biggest format that can be used with the
* given offsets and size.
*/
int bs = 16;
uint64_t offset = address.offset;
bs = gcd_pow2_u64(bs, offset);
bs = gcd_pow2_u64(bs, size);
enum isl_format isl_format = isl_format_for_size(bs);
union isl_color_value color = { union isl_color_value color = {
.u32 = { data, data, data, data }, .u32 = { data, data, data, data },
}; };
const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; isl_surf_usage_flags_t usage =
while (size >= max_fill_size) { get_usage_flag_for_cmd_buffer(cmd_buffer, true /* is_dest */,
get_blorp_surf_for_anv_address(cmd_buffer, false /* is_depth */, address.protected);
(struct anv_address) {
.bo = address.bo, .offset = offset,
},
MAX_SURFACE_DIM, MAX_SURFACE_DIM,
MAX_SURFACE_DIM * bs, isl_format,
true /* is_dest */,
&surf, &isl_surf);
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, struct isl_surf isl_surf;
0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, struct blorp_surf surf = {
.addr = {
.buffer = address.bo,
.offset = address.offset,
.mocs = anv_mocs(cmd_buffer->device, address.bo, usage),
},
.surf = &isl_surf,
};
do {
isl_surf_from_mem(&cmd_buffer->device->isl_dev, &isl_surf,
surf.addr.offset, size, ISL_TILING_LINEAR);
blorp_clear(&batch, &surf, isl_surf.format, ISL_SWIZZLE_IDENTITY, 0, 0,
isl_surf.logical_level0_px.a, 0, 0,
isl_surf.logical_level0_px.w,
isl_surf.logical_level0_px.h,
color, 0 /* color_write_disable */); color, 0 /* color_write_disable */);
size -= max_fill_size;
offset += max_fill_size;
}
uint64_t height = size / (MAX_SURFACE_DIM * bs); size -= isl_surf.size_B;
assert(height < MAX_SURFACE_DIM); surf.addr.offset += isl_surf.size_B;
if (height != 0) { } while (size != 0);
const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
get_blorp_surf_for_anv_address(cmd_buffer,
(struct anv_address) {
.bo = address.bo, .offset = offset,
},
MAX_SURFACE_DIM, height,
MAX_SURFACE_DIM * bs, isl_format,
true /* is_dest */,
&surf, &isl_surf);
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
color, 0 /* color_write_disable */);
size -= rect_fill_size;
offset += rect_fill_size;
}
if (size != 0) {
const uint32_t width = size / bs;
get_blorp_surf_for_anv_address(cmd_buffer,
(struct anv_address) {
.bo = address.bo, .offset = offset,
},
width, 1, width * bs, isl_format,
true /* is_dest */, &surf, &isl_surf);
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
0, 0, 1, 0, 0, width, 1,
color, 0 /* color_write_disable */);
}
anv_blorp_batch_finish(&batch); anv_blorp_batch_finish(&batch);
} }

View file

@ -762,24 +762,6 @@ void anv_CmdBlitImage2(
anv_blorp_batch_finish(&batch); anv_blorp_batch_finish(&batch);
} }
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero.  A zero argument places no
 * constraint on the result, so gcd_pow2_u64(x, 0) is the largest power of
 * two dividing x.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The largest power of two dividing both values is the lowest bit set in
    * either of them, i.e. the lowest set bit of (a | b).  A zero operand
    * contributes no bits and so drops out naturally.
    *
    * Isolating that bit with x & -x keeps the whole computation in 64-bit
    * unsigned arithmetic; shifting the int literal 1 would be undefined for
    * results of 2^31 and above.
    */
   const uint64_t bits = a | b;
   return bits & (~bits + 1);
}
/* This is maximum possible width/height our HW can handle */ /* This is maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14) #define MAX_SURFACE_DIM (1ull << 14)
@ -898,8 +880,6 @@ void anv_CmdFillBuffer(
{ {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
struct blorp_surf surf;
struct isl_surf isl_surf;
struct blorp_batch batch; struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0); anv_blorp_batch_init(cmd_buffer, &batch, 0);
@ -916,62 +896,34 @@ void anv_CmdFillBuffer(
*/ */
fillSize &= ~3ull; fillSize &= ~3ull;
/* First, we compute the biggest format that can be used with the
* given offsets and size.
*/
int bs = 16;
bs = gcd_pow2_u64(bs, dstOffset);
bs = gcd_pow2_u64(bs, fillSize);
enum isl_format isl_format = isl_format_for_size(bs);
union isl_color_value color = { union isl_color_value color = {
.u32 = { data, data, data, data }, .u32 = { data, data, data, data },
}; };
const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; struct isl_surf isl_surf;
while (fillSize >= max_fill_size) { struct blorp_surf surf = {
get_blorp_surf_for_anv_buffer(cmd_buffer->device, .addr = {
dst_buffer, dstOffset, .buffer = dst_buffer->address.bo,
MAX_SURFACE_DIM, MAX_SURFACE_DIM, .offset = dst_buffer->address.offset + dstOffset,
MAX_SURFACE_DIM * bs, isl_format, true, .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
&surf, &isl_surf); ISL_SURF_USAGE_RENDER_TARGET_BIT),
},
.surf = &isl_surf,
};
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, do {
0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, isl_surf_from_mem(&cmd_buffer->device->isl_dev, &isl_surf,
surf.addr.offset, fillSize, ISL_TILING_LINEAR);
blorp_clear(&batch, &surf, isl_surf.format, ISL_SWIZZLE_IDENTITY, 0, 0,
isl_surf.logical_level0_px.a, 0, 0,
isl_surf.logical_level0_px.w,
isl_surf.logical_level0_px.h,
color, 0 /* color_write_disable */); color, 0 /* color_write_disable */);
fillSize -= max_fill_size;
dstOffset += max_fill_size;
}
uint64_t height = fillSize / (MAX_SURFACE_DIM * bs); fillSize -= isl_surf.size_B;
assert(height < MAX_SURFACE_DIM); surf.addr.offset += isl_surf.size_B;
if (height != 0) { } while (fillSize != 0);
const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
get_blorp_surf_for_anv_buffer(cmd_buffer->device,
dst_buffer, dstOffset,
MAX_SURFACE_DIM, height,
MAX_SURFACE_DIM * bs, isl_format, true,
&surf, &isl_surf);
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
color, 0 /* color_write_disable */);
fillSize -= rect_fill_size;
dstOffset += rect_fill_size;
}
if (fillSize != 0) {
const uint32_t width = fillSize / bs;
get_blorp_surf_for_anv_buffer(cmd_buffer->device,
dst_buffer, dstOffset,
width, 1,
width * bs, isl_format, true,
&surf, &isl_surf);
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
0, 0, 1, 0, 0, width, 1,
color, 0 /* color_write_disable */);
}
anv_blorp_batch_finish(&batch); anv_blorp_batch_finish(&batch);