blorp: Work around sampler overfetch for buffer copies

First, the surface dimensions are used to determine the range of valid
pages that the data in the buffer overlaps, then rows are removed from
the surface until it does not overfetch into any neighboring pages. If
any rows were removed, an extra BTI is set up with a texel buffer that
views the contents of all the rows that were removed, and the shader is
compiled with a branch to sample the last rows through the texel buffer
instead of the main surface.

Using the texel buffer allows it to access the last rows without dealing
with overfetch or weird alignment hacks, and restricting texel buffer
usage to just the part of the surface that can't be accessed safely
ensures that we don't significantly impact performance for any buffer to
image copy that is unlucky enough to be close to a page boundary.

Co-authored-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40149>
This commit is contained in:
Calder Young 2026-03-20 16:48:55 -07:00 committed by Marge Bot
parent fd7c094f7b
commit 64b5823d33
12 changed files with 314 additions and 83 deletions

View file

@ -447,5 +447,6 @@ genX(crocus_init_blorp)(struct crocus_context *ice)
blorp_init_elk(&ice->blorp, ice, &screen->isl_dev, screen->compiler, NULL);
ice->blorp.lookup_shader = crocus_blorp_lookup_shader;
ice->blorp.upload_shader = crocus_blorp_upload_shader;
ice->blorp.get_surface_address = blorp_get_surface_address;
ice->blorp.exec = crocus_blorp_exec;
}

View file

@ -513,6 +513,7 @@ genX(init_blorp)(struct iris_context *ice)
#endif
ice->blorp.lookup_shader = iris_blorp_lookup_shader;
ice->blorp.upload_shader = iris_blorp_upload_shader;
ice->blorp.get_surface_address = blorp_get_surface_address;
ice->blorp.exec = iris_blorp_exec;
ice->blorp.enable_tbimr = screen->driconf.enable_tbimr;
}

View file

@ -253,6 +253,26 @@ blorp_surface_info_init(struct blorp_batch *batch,
info->surf.phys_level0_sa.w += surf->tile_x_sa;
info->surf.phys_level0_sa.h += surf->tile_y_sa;
}
if (blorp->isl_dev->requires_padding && !is_dest &&
(batch->flags & BLORP_BATCH_SRC_UNPADDED)) {
blorp_assert_is_buffer(info->surf, info->view);
/* Infers the page boundaries for a buffer to image copy based on the
* surface address and dimensions, following Vulkan semantics to
* determine the extent of the final row.
*/
uint64_t size_B =
(uint64_t) info->surf.phys_level0_sa.w *
(isl_format_get_layout(info->view.format)->bpb / 8) +
(uint64_t) (info->surf.phys_level0_sa.h - 1) *
info->surf.row_pitch_B;
uint64_t mask = blorp->isl_dev->info->mem_alignment - 1;
uint64_t address = batch->blorp->get_surface_address(batch, info->addr);
info->page_base = address & ~mask;
info->page_limit = (address + size_B + mask) & ~mask;
}
}

View file

@ -79,6 +79,20 @@ enum blorp_dynamic_state {
BLORP_DYNAMIC_STATE_COUNT,
};
/* Driver-opaque description of a GPU memory location used by blorp. */
struct blorp_address {
/* Driver-specific buffer object handle (e.g. anv wraps it as
 * anv_address::bo); opaque to blorp itself.
 */
void *buffer;
/* Byte offset from the start of the buffer object. */
int64_t offset;
/* Driver-interpreted relocation flags — presumably passed through to the
 * driver's relocation machinery; semantics are driver-defined.
 */
unsigned reloc_flags;
/* Memory object control state value applied when this address is baked
 * into surface state.
 */
uint32_t mocs;
/**
* True if this buffer is intended to live in device-local memory.
* This is only a performance hint; it's OK to set it to true even
* if eviction has temporarily forced the buffer to system memory.
*/
bool local_hint;
};
struct blorp_context {
void *driver_ctx;
@ -105,6 +119,8 @@ struct blorp_context {
const void *prog_data,
uint32_t prog_data_size,
uint32_t *kernel_out, void *prog_data_out);
uint64_t (*get_surface_address)(struct blorp_batch *batch,
struct blorp_address addr);
void (*exec)(struct blorp_batch *batch, const struct blorp_params *params);
struct blorp_config config;
@ -155,6 +171,10 @@ enum blorp_batch_flags {
* Mostly for debug
*/
BLORP_BATCH_DISABLE_VF_DISTRIBUTION = BITFIELD_BIT(6),
/** Source buffer is unpadded and needs careful accesses
*/
BLORP_BATCH_SRC_UNPADDED = BITFIELD_BIT(7),
};
struct blorp_batch {
@ -186,20 +206,6 @@ blorp_batch_isl_copy_usage(const struct blorp_batch *batch, bool is_dest,
return usage;
}
struct blorp_address {
void *buffer;
int64_t offset;
unsigned reloc_flags;
uint32_t mocs;
/**
* True if this buffer is intended to live in device-local memory.
* This is only a performance hint; it's OK to set it to true even
* if eviction has temporarily forced the buffer to system memory.
*/
bool local_hint;
};
static inline bool
blorp_address_is_null(struct blorp_address address)
{

View file

@ -45,6 +45,8 @@ struct blorp_blit_vars {
nir_variable *v_src_offset;
nir_variable *v_dst_offset;
nir_variable *v_src_inv_size;
nir_variable *v_src_buffer_first_row;
nir_variable *v_src_buffer_row_pitch;
};
static void
@ -60,6 +62,8 @@ blorp_blit_vars_init(nir_builder *b, struct blorp_blit_vars *v)
LOAD_INPUT(src_offset, glsl_vector_type(GLSL_TYPE_UINT, 2))
LOAD_INPUT(dst_offset, glsl_vector_type(GLSL_TYPE_UINT, 2))
LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2))
LOAD_INPUT(src_buffer_first_row, glsl_uint_type())
LOAD_INPUT(src_buffer_row_pitch, glsl_uint_type())
#undef LOAD_INPUT
}
@ -224,6 +228,47 @@ blorp_nir_txf(nir_builder *b, struct blorp_blit_vars *v,
return &tex->def;
}
/* Same as blorp_nir_txf, except the last few rows may be loaded from a texel
 * buffer bound to BLORP_TEXBUF_BT_INDEX instead to avoid page faults due to
 * an unaligned source.
 *
 * Rows with y < src_buffer_first_row are fetched from the regular 2D
 * surface; rows at or beyond it are linearized into a 1D texel-buffer
 * coordinate and fetched through the extra binding-table entry.
 */
static nir_def *
blorp_nir_txf_buf(nir_builder *b, struct blorp_blit_vars *v,
nir_def *pos, nir_alu_type dst_type,
const struct intel_device_info *devinfo)
{
/* First row (in surface coordinates) that must come from the texel buffer. */
nir_def *buf_start = nir_load_var(b, v->v_src_buffer_first_row);
/* Just use if statements, non uniform texture access is expensive */
nir_push_if(b, nir_ilt(b, nir_channel(b, pos, 1), buf_start));
/* Then-branch: normal 2D texel fetch from the main surface. */
nir_def *tex = blorp_nir_txf(b, v, pos, dst_type, devinfo);
nir_push_else(b, NULL);
/* Get the offset into the buffer if we're beyond src_buffer_first_row */
/* 1D element index = (y - buf_start) * row_pitch_elements + x, with the
 * second coordinate forced to 0 for the buffer fetch.
 */
pos = nir_vec2(b,
nir_iadd(b,
nir_imul(b,
nir_isub(b,
nir_channel(b, pos, 1),
buf_start),
nir_load_var(b, v->v_src_buffer_row_pitch)),
nir_channel(b, pos, 0)),
nir_imm_int(b, 0));
/* Else-branch: fetch from the texel buffer bound at BLORP_TEXBUF_BT_INDEX. */
nir_tex_instr *buf =
blorp_create_nir_tex_instr(b, v, nir_texop_txf, pos, 1, dst_type, devinfo);
buf->texture_index = BLORP_TEXBUF_BT_INDEX;
buf->sampler_dim = GLSL_SAMPLER_DIM_BUF;
nir_builder_instr_insert(b, &buf->instr);
nir_pop_if(b, NULL);
/* Merge the two fetch results with a phi at the join point. */
return nir_if_phi(b, tex, &buf->def);
}
static nir_def *
blorp_nir_txf_ms(nir_builder *b, struct blorp_blit_vars *v,
nir_def *pos, nir_alu_type dst_type,
@ -1322,6 +1367,7 @@ blorp_build_nir_shader(struct blorp_context *blorp,
case BLORP_FILTER_NONE:
case BLORP_FILTER_NEAREST:
case BLORP_FILTER_SAMPLE_0:
assert(!key->need_src_buffer || key->src_samples == 1);
/* We're going to use texelFetch, so we need integers */
if (src_pos->num_components == 2) {
src_pos = nir_f2i32(&b, src_pos);
@ -1364,7 +1410,9 @@ blorp_build_nir_shader(struct blorp_context *blorp,
* the texturing unit, will cause data to be read from the correct
* memory location. So we can fetch the texel now.
*/
if (key->src_samples == 1) {
if (key->need_src_buffer) {
color = blorp_nir_txf_buf(&b, &v, src_pos, key->texture_data_type, devinfo);
} else if (key->src_samples == 1) {
color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type, devinfo);
} else {
color = blorp_nir_txf_ms(&b, &v, src_pos, key->texture_data_type, devinfo);
@ -1373,6 +1421,7 @@ blorp_build_nir_shader(struct blorp_context *blorp,
case BLORP_FILTER_BILINEAR:
assert(!key->src_tiled_w);
assert(!key->need_src_buffer);
assert(key->tex_samples == key->src_samples);
assert(key->tex_layout == key->src_layout);
@ -1389,6 +1438,7 @@ blorp_build_nir_shader(struct blorp_context *blorp,
case BLORP_FILTER_MIN_SAMPLE:
case BLORP_FILTER_MAX_SAMPLE:
assert(!key->src_tiled_w);
assert(!key->need_src_buffer);
assert(key->tex_samples == key->src_samples);
assert(key->tex_layout == key->src_layout);
@ -2011,6 +2061,48 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev,
info->surf.format = info->view.format = red_format;
}
/**
* Part of a workaround for buffer-to-image copies whose source buffer ends
* close to a page boundary: if sampler overfetch would read past the valid
* pages, shrink the linear 2D surface by enough rows that the remaining
* surface is safe to sample, and mark the removed rows to be fetched through
* a 1D texel buffer instead (see blorp_nir_txf_buf /
* blorp_emit_buffer_surface_state).
*/
static inline void
blorp_surf_convert_overfetch_to_buffer(struct blorp_batch *batch,
struct blorp_surface_info *info)
{
const struct isl_device *isl_dev = batch->blorp->isl_dev;
blorp_assert_is_buffer(info->surf, info->view);
assert(isl_format_block_is_1x1x1(info->view.format));
/* Bytes between the surface start and the end of the last valid page
 * (page_limit was computed from the inferred page boundaries).
 */
uint64_t address = batch->blorp->get_surface_address(batch, info->addr);
uint64_t max_size_B = info->page_limit - address;
uint64_t overfetch_B =
isl_surf_get_sampler_overfetch_size_B(isl_dev, &info->surf, &info->view);
if (overfetch_B > max_size_B) {
/* Number of trailing rows that must be removed so the sampler no longer
 * reads past max_size_B.
 */
uint32_t rows = (uint32_t) DIV_ROUND_UP(overfetch_B - max_size_B,
info->surf.row_pitch_B);
/* We could overflow the subtraction below in some cases */
rows = MIN2(rows, info->surf.logical_level0_px.h);
info->buffer = true;
info->buffer_rows = rows;
info->surf.logical_level0_px.h -= rows;
info->surf.phys_level0_sa.h -= rows;
info->surf.size_B -= rows * info->surf.row_pitch_B;
/* If every row was removed, the whole copy goes through the texel
 * buffer and the 2D surface is empty (a null surface is emitted).
 */
if (info->surf.logical_level0_px.h == 0) {
info->surf.size_B = 0;
return;
}
/* The shrunken surface must now fit within the valid pages. */
assert(isl_surf_get_sampler_overfetch_size_B(isl_dev,
&info->surf, &info->view) <= max_size_B);
}
}
enum blit_shrink_status {
BLIT_NO_SHRINK = 0,
BLIT_SRC_WIDTH_SHRINK = (1 << 0),
@ -2359,6 +2451,21 @@ try_blorp_blit(struct blorp_batch *batch,
key->use_kill = true;
}
if (batch->blorp->isl_dev->requires_padding &&
(batch->flags & BLORP_BATCH_SRC_UNPADDED)) {
params->src.view.usage |= ISL_SURF_USAGE_NO_ARRAY_OVERFETCH_BIT;
blorp_surf_convert_overfetch_to_buffer(batch, &params->src);
}
key->need_src_buffer = params->src.buffer;
if (key->need_src_buffer) {
params->wm_inputs.blit.src_buffer_first_row =
params->src.surf.logical_level0_px.h;
params->wm_inputs.blit.src_buffer_row_pitch =
params->src.surf.row_pitch_B /
(isl_format_get_layout(params->src.view.format)->bpb / 8);
}
if (compute) {
if (!blorp_get_blit_kernel_cs(batch, params, key))
return 0;
@ -2434,9 +2541,11 @@ shrink_surface_params(const struct isl_device *dev,
struct blorp_surface_info *info,
double *x0, double *x1, double *y0, double *y1)
{
uint64_t offset_B;
uint64_t start_offset_B;
uint64_t end_offset_B;
uint32_t x_offset_sa, y_offset_sa, size;
struct isl_extent2d px_size_sa;
struct isl_extent4d surf_size_sa;
int adjust;
blorp_surf_convert_to_single_slice(dev, info);
@ -2449,19 +2558,28 @@ shrink_surface_params(const struct isl_device *dev,
*/
x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
surf_size_sa = (struct isl_extent4d) {
.w = (uint32_t)ceil(*x1) * px_size_sa.w + info->tile_x_sa,
.h = (uint32_t)ceil(*y1) * px_size_sa.h + info->tile_y_sa,
.d = 1,
.a = 1,
};
uint32_t tile_z_sa, tile_a;
isl_tiling_get_intratile_offset_sa(info->surf.tiling, info->surf.dim,
info->surf.msaa_layout,
info->surf.format, info->surf.samples,
info->surf.row_pitch_B,
info->surf.array_pitch_el_rows,
x_offset_sa, y_offset_sa, 0, 0,
&offset_B,
&info->tile_x_sa, &info->tile_y_sa,
&tile_z_sa, &tile_a);
isl_tiling_get_intratile_range_sa(info->surf.tiling, info->surf.dim,
info->surf.msaa_layout,
info->surf.format, info->surf.samples,
info->surf.row_pitch_B,
info->surf.array_pitch_el_rows,
x_offset_sa, y_offset_sa, 0, 0,
surf_size_sa,
&start_offset_B,
&end_offset_B,
&info->tile_x_sa, &info->tile_y_sa,
&tile_z_sa, &tile_a);
assert(tile_z_sa == 0 && tile_a == 0);
info->addr.offset += offset_B;
info->addr.offset += start_offset_B;
adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0;
*x0 += adjust;
@ -2481,6 +2599,7 @@ shrink_surface_params(const struct isl_device *dev,
info->surf.logical_level0_px.height = size;
info->surf.phys_level0_sa.height = size * px_size_sa.h;
info->surf.size_B = end_offset_B - start_offset_B;
info->surf.usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT;
/* Stomp the 64B alignment because we set NO_OVERFETCH_PADDING_BIT */
@ -3085,7 +3204,8 @@ blorp_copy_get_formats(const struct isl_device *isl_dev,
static int
get_max_format_scale(const struct isl_device *isl_dev,
const struct blorp_surface_info *info,
uint32_t x, uint32_t width, uint32_t height)
uint32_t x, uint32_t width, uint32_t height,
bool unpadded)
{
const bool full_width = u_minify(info->surf.logical_level0_px.width,
info->view.base_level) == width;
@ -3168,9 +3288,10 @@ get_max_format_scale(const struct isl_device *isl_dev,
continue;
}
if (!(info->view.usage & ISL_SURF_USAGE_TEXTURE_BIT)) {
/* All surface types except for textures need their row pitch aligned
* to the pixel block size.
if (!(info->view.usage & ISL_SURF_USAGE_TEXTURE_BIT) ||
(isl_dev->requires_padding && unpadded)) {
/* All surface types except for padded textures need their row pitch
* aligned to the pixel block size.
*/
if (info->surf.row_pitch_B * 8 % max_bpb)
continue;
@ -3336,9 +3457,11 @@ blorp_copy(struct blorp_batch *batch,
dst_width = src_width * src_fmtl->bpb / dst_fmtl->bpb;
int max_fmt_scale_src = get_max_format_scale(isl_dev, &params.src, src_x,
src_width, src_height);
src_width, src_height,
batch->flags & BLORP_BATCH_SRC_UNPADDED);
int max_fmt_scale_dst = get_max_format_scale(isl_dev, &params.dst, dst_x,
dst_width, dst_height);
dst_width, dst_height,
false);
int copy_fmt_bpb = MIN2(src_fmtl->bpb * max_fmt_scale_src,
dst_fmtl->bpb * max_fmt_scale_dst);

View file

@ -108,10 +108,6 @@ static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta);
static uint64_t
blorp_get_surface_address(struct blorp_batch *batch,
struct blorp_address address);
#if GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch);
@ -1241,11 +1237,11 @@ blorp_emit_surface_state(struct blorp_batch *batch,
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
.aux_format = surface->aux_format,
.address =
blorp_get_surface_address(batch, surface->addr),
batch->blorp->get_surface_address(batch, surface->addr),
.aux_address = !use_aux_address ? 0 :
blorp_get_surface_address(batch, surface->aux_addr),
batch->blorp->get_surface_address(batch, surface->aux_addr),
.clear_address = !use_clear_address ? 0 :
blorp_get_surface_address(batch, op_clear_addr),
batch->blorp->get_surface_address(batch, op_clear_addr),
.mocs = surface->addr.mocs,
.clear_color = surface->clear_color,
.use_clear_address = use_clear_address);
@ -1287,6 +1283,45 @@ blorp_emit_surface_state(struct blorp_batch *batch,
blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
}
/**
* Emits the remaining rows of the 2D linear surface as a texel buffer, this
* is part of a workaround for performing buffer to image copies when the
* surface is straddling an extra page due to a misaligned sampler cache.
*
* The buffer view starts right after the rows kept in the 2D surface and
* covers the surface->buffer_rows rows that were removed by
* blorp_surf_convert_overfetch_to_buffer.
*/
static void
blorp_emit_buffer_surface_state(struct blorp_batch *batch,
const struct blorp_surface_info *surface,
void *state, uint32_t state_offset)
{
blorp_assert_is_buffer(surface->surf, surface->view);
assert(isl_format_block_is_1x1x1(surface->view.format));
const struct isl_device *isl_dev = batch->blorp->isl_dev;
/* Skip past the rows still served by the main 2D surface. */
struct blorp_address buffer_addr = surface->addr;
buffer_addr.offset +=
surface->surf.row_pitch_B * surface->surf.logical_level0_px.h;
uint32_t element_size_B =
isl_format_get_layout(surface->view.format)->bpb / 8;
/* Size the view following Vulkan row semantics: full pitch for all but
 * the final row, which extends only to its last texel — this is what
 * keeps the view within the valid pages.
 */
uint64_t surface_size_B =
(uint64_t) surface->surf.row_pitch_B * (surface->buffer_rows - 1) +
surface->surf.logical_level0_px.w * element_size_B;
isl_buffer_fill_state(isl_dev, state,
.address =
batch->blorp->get_surface_address(batch, buffer_addr),
.size_B = surface_size_B,
.stride_B = element_size_B,
.format = surface->view.format,
.swizzle = surface->view.swizzle,
.mocs = surface->addr.mocs,
.usage = surface->surf.usage | surface->view.usage);
/* Let the driver patch the address into the surface state if needed. */
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
buffer_addr, 0);
}
static void
blorp_emit_null_surface_state(struct blorp_batch *batch,
const struct blorp_surface_info *surface,
@ -1295,8 +1330,8 @@ blorp_emit_null_surface_state(struct blorp_batch *batch,
struct GENX(RENDER_SURFACE_STATE) ss = {
.SurfaceType = SURFTYPE_NULL,
.SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
.Width = surface->surf.logical_level0_px.width - 1,
.Height = surface->surf.logical_level0_px.height - 1,
.Width = MAX2(surface->surf.logical_level0_px.width, 1) - 1,
.Height = MAX2(surface->surf.logical_level0_px.height, 1) - 1,
.MIPCountLOD = surface->view.base_level,
.MinimumArrayElement = surface->view.base_array_layer,
.Depth = surface->view.array_len - 1,
@ -1329,7 +1364,7 @@ blorp_setup_binding_table(struct blorp_batch *batch,
if (params->use_pre_baked_binding_table) {
bind_offset = params->pre_baked_binding_table_offset;
} else {
unsigned num_surfaces = 1 + params->src.enabled;
unsigned num_surfaces = 1 + params->src.enabled + params->src.buffer;
if (!blorp_alloc_binding_table(batch, num_surfaces,
isl_dev->ss.size, isl_dev->ss.align,
&bind_offset, surface_offsets, surface_maps))
@ -1350,11 +1385,23 @@ blorp_setup_binding_table(struct blorp_batch *batch,
}
if (params->src.enabled) {
blorp_emit_surface_state(batch, &params->src,
params->fast_clear_op,
surface_maps[BLORP_TEXTURE_BT_INDEX],
surface_offsets[BLORP_TEXTURE_BT_INDEX],
0, false);
if (params->src.surf.size_B != 0) {
blorp_emit_surface_state(batch, &params->src,
params->fast_clear_op,
surface_maps[BLORP_TEXTURE_BT_INDEX],
surface_offsets[BLORP_TEXTURE_BT_INDEX],
0, false);
} else {
/* Nothing to do, the entire surface got converted to a buffer */
blorp_emit_null_surface_state(batch, &params->src,
surface_maps[BLORP_TEXTURE_BT_INDEX]);
}
if (params->src.buffer) {
blorp_emit_buffer_surface_state(batch, &params->src,
surface_maps[BLORP_TEXBUF_BT_INDEX],
surface_offsets[BLORP_TEXBUF_BT_INDEX]);
}
}
}

View file

@ -107,10 +107,6 @@ static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta);
static uint64_t
blorp_get_surface_address(struct blorp_batch *batch,
struct blorp_address address);
#if GFX_VER >= 7
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch);
@ -1445,6 +1441,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
uint8_t color_write_disable,
bool is_render_target)
{
assert(!surface->buffer);
const struct isl_device *isl_dev = batch->blorp->isl_dev;
struct isl_surf surf = surface->surf;
@ -1483,9 +1480,9 @@ blorp_emit_surface_state(struct blorp_batch *batch,
.surf = &surf, .view = &surface->view,
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
.address =
blorp_get_surface_address(batch, surface->addr),
batch->blorp->get_surface_address(batch, surface->addr),
.aux_address = !use_aux_address ? 0 :
blorp_get_surface_address(batch, surface->aux_addr),
batch->blorp->get_surface_address(batch, surface->aux_addr),
.mocs = surface->addr.mocs,
.clear_color = surface->clear_color,
.write_disables = write_disable_mask);

View file

@ -75,6 +75,7 @@ struct blorp_compiler {
/* Binding table slots used by blorp shaders. BLORP_TEXBUF_BT_INDEX holds the
 * texel-buffer view of the trailing rows for the unpadded-source workaround.
 */
enum {
BLORP_RENDERBUFFER_BT_INDEX,
BLORP_TEXTURE_BT_INDEX,
BLORP_TEXBUF_BT_INDEX,
BLORP_NUM_BT_ENTRIES
};
@ -84,9 +85,16 @@ struct blorp_surface_info
{
bool enabled;
/* Should we unpack the last few rows using a texel buffer? */
bool buffer;
uint32_t buffer_rows;
struct isl_surf surf;
struct blorp_address addr;
/* Inferred page boundaries of the surface address */
uint64_t page_base, page_limit;
struct isl_surf aux_surf;
struct blorp_address aux_addr;
enum isl_aux_usage aux_usage;
@ -182,6 +190,9 @@ struct blorp_wm_inputs_blit
/* (1/width, 1/height) for the source surface */
float src_inv_size[2];
uint32_t src_buffer_first_row;
uint32_t src_buffer_row_pitch;
/* Minimum layer setting works for all the textures types but texture_3d
* for which the setting has no effect. Use the z-coordinate instead.
*/
@ -204,7 +215,7 @@ struct blorp_wm_inputs
/* Note: Pad out to an integral number of registers when extending, but
* make sure subgroup_id is the last 32-bit item.
*/
uint32_t pad[4];
uint32_t pad[2];
uint32_t subgroup_id;
};
@ -434,6 +445,12 @@ struct blorp_blit_prog_key
*/
bool need_src_offset;
/* True if this blit operation is unpacking the last few rows of the 2D image
* from a 1D buffer. This is part of a workaround for performing buffer-to-image
* copies when the source is straddling an extra page due to a misaligned cache.
*/
bool need_src_buffer;
/* True if this blit operation may involve intratile offsets on the
* destination. In this case, we need to add the offset to gl_FragCoord.
*/
@ -580,6 +597,20 @@ blorp_op_type_is_clear(enum blorp_op op)
}
}
/* Asserts that (surf, view) describe the kind of surface used for a
 * buffer-to-image copy: a single-level, single-sample, non-arrayed linear 2D
 * surface whose row pitch is a whole number of texels.
 */
#define blorp_assert_is_buffer(surf, view) \
do { \
assert((surf).dim == ISL_SURF_DIM_2D); \
assert((surf).tiling == ISL_TILING_LINEAR); \
assert((surf).logical_level0_px.d == 1); \
assert((surf).logical_level0_px.array_len == 1); \
assert((surf).samples == 1); \
assert((surf).levels == 1); \
UNUSED const struct isl_format_layout *fmtl = \
isl_format_get_layout((view).format); \
assert((surf).row_pitch_B % (fmtl->bpb / 8) == 0); \
} while (false)
/** \} */
#ifdef __cplusplus

View file

@ -110,6 +110,17 @@ get_fp64_nir(struct blorp_context *context)
return device->fp64_nir;
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address address)
{
struct anv_address anv_addr = {
.bo = address.buffer,
.offset = address.offset,
};
return anv_address_physical(anv_addr);
}
void
anv_device_init_blorp(struct anv_device *device)
{
@ -126,6 +137,7 @@ anv_device_init_blorp(struct anv_device *device)
device->blorp.context.lookup_shader = lookup_blorp_shader;
device->blorp.context.upload_shader = upload_blorp_shader;
device->blorp.context.enable_tbimr = device->physical->instance->enable_tbimr;
device->blorp.context.get_surface_address = blorp_get_surface_address;
device->blorp.context.exec = anv_genX(device->info, blorp_exec);
device->blorp.context.upload_dynamic_state = upload_dynamic_state;
@ -853,7 +865,7 @@ void anv_CmdCopyBufferToImage2(
anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) {
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
anv_blorp_batch_init(cmd_buffer, &batch, BLORP_BATCH_SRC_UNPADDED);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
@ -1183,9 +1195,10 @@ anv_cmd_copy_addr(struct anv_cmd_buffer *cmd_buffer,
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
BLORP_BATCH_SRC_UNPADDED |
(cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0));
copy_memory(device, &batch, src_addr, dst_addr, size);
@ -1203,9 +1216,10 @@ void anv_CmdCopyBuffer2(
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
BLORP_BATCH_SRC_UNPADDED |
(cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0));
for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
@ -1230,9 +1244,10 @@ anv_cmd_buffer_update_addr(
{
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
BLORP_BATCH_SRC_UNPADDED |
(cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0));
/* We can't quite grab a full block because the state stream needs a
* little data at the top to build its linked list.

View file

@ -95,17 +95,6 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
anv_batch_set_error(&cmd_buffer->batch, result);
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address address)
{
struct anv_address anv_addr = {
.bo = address.buffer,
.offset = address.offset,
};
return anv_address_physical(anv_addr);
}
#if GFX_VER == 9
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)

View file

@ -84,6 +84,14 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
return true;
}
/* This backend does not compute addresses up front; blorp_surface_reloc
 * patches the real address into the surface state later, so a dummy zero
 * is returned here.
 */
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   (void) blorp_batch;
   (void) address;
   return 0;
}
void
anv_device_init_blorp(struct anv_device *device)
{
@ -93,6 +101,7 @@ anv_device_init_blorp(struct anv_device *device)
device->physical->compiler, &config);
device->blorp.lookup_shader = lookup_blorp_shader;
device->blorp.upload_shader = upload_blorp_shader;
device->blorp.get_surface_address = blorp_get_surface_address;
switch (device->info->verx10) {
case 70:
device->blorp.exec = gfx7_blorp_exec;

View file

@ -100,14 +100,6 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
write_reloc(cmd_buffer->device, dest, address_u64, false);
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address address)
{
/* We'll let blorp_surface_reloc write the address. */
return 0;
}
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{