OPTIONAL: iris: Perform BLORP buffer barriers outside of iris_blorp_exec() hook.

The iris_blorp_exec() hook needs to be executed under a single
indivisible sync region, which means that in cases where we need to
emit a PIPE_CONTROL for a buffer barrier we won't be able to track the
subsequent commands separately from the previous commands, which will
prevent us from optimizing out subsequent PIPE_CONTROLs if we
encounter the same buffers again.  In particular I've encountered this
situation in some SynMark test-cases which perform lots of BLORP
operations with the same buffer bound as both source and destination
(in order to generate mipmaps): In such a scenario if the source
requires flushing we'd also end up flushing for the destination
redundantly, even though a single PIPE_CONTROL would have been
sufficient.

This avoids a 4.5% FPS regression in SynMark OglHdrBloom and a 3.5%
FPS regression in SynMark OglMultithread.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3875>
This commit is contained in:
Francisco Jerez 2020-05-06 15:40:30 -07:00 committed by Marge Bot
parent 4b00338bde
commit 8252bb0ec6
4 changed files with 24 additions and 15 deletions

View file

@ -382,6 +382,7 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
iris_resource_prepare_access(ice, src_res, info->src.level, 1,
info->src.box.z, info->src.box.depth,
src_aux_usage, src_clear_supported);
iris_emit_buffer_barrier_for(batch, src_res->bo, IRIS_DOMAIN_OTHER_READ);
struct iris_format_info dst_fmt =
iris_format_for_usage(devinfo, info->dst.format,
@ -401,6 +402,7 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
iris_resource_prepare_access(ice, dst_res, info->dst.level, 1,
info->dst.box.z, info->dst.box.depth,
dst_aux_usage, dst_clear_supported);
iris_emit_buffer_barrier_for(batch, dst_res->bo, IRIS_DOMAIN_RENDER_WRITE);
float src_x0 = info->src.box.x;
float src_x1 = info->src.box.x + info->src.box.width;
@ -527,9 +529,11 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
iris_resource_prepare_access(ice, src_res, info->src.level, 1,
info->src.box.z, info->src.box.depth,
stc_src_aux_usage, false);
iris_emit_buffer_barrier_for(batch, src_res->bo, IRIS_DOMAIN_OTHER_READ);
iris_resource_prepare_access(ice, stc_dst, info->dst.level, 1,
info->dst.box.z, info->dst.box.depth,
stc_dst_aux_usage, false);
iris_emit_buffer_barrier_for(batch, stc_dst->bo, IRIS_DOMAIN_RENDER_WRITE);
iris_blorp_surf_for_resource(&screen->isl_dev, &src_surf,
&src_res->base, stc_src_aux_usage,
info->src.level, false);
@ -664,6 +668,11 @@ iris_copy_region(struct blorp_context *blorp,
.reloc_flags = EXEC_OBJECT_WRITE,
};
iris_emit_buffer_barrier_for(batch, iris_resource_bo(src),
IRIS_DOMAIN_OTHER_READ);
iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst),
IRIS_DOMAIN_RENDER_WRITE);
iris_batch_maybe_flush(batch, 1500);
iris_batch_sync_region_start(batch);
@ -687,6 +696,11 @@ iris_copy_region(struct blorp_context *blorp,
dstz, src_box->depth,
dst_aux_usage, dst_clear_supported);
iris_emit_buffer_barrier_for(batch, iris_resource_bo(src),
IRIS_DOMAIN_OTHER_READ);
iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst),
IRIS_DOMAIN_RENDER_WRITE);
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
for (int slice = 0; slice < src_box->depth; slice++) {

View file

@ -274,27 +274,17 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
PIPE_CONTROL_STALL_AT_SCOREBOARD);
#endif
/* Flush the sampler and render caches. We definitely need to flush the
* sampler cache so that we get updated contents from the render cache for
* the glBlitFramebuffer() source. Also, we are sometimes warned in the
* docs to flush the cache between reinterpretations of the same surface
* data with different formats, which blorp does for stencil and depth
* data.
/* Flush the render cache in cases where the same surface is reinterpreted
* with a different format, which blorp does for stencil and depth data
* among other things. Invalidation of sampler caches and flushing of any
* caches which had previously written the source surfaces should already
* have been handled by the caller.
*/
if (params->src.enabled)
iris_emit_buffer_barrier_for(batch, params->src.addr.buffer,
IRIS_DOMAIN_OTHER_READ);
if (params->dst.enabled) {
iris_cache_flush_for_render(batch, params->dst.addr.buffer,
params->dst.view.format,
params->dst.aux_usage);
}
if (params->depth.enabled)
iris_emit_buffer_barrier_for(batch, params->depth.addr.buffer,
IRIS_DOMAIN_DEPTH_WRITE);
if (params->stencil.enabled)
iris_emit_buffer_barrier_for(batch, params->stencil.addr.buffer,
IRIS_DOMAIN_DEPTH_WRITE);
iris_require_command_space(batch, 1400);

View file

@ -378,6 +378,7 @@ clear_color(struct iris_context *ice,
iris_resource_prepare_render(ice, batch, res, level,
box->z, box->depth, aux_usage);
iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE);
struct blorp_surf surf;
iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
@ -596,6 +597,7 @@ clear_depth_stencil(struct iris_context *ice,
if (clear_depth && z_res) {
iris_resource_prepare_depth(ice, batch, z_res, level, box->z, box->depth);
iris_emit_buffer_barrier_for(batch, z_res->bo, IRIS_DOMAIN_DEPTH_WRITE);
iris_blorp_surf_for_resource(&batch->screen->isl_dev,
&z_surf, &z_res->base, z_res->aux.usage,
level, true);
@ -605,6 +607,8 @@ clear_depth_stencil(struct iris_context *ice,
if (stencil_mask) {
iris_resource_prepare_access(ice, stencil_res, level, 1, box->z,
box->depth, stencil_res->aux.usage, false);
iris_emit_buffer_barrier_for(batch, stencil_res->bo,
IRIS_DOMAIN_DEPTH_WRITE);
iris_blorp_surf_for_resource(&batch->screen->isl_dev,
&stencil_surf, &stencil_res->base,
stencil_res->aux.usage, level, true);

View file

@ -450,6 +450,7 @@ iris_mcs_partial_resolve(struct iris_context *ice,
struct blorp_surf surf;
iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
&res->base, res->aux.usage, 0, true);
iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE);
struct blorp_batch blorp_batch;
iris_batch_sync_region_start(batch);