iris: Use separate dirty bits for UBO and SSBO flushes.

This moves UBO+SSBO flushing into a dirty bit separate from the one
used for image and sampler views, which saves some CPU overhead in the
frequent case where buffers from only one or the other set are updated.

Omitting this commit would lead to the following statistically
significant Piglit Draw Overhead regressions:

 107/DrawArrays (16 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:           XXX ±2.31% x22 -> XXX ±1.80% x21  d=-24.31% ±1.91%  p=0.00%
 78/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:            XXX ±2.64% x22 -> XXX ±2.21% x21  d=-24.13% ±2.22%  p=0.00%
 45/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:          XXX ±2.53% x22 -> XXX ±1.90% x21  d=-23.63% ±2.07%  p=0.00%
 16/DrawElements ( 1 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:          XXX ±2.44% x22 -> XXX ±1.97% x21  d=-23.23% ±2.04%  p=0.00%
 108/DrawArrays (16 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:          XXX ±2.10% x22 -> XXX ±1.50% x21  d=-22.15% ±1.71%  p=0.00%
 79/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:           XXX ±1.90% x22 -> XXX ±1.70% x21  d=-22.12% ±1.64%  p=0.00%
 17/DrawElements ( 1 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:         XXX ±2.85% x22 -> XXX ±1.59% x21  d=-21.03% ±2.22%  p=0.00%
 46/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:         XXX ±2.93% x22 -> XXX ±1.09% x21  d=-20.62% ±2.18%  p=0.00%
 7/DrawElements ( 1 VBO| 8 UBO|  8 Tex) w/ vertex attrib change:   XXX ±9.30% x22 -> XXX ±7.02% x21   d=-6.49% ±8.08%  p=1.19%
 68/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ shader program change:   XXX ±1.60% x22 -> XXX ±1.93% x21   d=-2.23% ±1.75%  p=0.01%
 6/DrawElements ( 1 VBO| 8 UBO|  8 Tex) w/ shader program change:  XXX ±2.90% x22 -> XXX ±2.71% x21   d=-2.04% ±2.78%  p=2.08%

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12691>
This commit is contained in:
Francisco Jerez 2020-05-29 16:36:23 -07:00 committed by Marge Bot
parent 5c44df011f
commit 8be320117b
4 changed files with 63 additions and 39 deletions

View file

@ -113,8 +113,11 @@ enum {
#define IRIS_DIRTY_RENDER_BUFFER (1ull << 30)
#define IRIS_DIRTY_STENCIL_REF (1ull << 31)
#define IRIS_DIRTY_VERTEX_BUFFER_FLUSHES (1ull << 32)
/* Set when a constant buffer (UBO) or shader buffer (SSBO) binding changes.
 * The RENDER bit is tested before a draw and the COMPUTE bit before a compute
 * dispatch to decide whether iris_predraw_flush_buffers() has to run. */
#define IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES (1ull << 33)
#define IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES (1ull << 34)
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES | \
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES)
#define IRIS_ALL_DIRTY_FOR_RENDER (~IRIS_ALL_DIRTY_FOR_COMPUTE)
@ -1045,6 +1048,9 @@ void iris_predraw_resolve_inputs(struct iris_context *ice,
void iris_predraw_resolve_framebuffer(struct iris_context *ice,
struct iris_batch *batch,
bool *draw_aux_buffer_disabled);
/**
 * Flush the UBOs and/or SSBOs bound to \p stage.
 *
 * UBOs are flushed when the stage's CONSTANTS bit is set in
 * ice->state.stage_dirty; SSBOs are flushed when its BINDINGS bit is set.
 */
void iris_predraw_flush_buffers(struct iris_context *ice,
struct iris_batch *batch,
gl_shader_stage stage);
void iris_postdraw_update_resolve_tracking(struct iris_context *ice,
struct iris_batch *batch);
void iris_cache_flush_for_render(struct iris_batch *batch,

View file

@ -293,6 +293,11 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
iris_predraw_resolve_framebuffer(ice, batch, draw_aux_buffer_disabled);
}
if (ice->state.dirty & IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES) {
for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++)
iris_predraw_flush_buffers(ice, batch, stage);
}
iris_binder_reserve_3d(ice);
batch->screen->vtbl.update_surface_base_address(batch, &ice->state.binder);
@ -385,6 +390,9 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
if (ice->state.dirty & IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
iris_predraw_resolve_inputs(ice, batch, NULL, MESA_SHADER_COMPUTE, false);
if (ice->state.dirty & IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES)
iris_predraw_flush_buffers(ice, batch, MESA_SHADER_COMPUTE);
iris_batch_maybe_flush(batch, 1500);
iris_update_compiled_compute_shader(ice);

View file

@ -151,34 +151,6 @@ resolve_image_views(struct iris_context *ice,
}
}
/**
 * Emit a buffer barrier for every constant buffer bound to a shader stage.
 *
 * Each set bit in shs->bound_cbufs selects a slot of shs->constbuf; the BO
 * backing that slot is handed to iris_emit_buffer_barrier_for() with
 * IRIS_DOMAIN_OTHER_READ.
 */
static void
flush_ubos(struct iris_batch *batch,
           struct iris_shader_state *shs)
{
   /* Work on a local copy of the mask: u_bit_scan() is expected to clear
    * the bit it returns, which is what terminates the loop. */
   for (uint32_t pending = shs->bound_cbufs; pending != 0; ) {
      const int slot = u_bit_scan(&pending);
      struct iris_resource *res = (void *)shs->constbuf[slot].buffer;

      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ);
   }
}
/**
 * Emit a buffer barrier for every shader storage buffer bound to a stage.
 *
 * Each set bit in shs->bound_ssbos selects a slot of shs->ssbo; the BO
 * backing that slot is handed to iris_emit_buffer_barrier_for() with
 * IRIS_DOMAIN_DATA_WRITE.
 */
static void
flush_ssbos(struct iris_batch *batch,
            struct iris_shader_state *shs)
{
   /* Work on a local copy of the mask: u_bit_scan() is expected to clear
    * the bit it returns, which is what terminates the loop. */
   for (uint32_t pending = shs->bound_ssbos; pending != 0; ) {
      const int slot = u_bit_scan(&pending);
      struct iris_resource *res = (void *)shs->ssbo[slot].buffer;

      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
   }
}
/**
* \brief Resolve buffers before drawing.
*
@ -203,11 +175,7 @@ iris_predraw_resolve_inputs(struct iris_context *ice,
consider_framebuffer);
resolve_image_views(ice, batch, shs, info, draw_aux_buffer_disabled,
consider_framebuffer);
flush_ssbos(batch, shs);
}
if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage))
flush_ubos(batch, shs);
}
void
@ -403,6 +371,48 @@ iris_cache_flush_for_render(struct iris_batch *batch,
}
}
/**
 * Emit a buffer barrier for every constant buffer bound to a shader stage.
 *
 * Each set bit in shs->bound_cbufs selects a slot of shs->constbuf; the BO
 * backing that slot is handed to iris_emit_buffer_barrier_for() with
 * IRIS_DOMAIN_OTHER_READ.
 */
static void
flush_ubos(struct iris_batch *batch,
           struct iris_shader_state *shs)
{
   /* Work on a local copy of the mask: u_bit_scan() is expected to clear
    * the bit it returns, which is what terminates the loop. */
   for (uint32_t pending = shs->bound_cbufs; pending != 0; ) {
      const int slot = u_bit_scan(&pending);
      struct iris_resource *res = (void *)shs->constbuf[slot].buffer;

      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ);
   }
}
/**
 * Emit a buffer barrier for every shader storage buffer bound to a stage.
 *
 * Each set bit in shs->bound_ssbos selects a slot of shs->ssbo; the BO
 * backing that slot is handed to iris_emit_buffer_barrier_for() with
 * IRIS_DOMAIN_DATA_WRITE.
 */
static void
flush_ssbos(struct iris_batch *batch,
            struct iris_shader_state *shs)
{
   /* Work on a local copy of the mask: u_bit_scan() is expected to clear
    * the bit it returns, which is what terminates the loop. */
   for (uint32_t pending = shs->bound_ssbos; pending != 0; ) {
      const int slot = u_bit_scan(&pending);
      struct iris_resource *res = (void *)shs->ssbo[slot].buffer;

      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
   }
}
/**
 * Flush the UBOs and SSBOs of one shader stage.
 *
 * The per-stage dirty bits in ice->state.stage_dirty select the work:
 * the CONSTANTS bit triggers flush_ubos() and the BINDINGS bit triggers
 * flush_ssbos(), both on the shader state of \p stage.
 */
void
iris_predraw_flush_buffers(struct iris_context *ice,
                           struct iris_batch *batch,
                           gl_shader_stage stage)
{
   struct iris_shader_state *stage_state = ice->state.shaders + stage;

   /* The *_VS flags are the base of a per-stage run of bits, so shifting
    * by the stage index picks out this stage's flag. */
   const bool ubos_dirty =
      (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)) != 0;
   const bool ssbos_dirty =
      (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) != 0;

   if (ubos_dirty)
      flush_ubos(batch, stage_state);

   if (ssbos_dirty)
      flush_ssbos(batch, stage_state);
}
static void
iris_resolve_color(struct iris_context *ice,
struct iris_batch *batch,

View file

@ -3232,8 +3232,8 @@ iris_set_constant_buffer(struct pipe_context *ctx,
memcpy(map, input->user_buffer, input->buffer_size);
} else if (input->buffer) {
if (cbuf->buffer != input->buffer) {
ice->state.dirty |= (IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES);
ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
}
if (take_ownership) {
@ -3405,8 +3405,8 @@ iris_set_shader_buffers(struct pipe_context *ctx,
}
}
ice->state.dirty |= (IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES);
ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
}
@ -7315,8 +7315,8 @@ iris_rebind_buffer(struct iris_context *ice,
if (res->bo == iris_resource_bo(cbuf->buffer)) {
pipe_resource_reference(&surf_state->res, NULL);
ice->state.dirty |= (IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES);
ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << s;
}
}