mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-29 13:50:25 +01:00
nv50,nvc0: optimize coherent buffer checking at draw time
Instead of iterating over all the buffer resources looking for coherent buffers at draw time, we keep track of them context-wide, using bitmasks of the currently bound coherent constant buffers, buffer textures and vertex buffers. This saves some iterations (and CPU cycles) in 99.99% of cases, because coherent buffers are rarely used. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
28dea26626
commit
53dddab78c
6 changed files with 82 additions and 68 deletions
|
|
@ -134,9 +134,11 @@ struct nv50_context {
|
|||
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
|
||||
uint16_t constbuf_dirty[3];
|
||||
uint16_t constbuf_valid[3];
|
||||
uint16_t constbuf_coherent[3];
|
||||
|
||||
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vtxbufs;
|
||||
uint32_t vtxbufs_coherent;
|
||||
struct pipe_index_buffer idxbuf;
|
||||
uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
|
||||
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
|
||||
|
|
@ -148,6 +150,7 @@ struct nv50_context {
|
|||
|
||||
struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_textures[3];
|
||||
uint32_t textures_coherent[3];
|
||||
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers[3];
|
||||
|
||||
|
|
|
|||
|
|
@ -664,6 +664,17 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
|
|||
if (old)
|
||||
nv50_screen_tic_unlock(nv50->screen, old);
|
||||
|
||||
if (views[i] && views[i]->texture) {
|
||||
struct pipe_resource *res = views[i]->texture;
|
||||
if (res->target == PIPE_BUFFER &&
|
||||
(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
nv50->textures_coherent[s] |= 1 << i;
|
||||
else
|
||||
nv50->textures_coherent[s] &= ~(1 << i);
|
||||
} else {
|
||||
nv50->textures_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
|
||||
pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
|
||||
}
|
||||
|
||||
|
|
@ -847,13 +858,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
|||
nv50->constbuf[s][i].u.data = cb->user_buffer;
|
||||
nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
|
||||
nv50->constbuf_valid[s] |= 1 << i;
|
||||
nv50->constbuf_coherent[s] &= ~(1 << i);
|
||||
} else
|
||||
if (res) {
|
||||
nv50->constbuf[s][i].offset = cb->buffer_offset;
|
||||
nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
|
||||
nv50->constbuf_valid[s] |= 1 << i;
|
||||
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nv50->constbuf_coherent[s] |= 1 << i;
|
||||
else
|
||||
nv50->constbuf_coherent[s] &= ~(1 << i);
|
||||
} else {
|
||||
nv50->constbuf_valid[s] &= ~(1 << i);
|
||||
nv50->constbuf_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
nv50->constbuf_dirty[s] |= 1 << i;
|
||||
|
||||
|
|
@ -1003,6 +1020,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
|
|||
if (!vb) {
|
||||
nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
|
||||
nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
|
||||
nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1015,9 +1033,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
|
|||
nv50->vbo_constant |= 1 << dst_index;
|
||||
else
|
||||
nv50->vbo_constant &= ~(1 << dst_index);
|
||||
nv50->vtxbufs_coherent &= ~(1 << dst_index);
|
||||
} else {
|
||||
nv50->vbo_user &= ~(1 << dst_index);
|
||||
nv50->vbo_constant &= ~(1 << dst_index);
|
||||
|
||||
if (vb[i].buffer &&
|
||||
vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nv50->vtxbufs_coherent |= (1 << dst_index);
|
||||
else
|
||||
nv50->vtxbufs_coherent &= ~(1 << dst_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -765,7 +765,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
bool tex_dirty = false;
|
||||
int i, s;
|
||||
int s;
|
||||
|
||||
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
|
||||
nv50->vb_elt_first = info->min_index + info->index_bias;
|
||||
|
|
@ -794,27 +794,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
|
||||
push->kick_notify = nv50_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
|
||||
uint32_t valid = nv50->constbuf_valid[s];
|
||||
|
||||
while (valid && !nv50->cb_dirty) {
|
||||
const unsigned i = ffs(valid) - 1;
|
||||
struct pipe_resource *res;
|
||||
|
||||
valid &= ~(1 << i);
|
||||
if (nv50->constbuf[s][i].user)
|
||||
continue;
|
||||
|
||||
res = nv50->constbuf[s][i].u.buf;
|
||||
if (!res)
|
||||
continue;
|
||||
|
||||
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nv50->cb_dirty = true;
|
||||
}
|
||||
if (nv50->constbuf_coherent[s])
|
||||
nv50->cb_dirty = true;
|
||||
}
|
||||
|
||||
/* If there are any coherent constbufs, flush the cache */
|
||||
|
|
@ -825,15 +807,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
}
|
||||
|
||||
for (s = 0; s < 3 && !tex_dirty; ++s) {
|
||||
for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
|
||||
if (!nv50->textures[s][i] ||
|
||||
nv50->textures[s][i]->texture->target != PIPE_BUFFER)
|
||||
continue;
|
||||
if (nv50->textures[s][i]->texture->flags &
|
||||
PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
tex_dirty = true;
|
||||
}
|
||||
if (nv50->textures_coherent[s])
|
||||
tex_dirty = true;
|
||||
}
|
||||
|
||||
if (tex_dirty) {
|
||||
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, 0x20);
|
||||
|
|
@ -853,12 +830,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
PUSH_DATA (push, info->start_instance);
|
||||
}
|
||||
|
||||
for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) {
|
||||
if (!nv50->vtxbuf[i].buffer)
|
||||
continue;
|
||||
if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nv50->base.vbo_dirty = true;
|
||||
}
|
||||
nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent;
|
||||
|
||||
if (nv50->base.vbo_dirty) {
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
|
||||
|
|
|
|||
|
|
@ -134,10 +134,12 @@ struct nvc0_context {
|
|||
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
|
||||
uint16_t constbuf_dirty[6];
|
||||
uint16_t constbuf_valid[6];
|
||||
uint16_t constbuf_coherent[6];
|
||||
bool cb_dirty;
|
||||
|
||||
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vtxbufs;
|
||||
uint32_t vtxbufs_coherent;
|
||||
struct pipe_index_buffer idxbuf;
|
||||
uint32_t constant_vbos;
|
||||
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
|
||||
|
|
@ -149,6 +151,7 @@ struct nvc0_context {
|
|||
struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_textures[6];
|
||||
uint32_t textures_dirty[6];
|
||||
uint32_t textures_coherent[6];
|
||||
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers[6];
|
||||
uint16_t samplers_dirty[6];
|
||||
|
|
|
|||
|
|
@ -554,6 +554,17 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
|
|||
continue;
|
||||
nvc0->textures_dirty[s] |= 1 << i;
|
||||
|
||||
if (views[i] && views[i]->texture) {
|
||||
struct pipe_resource *res = views[i]->texture;
|
||||
if (res->target == PIPE_BUFFER &&
|
||||
(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
nvc0->textures_coherent[s] |= 1 << i;
|
||||
else
|
||||
nvc0->textures_coherent[s] &= ~(1 << i);
|
||||
} else {
|
||||
nvc0->textures_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
|
||||
if (old) {
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
|
||||
nvc0_screen_tic_unlock(nvc0->screen, old);
|
||||
|
|
@ -596,6 +607,17 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
|
|||
continue;
|
||||
nvc0->textures_dirty[s] |= 1 << i;
|
||||
|
||||
if (views[p] && views[p]->texture) {
|
||||
struct pipe_resource *res = views[p]->texture;
|
||||
if (res->target == PIPE_BUFFER &&
|
||||
(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
nvc0->textures_coherent[s] |= 1 << i;
|
||||
else
|
||||
nvc0->textures_coherent[s] &= ~(1 << i);
|
||||
} else {
|
||||
nvc0->textures_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
|
||||
if (nvc0->textures[s][i]) {
|
||||
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
nouveau_bufctx_reset(bctx, bin + i);
|
||||
|
|
@ -842,14 +864,20 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
|||
nvc0->constbuf[s][i].u.data = cb->user_buffer;
|
||||
nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
|
||||
nvc0->constbuf_valid[s] |= 1 << i;
|
||||
nvc0->constbuf_coherent[s] &= ~(1 << i);
|
||||
} else
|
||||
if (cb) {
|
||||
nvc0->constbuf[s][i].offset = cb->buffer_offset;
|
||||
nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
|
||||
nvc0->constbuf_valid[s] |= 1 << i;
|
||||
if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->constbuf_coherent[s] |= 1 << i;
|
||||
else
|
||||
nvc0->constbuf_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
else {
|
||||
nvc0->constbuf_valid[s] &= ~(1 << i);
|
||||
nvc0->constbuf_coherent[s] &= ~(1 << i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1009,6 +1037,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
|
|||
if (!vb) {
|
||||
nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot);
|
||||
nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot);
|
||||
nvc0->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1021,9 +1050,16 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
|
|||
nvc0->constant_vbos |= 1 << dst_index;
|
||||
else
|
||||
nvc0->constant_vbos &= ~(1 << dst_index);
|
||||
nvc0->vtxbufs_coherent &= ~(1 << dst_index);
|
||||
} else {
|
||||
nvc0->vbo_user &= ~(1 << dst_index);
|
||||
nvc0->constant_vbos &= ~(1 << dst_index);
|
||||
|
||||
if (vb[i].buffer &&
|
||||
vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->vtxbufs_coherent |= (1 << dst_index);
|
||||
else
|
||||
nvc0->vtxbufs_coherent &= ~(1 << dst_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -871,7 +871,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
int i, s;
|
||||
int s;
|
||||
|
||||
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
|
||||
nvc0->vb_elt_first = info->min_index + info->index_bias;
|
||||
|
|
@ -922,27 +922,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
|
||||
push->kick_notify = nvc0_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
|
||||
uint32_t valid = nvc0->constbuf_valid[s];
|
||||
|
||||
while (valid && !nvc0->cb_dirty) {
|
||||
const unsigned i = ffs(valid) - 1;
|
||||
struct pipe_resource *res;
|
||||
|
||||
valid &= ~(1 << i);
|
||||
if (nvc0->constbuf[s][i].user)
|
||||
continue;
|
||||
|
||||
res = nvc0->constbuf[s][i].u.buf;
|
||||
if (!res)
|
||||
continue;
|
||||
|
||||
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->cb_dirty = true;
|
||||
}
|
||||
if (nvc0->constbuf_coherent[s])
|
||||
nvc0->cb_dirty = true;
|
||||
}
|
||||
|
||||
if (nvc0->cb_dirty) {
|
||||
|
|
@ -951,14 +933,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
if (!nvc0->textures_coherent[s])
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
struct pipe_resource *res;
|
||||
if (!tic)
|
||||
continue;
|
||||
res = nvc0->textures[s][i]->texture;
|
||||
if (res->target != PIPE_BUFFER ||
|
||||
!(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
if (!(nvc0->textures_coherent[s] & (1 << i)))
|
||||
continue;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
|
||||
|
|
@ -984,12 +964,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
PUSH_DATA (push, info->start_instance);
|
||||
}
|
||||
|
||||
for (i = 0; i < nvc0->num_vtxbufs && !nvc0->base.vbo_dirty; ++i) {
|
||||
if (!nvc0->vtxbuf[i].buffer)
|
||||
continue;
|
||||
if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->base.vbo_dirty = true;
|
||||
}
|
||||
nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
|
||||
|
||||
if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
|
||||
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue