mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-03 00:00:25 +01:00
nvc0/vbo: wrap draw_vbo for multidraw performance
This patch is to avoid the high overhead that exists when trying to kick ever single draw during multidraw. glMultiDrawArrays performance profiling: 342.5 thousand draws/second -> 40 million draws/second Special thanks to Arthur Huillet for helping getting this profiled in irc. Signed-off-by: Yusuf Khan <yusisamerican@gmail.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> --- v2: fix typos pointed out by Arthur v3: nvc0_draw_vbo -> nvc0_draw_single_vbo, intialize count v4: remove num_draws from wrapped function and add mutex assert Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28641>
This commit is contained in:
parent
3fb1a64918
commit
225f2aac96
1 changed files with 144 additions and 115 deletions
|
|
@ -925,6 +925,126 @@ nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_draw_single_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
int s;
|
||||
|
||||
simple_mtx_assert_locked(&nvc0->screen->state_lock);
|
||||
|
||||
if (nvc0->vertprog->vp.need_draw_parameters && (!indirect || indirect->count_from_stream_output)) {
|
||||
PUSH_SPACE(push, 9);
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
|
||||
PUSH_DATA (push, info->index_size ? draws->index_bias : 0);
|
||||
PUSH_DATA (push, info->start_instance);
|
||||
PUSH_DATA (push, drawid_offset);
|
||||
}
|
||||
|
||||
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS &&
|
||||
nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) {
|
||||
nvc0->state.seamless_cube_map = nvc0->seamless_cube_map;
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(TEX_MISC),
|
||||
nvc0->seamless_cube_map ? NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
|
||||
}
|
||||
|
||||
nvc0->base.kick_notify = nvc0_draw_vbo_kick_notify;
|
||||
|
||||
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
|
||||
if (nvc0->constbuf_coherent[s])
|
||||
nvc0->cb_dirty = true;
|
||||
}
|
||||
|
||||
if (nvc0->cb_dirty) {
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
|
||||
nvc0->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
if (!nvc0->textures_coherent[s])
|
||||
continue;
|
||||
|
||||
PUSH_SPACE(push, nvc0->num_textures[s] * 2);
|
||||
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
if (!(nvc0->textures_coherent[s] & (1 << i)))
|
||||
continue;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, (tic->id << 4) | 1);
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->state.vbo_mode) {
|
||||
if (indirect && indirect->buffer)
|
||||
nvc0_push_vbo_indirect(nvc0, info, drawid_offset, indirect, &draws[0]);
|
||||
else
|
||||
nvc0_push_vbo(nvc0, info, indirect, &draws[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
/* space for base instance, flush, and prim restart */
|
||||
PUSH_SPACE(push, 8);
|
||||
|
||||
if (nvc0->state.instance_base != info->start_instance) {
|
||||
nvc0->state.instance_base = info->start_instance;
|
||||
/* NOTE: this does not affect the shader input, should it ? */
|
||||
BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1);
|
||||
PUSH_DATA (push, info->start_instance);
|
||||
}
|
||||
|
||||
nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
|
||||
|
||||
if (!nvc0->base.vbo_dirty && info->index_size && !info->has_user_indices &&
|
||||
info->index.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->base.vbo_dirty = true;
|
||||
|
||||
nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
|
||||
|
||||
if (nvc0->base.vbo_dirty) {
|
||||
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
|
||||
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
|
||||
nvc0->base.vbo_dirty = false;
|
||||
}
|
||||
|
||||
if (unlikely(indirect && indirect->buffer)) {
|
||||
nvc0_draw_indirect(nvc0, info, drawid_offset, indirect);
|
||||
} else
|
||||
if (unlikely(indirect && indirect->count_from_stream_output)) {
|
||||
nvc0_draw_stream_output(nvc0, info, indirect);
|
||||
} else
|
||||
if (info->index_size) {
|
||||
bool shorten = info->index_bounds_valid && info->max_index <= 65535;
|
||||
|
||||
if (info->primitive_restart && info->restart_index > 65535)
|
||||
shorten = false;
|
||||
|
||||
nvc0_draw_elements(nvc0, shorten, info,
|
||||
info->mode, draws[0].start, draws[0].count,
|
||||
info->instance_count, draws->index_bias, info->index_size);
|
||||
} else {
|
||||
nvc0_draw_arrays(nvc0,
|
||||
info->mode, draws[0].start, draws[0].count,
|
||||
info->instance_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* Thin wrapper to avoid kicking every 3 ns during multidraw */
|
||||
|
||||
void
|
||||
nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
|
|
@ -932,19 +1052,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
|
|||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws)
|
||||
{
|
||||
if (num_draws > 1) {
|
||||
util_draw_multi(pipe, info, drawid_offset, indirect, draws, num_draws);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!indirect && (!draws[0].count || !info->instance_count))
|
||||
return;
|
||||
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
unsigned vram_domain = NV_VRAM_DOMAIN(&screen->base);
|
||||
int s;
|
||||
unsigned count_total = 0;
|
||||
|
||||
/* The rest is copied straight from util_multi_draw
|
||||
*
|
||||
* XXX: Properly rewrite vbo handling in nvc0/nv50
|
||||
*
|
||||
*/
|
||||
|
||||
unsigned drawid = drawid_offset;
|
||||
|
||||
/* dont wanna start trippin */
|
||||
simple_mtx_lock(&nvc0->screen->state_lock);
|
||||
|
||||
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
|
||||
if (info->index_bounds_valid) {
|
||||
|
|
@ -957,12 +1081,17 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
|
|||
nvc0->instance_off = info->start_instance;
|
||||
nvc0->instance_max = info->instance_count - 1;
|
||||
|
||||
/* Get total amount of draw counts to determine whether to push or upload vertices */
|
||||
for (unsigned i = 0; i < num_draws; i++) {
|
||||
count_total += draws[i].count;
|
||||
}
|
||||
|
||||
/* For picking only a few vertices from a large user buffer, push is better,
|
||||
* if index count is larger and we expect repeated vertices, suggest upload.
|
||||
*/
|
||||
nvc0->vbo_push_hint =
|
||||
(!indirect || indirect->count_from_stream_output) && info->index_size &&
|
||||
(nvc0->vb_elt_limit >= (draws[0].count * 2));
|
||||
(nvc0->vb_elt_limit >= (count_total * 2));
|
||||
|
||||
if (nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))
|
||||
nvc0->constant_vbos = nvc0->vertex->constant_vbos & nvc0->vbo_user;
|
||||
|
|
@ -1028,115 +1157,15 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
|
|||
BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, vram_domain | NOUVEAU_BO_RD,
|
||||
screen->text);
|
||||
|
||||
simple_mtx_lock(&nvc0->screen->state_lock);
|
||||
|
||||
nvc0_state_validate_3d(nvc0, ~0);
|
||||
|
||||
if (nvc0->vertprog->vp.need_draw_parameters && (!indirect || indirect->count_from_stream_output)) {
|
||||
PUSH_SPACE(push, 9);
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
|
||||
PUSH_DATA (push, info->index_size ? draws->index_bias : 0);
|
||||
PUSH_DATA (push, info->start_instance);
|
||||
PUSH_DATA (push, drawid_offset);
|
||||
for (unsigned i = 0; i < num_draws; i++) {
|
||||
if (indirect || (draws[i].count && info->instance_count))
|
||||
nvc0_draw_single_vbo(pipe, info, drawid, indirect, &draws[i]);
|
||||
if (info->increment_draw_id)
|
||||
drawid++;
|
||||
}
|
||||
|
||||
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS &&
|
||||
nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) {
|
||||
nvc0->state.seamless_cube_map = nvc0->seamless_cube_map;
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(TEX_MISC),
|
||||
nvc0->seamless_cube_map ? NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
|
||||
}
|
||||
|
||||
nvc0->base.kick_notify = nvc0_draw_vbo_kick_notify;
|
||||
|
||||
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
|
||||
if (nvc0->constbuf_coherent[s])
|
||||
nvc0->cb_dirty = true;
|
||||
}
|
||||
|
||||
if (nvc0->cb_dirty) {
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
|
||||
nvc0->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
if (!nvc0->textures_coherent[s])
|
||||
continue;
|
||||
|
||||
PUSH_SPACE(push, nvc0->num_textures[s] * 2);
|
||||
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
if (!(nvc0->textures_coherent[s] & (1 << i)))
|
||||
continue;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, (tic->id << 4) | 1);
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->state.vbo_mode) {
|
||||
if (indirect && indirect->buffer)
|
||||
nvc0_push_vbo_indirect(nvc0, info, drawid_offset, indirect, &draws[0]);
|
||||
else
|
||||
nvc0_push_vbo(nvc0, info, indirect, &draws[0]);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* space for base instance, flush, and prim restart */
|
||||
PUSH_SPACE(push, 8);
|
||||
|
||||
if (nvc0->state.instance_base != info->start_instance) {
|
||||
nvc0->state.instance_base = info->start_instance;
|
||||
/* NOTE: this does not affect the shader input, should it ? */
|
||||
BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1);
|
||||
PUSH_DATA (push, info->start_instance);
|
||||
}
|
||||
|
||||
nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
|
||||
|
||||
if (!nvc0->base.vbo_dirty && info->index_size && !info->has_user_indices &&
|
||||
info->index.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nvc0->base.vbo_dirty = true;
|
||||
|
||||
nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
|
||||
|
||||
if (nvc0->base.vbo_dirty) {
|
||||
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
|
||||
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
|
||||
nvc0->base.vbo_dirty = false;
|
||||
}
|
||||
|
||||
if (unlikely(indirect && indirect->buffer)) {
|
||||
nvc0_draw_indirect(nvc0, info, drawid_offset, indirect);
|
||||
} else
|
||||
if (unlikely(indirect && indirect->count_from_stream_output)) {
|
||||
nvc0_draw_stream_output(nvc0, info, indirect);
|
||||
} else
|
||||
if (info->index_size) {
|
||||
bool shorten = info->index_bounds_valid && info->max_index <= 65535;
|
||||
|
||||
if (info->primitive_restart && info->restart_index > 65535)
|
||||
shorten = false;
|
||||
|
||||
nvc0_draw_elements(nvc0, shorten, info,
|
||||
info->mode, draws[0].start, draws[0].count,
|
||||
info->instance_count, draws->index_bias, info->index_size);
|
||||
} else {
|
||||
nvc0_draw_arrays(nvc0,
|
||||
info->mode, draws[0].start, draws[0].count,
|
||||
info->instance_count);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
PUSH_KICK(push);
|
||||
simple_mtx_unlock(&nvc0->screen->state_lock);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue