gallium: bypass u_vbuf if it's not needed (no fallbacks and no user VBOs)

This decreases CPU overhead, because u_vbuf is completely bypassed
in those cases.

Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
Marek Olšák 2019-12-29 23:00:53 -05:00
parent 9f6020abc6
commit eb1e10d0be
7 changed files with 113 additions and 31 deletions

View file

@ -64,7 +64,10 @@ struct sampler_info
struct cso_context {
struct pipe_context *pipe;
struct cso_cache *cache;
struct u_vbuf *vbuf;
struct u_vbuf *vbuf_current;
bool always_use_vbuf;
boolean has_geometry_shader;
boolean has_tessellation;
@ -296,6 +299,8 @@ static void cso_init_vbuf(struct cso_context *cso, unsigned flags)
(uses_user_vertex_buffers &&
caps.fallback_only_for_user_vbuffers)) {
cso->vbuf = u_vbuf_create(cso->pipe, &caps);
cso->vbuf_current = cso->vbuf;
cso->always_use_vbuf = caps.fallback_always;
}
}
@ -1112,7 +1117,7 @@ cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
const struct pipe_vertex_element *states)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (vbuf) {
u_vbuf_set_vertex_elements(vbuf, count, states);
@ -1126,7 +1131,7 @@ cso_set_vertex_elements(struct cso_context *ctx,
static void
cso_save_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (vbuf) {
u_vbuf_save_vertex_elements(vbuf);
@ -1140,7 +1145,7 @@ cso_save_vertex_elements(struct cso_context *ctx)
static void
cso_restore_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (vbuf) {
u_vbuf_restore_vertex_elements(vbuf);
@ -1181,7 +1186,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (!count)
return;
@ -1197,7 +1202,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
static void
cso_save_vertex_buffer0(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (vbuf) {
u_vbuf_save_vertex_buffer0(vbuf);
@ -1211,7 +1216,7 @@ cso_save_vertex_buffer0(struct cso_context *ctx)
static void
cso_restore_vertex_buffer0(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
struct u_vbuf *vbuf = ctx->vbuf_current;
if (vbuf) {
u_vbuf_restore_vertex_buffer0(vbuf);
@ -1222,6 +1227,68 @@ cso_restore_vertex_buffer0(struct cso_context *ctx)
pipe_vertex_buffer_unreference(&ctx->vertex_buffer0_saved);
}
/**
 * Bind vertex buffers and vertex elements with a single call, choosing per
 * call whether the state goes through u_vbuf or straight to cso_context.
 *
 * u_vbuf is used only when it exists and either it must always be used
 * (ctx->always_use_vbuf) or this call supplies user vertex buffers.
 * Otherwise u_vbuf is bypassed and becomes disabled (ctx->vbuf_current is
 * set to NULL); calling this function again is how it gets re-enabled.
 *
 * Bypassing u_vbuf lowers CPU overhead for draws that don't need it, such
 * as VBOs, glBegin/End, and display lists.
 *
 * Do not use this from internal "save states, draw, restore states"
 * operations: the states live in either cso_context or u_vbuf, never both.
 *
 * \param velem_count               number of entries in \p velems
 * \param velems                    vertex elements to bind
 * \param vb_count                  number of entries in \p vbuffers,
 *                                  bound starting at slot 0
 * \param unbind_trailing_vb_count  number of slots following \p vb_count
 *                                  that must be unbound
 * \param vbuffers                  vertex buffers to bind
 * \param uses_user_vertex_buffers  true if \p vbuffers contains user buffers
 */
void
cso_set_vertex_buffers_and_elements(struct cso_context *ctx,
                                    unsigned velem_count,
                                    const struct pipe_vertex_element *velems,
                                    unsigned vb_count,
                                    unsigned unbind_trailing_vb_count,
                                    const struct pipe_vertex_buffer *vbuffers,
                                    bool uses_user_vertex_buffers)
{
   struct u_vbuf *mgr = ctx->vbuf;
   /* Every slot touched by this call: the new bindings plus the trailing
    * ones that have to go away. */
   const unsigned total_slots = vb_count + unbind_trailing_vb_count;
   const bool route_through_vbuf =
      mgr && (ctx->always_use_vbuf || uses_user_vertex_buffers);

   if (!route_through_vbuf) {
      /* Direct path: state is bound through cso_context. */
      if (ctx->vbuf_current) {
         /* Leaving u_vbuf: drop everything it holds first. */
         if (total_slots) {
            u_vbuf_set_vertex_buffers(mgr, 0, total_slots, NULL);
         }

         /* Clear the elements so the CSO is bound again on the next use. */
         u_vbuf_unset_vertex_elements(mgr);
         ctx->vbuf_current = NULL;
      } else if (unbind_trailing_vb_count) {
         cso_set_vertex_buffers_direct(ctx, vb_count,
                                       unbind_trailing_vb_count, NULL);
      }

      if (vb_count) {
         cso_set_vertex_buffers_direct(ctx, 0, vb_count, vbuffers);
      }
      cso_set_vertex_elements_direct(ctx, velem_count, velems);
      return;
   }

   /* u_vbuf path. */
   if (!ctx->vbuf_current) {
      /* Entering u_vbuf: drop everything cso_context holds first. */
      if (total_slots) {
         cso_set_vertex_buffers_direct(ctx, 0, total_slots, NULL);
      }

      /* Clear the elements so the CSO is bound again on the next use. */
      ctx->velements = NULL;
      ctx->vbuf_current = mgr;
   } else if (unbind_trailing_vb_count) {
      u_vbuf_set_vertex_buffers(mgr, vb_count, unbind_trailing_vb_count,
                                NULL);
   }

   if (vb_count) {
      u_vbuf_set_vertex_buffers(mgr, 0, vb_count, vbuffers);
   }
   u_vbuf_set_vertex_elements(mgr, velem_count, velems);
}
void
cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage,
@ -1717,7 +1784,7 @@ void
cso_draw_vbo(struct cso_context *cso,
const struct pipe_draw_info *info)
{
struct u_vbuf *vbuf = cso->vbuf;
struct u_vbuf *vbuf = cso->vbuf_current;
/* We can't have both indirect drawing and SO-vertex-count drawing */
assert(info->indirect == NULL || info->count_from_stream_output == NULL);

View file

@ -219,6 +219,15 @@ void cso_save_constant_buffer_slot0(struct cso_context *cso,
void cso_restore_constant_buffer_slot0(struct cso_context *cso,
enum pipe_shader_type shader_stage);
/* Optimized version: sets vertex buffers and vertex elements in one call.
 *
 * vb_count buffers from vbuffers are bound starting at slot 0, and the
 * unbind_trailing_vb_count slots that follow them are unbound.
 * uses_user_vertex_buffers tells the implementation whether this call
 * supplies user vertex buffers; together with the context's u_vbuf
 * configuration it decides whether the state is routed through u_vbuf or
 * bound directly.
 *
 * NOTE(review): presumably "optimized" relative to separate
 * cso_set_vertex_buffers + cso_set_vertex_elements calls -- confirm.
 */
void
cso_set_vertex_buffers_and_elements(struct cso_context *ctx,
unsigned velem_count,
const struct pipe_vertex_element *velems,
unsigned vb_count,
unsigned unbind_trailing_vb_count,
const struct pipe_vertex_buffer *vbuffers,
bool uses_user_vertex_buffers);
/* drawing */

View file

@ -373,6 +373,11 @@ void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}
/**
 * Forget the vertex-elements state currently tracked by the manager.
 *
 * This only resets mgr->ve to NULL; it does not bind, unbind or destroy
 * anything itself. cso_context calls it when it stops routing vertex state
 * through u_vbuf, with the intent that the elements CSO gets re-bound the
 * next time u_vbuf is used.
 */
void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
{
mgr->ve = NULL;
}
void u_vbuf_destroy(struct u_vbuf *mgr)
{
struct pipe_screen *screen = mgr->pipe->screen;

View file

@ -71,6 +71,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr);
/* State and draw functions. */
void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
const struct pipe_vertex_element *states);
void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr);
void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *bufs);

View file

@ -63,7 +63,8 @@ st_setup_arrays(struct st_context *st,
const struct st_vertex_program *vp,
const struct st_common_variant *vp_variant,
struct pipe_vertex_element *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers,
bool *has_user_vertex_buffers);
void
st_setup_current(struct st_context *st,

View file

@ -364,36 +364,19 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
}
}
/**
 * Hand the given vertex buffers and vertex elements to the state tracker's
 * cso_context.
 *
 * Buffers are bound starting at slot 0. If the previous call bound more
 * buffers than this one, the leftover slots are unbound, and the new count
 * is remembered in st->last_num_vbuffers for the next call.
 *
 * \param st             state tracker context
 * \param vbuffers       vertex buffers to bind
 * \param num_vbuffers   number of entries in \p vbuffers
 * \param velements      vertex elements to bind
 * \param num_velements  number of entries in \p velements
 */
static void
set_vertex_attribs(struct st_context *st,
                   struct pipe_vertex_buffer *vbuffers,
                   unsigned num_vbuffers,
                   struct pipe_vertex_element *velements,
                   unsigned num_velements)
{
   struct cso_context *cso_ctx = st->cso_context;

   cso_set_vertex_buffers(cso_ctx, 0, num_vbuffers, vbuffers);

   /* Fewer buffers than last time: clear the slots that are now stale. */
   if (num_vbuffers < st->last_num_vbuffers) {
      cso_set_vertex_buffers(cso_ctx, num_vbuffers,
                             st->last_num_vbuffers - num_vbuffers, NULL);
   }
   st->last_num_vbuffers = num_vbuffers;

   cso_set_vertex_elements(cso_ctx, num_velements, velements);
}
void
st_setup_arrays(struct st_context *st,
const struct st_vertex_program *vp,
const struct st_common_variant *vp_variant,
struct pipe_vertex_element *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers,
bool *has_user_vertex_buffers)
{
struct gl_context *ctx = st->ctx;
const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
const ubyte *input_to_index = vp->input_to_index;
bool uses_user_vertex_buffers = false;
/* Process attribute array data. */
GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
@ -429,6 +412,7 @@ st_setup_arrays(struct st_context *st,
vbuffer[bufidx].is_user_buffer = true;
vbuffer[bufidx].buffer_offset = 0;
uses_user_vertex_buffers = true;
if (!binding->InstanceDivisor)
st->draw_needs_minmax_index = true;
}
@ -451,6 +435,7 @@ st_setup_arrays(struct st_context *st,
input_to_index[attr]);
}
}
*has_user_vertex_buffers = uses_user_vertex_buffers;
}
void
@ -555,12 +540,14 @@ st_update_array(struct st_context *st)
unsigned num_vbuffers = 0, first_upload_vbuffer;
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_velements;
bool uses_user_vertex_buffers;
st->draw_needs_minmax_index = false;
/* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
/* Setup arrays */
st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers);
st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers,
&uses_user_vertex_buffers);
/* _NEW_CURRENT_ATTRIB */
/* Setup current uploads */
@ -569,7 +556,17 @@ st_update_array(struct st_context *st)
/* Set the array into cso */
num_velements = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_velements);
/* Set vertex buffers and elements. */
struct cso_context *cso = st->cso_context;
unsigned unbind_trailing_vbuffers =
st->last_num_vbuffers > num_vbuffers ?
st->last_num_vbuffers - num_vbuffers : 0;
cso_set_vertex_buffers_and_elements(cso, num_velements, velements,
num_vbuffers,
unbind_trailing_vbuffers,
vbuffer, uses_user_vertex_buffers);
st->last_num_vbuffers = num_vbuffers;
/* Unreference uploaded buffer resources. */
for (unsigned i = first_upload_vbuffer; i < num_vbuffers; ++i) {

View file

@ -160,7 +160,9 @@ st_feedback_draw_vbo(struct gl_context *ctx,
/* Must setup these after state validation! */
/* Setup arrays */
st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers);
bool uses_user_vertex_buffers;
st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers,
&uses_user_vertex_buffers);
/* Setup current values as userspace arrays */
st_setup_current_user(st, vp, vp_variant, velements, vbuffers, &num_vbuffers);