st/mesa: optimize st_update_arrays using lots of C++ template variants

This adds the following template options:
- add an option to fill TC set_vertex_buffers from st_update_array directly
  (always true without u_vbuf, so always used with radeonsi)
- add an option saying that there are no zero-stride attribs
- add an option saying that there are no user buffers
  (always true with glthread, so always used with radeonsi)
- add an option saying that there is an identity mapping between vertex
  buffers and vertex attribs

I have specifically chosen those options because they improve performance.
I also had other options that didn't, like unrolling the setup_arrays loop.

This adds a total of 42 variants of st_update_array_templ for various cases.
Usually only a few of them are used in practice.

Overhead of st_prepare_draw in VP2020/Catia:
    Before: 8.5% of CPU used
    After: 6.13% of CPU used

That's a 2.37% improvement. Since there are 4 threads using the CPU and
the percentage includes all threads in the system, the improvement for
the GL thread is about 9.5% (roughly 2.37% * 4; each thread at 25% of global
utilization means 100% utilization on 4 cores).

Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27731>
This commit is contained in:
Marek Olšák 2024-01-04 12:44:12 -05:00 committed by Marge Bot
parent 045b1cda57
commit 11dbdedf46
2 changed files with 278 additions and 38 deletions

View file

@ -44,16 +44,38 @@
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/u_upload_mgr.h"
#include "util/u_threaded_context.h"
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
#include "main/arrayobj.h"
/* Whether st_update_array adds vertex buffers directly into the threaded
 * context's set_vertex_buffers call (tc_add_set_vertex_buffers_call)
 * instead of going through cso_context.
 */
enum st_fill_tc_set_vb {
FILL_TC_SET_VB_OFF, /* always works */
FILL_TC_SET_VB_ON, /* specialized version (faster) */
};
/* Whether setup_arrays may take the per-attrib fast path; the slow path
 * walks buffer bindings instead (required when the VAO isn't shared and
 * immutable — see the assertion in setup_arrays).
 */
enum st_use_vao_fast_path {
VAO_FAST_PATH_OFF, /* more complicated version (slower) */
VAO_FAST_PATH_ON, /* always works (faster) */
};
/* Whether inputs_read may contain attribs without an enabled array, which
 * are then uploaded from current values as zero-stride attribs by
 * st_setup_current. When OFF, the vertex-element -> vertex-buffer mapping
 * is identity and popcount is not needed to compute element indices.
 */
enum st_allow_zero_stride_attribs {
ZERO_STRIDE_ATTRIBS_OFF, /* specialized version (faster) */
ZERO_STRIDE_ATTRIBS_ON, /* always works */
};
/* Whether vertex attrib indices are equal to their vertex buffer indices.
 * When ON, the _mesa_vao_attribute_map lookup is skipped and both
 * VertexAttrib[] and BufferBinding[] are indexed by the attrib bit directly.
 */
enum st_identity_attrib_mapping {
IDENTITY_ATTRIB_MAPPING_OFF, /* always works */
IDENTITY_ATTRIB_MAPPING_ON, /* specialized version (faster) */
};
/* Whether enabled attribs may source from user (client-memory) pointers.
 * When OFF, every binding must have a buffer object (asserted in
 * setup_arrays). Always OFF with glthread, which uploads user arrays.
 */
enum st_allow_user_buffers {
USER_BUFFERS_OFF, /* specialized version (faster) */
USER_BUFFERS_ON, /* always works */
};
enum st_update_velems {
UPDATE_VELEMS_OFF, /* specialized version (faster) */
UPDATE_VELEMS_ON, /* always works */
@ -82,7 +104,11 @@ init_velement(struct pipe_vertex_element *velements,
* on the stack.
*/
template<util_popcnt POPCNT,
st_fill_tc_set_vb FILL_TC_SET_VB,
st_use_vao_fast_path USE_VAO_FAST_PATH,
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
st_allow_user_buffers ALLOW_USER_BUFFERS,
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
setup_arrays(struct gl_context *ctx,
const struct gl_vertex_array_object *vao,
@ -95,38 +121,73 @@ setup_arrays(struct gl_context *ctx,
/* Set up enabled vertex arrays. */
if (USE_VAO_FAST_PATH) {
const GLubyte *attribute_map =
_mesa_vao_attribute_map[vao->_AttributeMapMode];
!HAS_IDENTITY_ATTRIB_MAPPING ?
_mesa_vao_attribute_map[vao->_AttributeMapMode] : NULL;
struct pipe_context *pipe = ctx->pipe;
struct tc_buffer_list *next_buffer_list = NULL;
if (FILL_TC_SET_VB)
next_buffer_list = tc_get_next_buffer_list(pipe);
/* Note: I did try to unroll this loop by passing the number of
* iterations as a template parameter, but it resulted in more overhead.
*/
while (mask) {
const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&mask);
const struct gl_array_attributes *const attrib =
&vao->VertexAttrib[attribute_map[attr]];
const struct gl_vertex_buffer_binding *const binding =
&vao->BufferBinding[attrib->BufferBindingIndex];
const struct gl_array_attributes *attrib;
const struct gl_vertex_buffer_binding *binding;
if (HAS_IDENTITY_ATTRIB_MAPPING) {
attrib = &vao->VertexAttrib[attr];
binding = &vao->BufferBinding[attr];
} else {
attrib = &vao->VertexAttrib[attribute_map[attr]];
binding = &vao->BufferBinding[attrib->BufferBindingIndex];
}
const unsigned bufidx = (*num_vbuffers)++;
/* Set the vertex buffer. */
if (binding->BufferObj) {
vbuffer[bufidx].buffer.resource =
if (!ALLOW_USER_BUFFERS || binding->BufferObj) {
assert(binding->BufferObj);
struct pipe_resource *buf =
_mesa_get_bufferobj_reference(ctx, binding->BufferObj);
vbuffer[bufidx].buffer.resource = buf;
vbuffer[bufidx].is_user_buffer = false;
vbuffer[bufidx].buffer_offset = binding->Offset +
attrib->RelativeOffset;
if (FILL_TC_SET_VB)
tc_track_vertex_buffer(pipe, bufidx, buf, next_buffer_list);
} else {
vbuffer[bufidx].buffer.user = attrib->Ptr;
vbuffer[bufidx].is_user_buffer = true;
vbuffer[bufidx].buffer_offset = 0;
assert(!FILL_TC_SET_VB);
}
if (!UPDATE_VELEMS)
continue;
/* Determine the vertex element index without popcnt
* if !ALLOW_ZERO_STRIDE_ATTRIBS, which means that we don't need
* to leave any holes for zero-stride attribs, thus the mapping from
* vertex elements to vertex buffers is identity.
*/
unsigned index;
if (ALLOW_ZERO_STRIDE_ATTRIBS) {
assert(POPCNT != POPCNT_INVALID);
index = util_bitcount_fast<POPCNT>(inputs_read &
BITFIELD_MASK(attr));
} else {
index = bufidx;
assert(index == util_bitcount(inputs_read &
BITFIELD_MASK(attr)));
}
/* Set the vertex element. */
init_velement(velements->velems, &attrib->Format, 0,
binding->Stride,
init_velement(velements->velems, &attrib->Format, 0, binding->Stride,
binding->InstanceDivisor, bufidx,
dual_slot_inputs & BITFIELD_BIT(attr),
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
dual_slot_inputs & BITFIELD_BIT(attr), index);
}
return;
}
@ -136,6 +197,15 @@ setup_arrays(struct gl_context *ctx,
*/
assert(!ctx->Const.UseVAOFastPath || vao->SharedAndImmutable);
/* Require these because we don't use them here and we don't want to
* generate identical template variants.
*/
assert(!FILL_TC_SET_VB);
assert(ALLOW_ZERO_STRIDE_ATTRIBS);
assert(!HAS_IDENTITY_ATTRIB_MAPPING);
assert(ALLOW_USER_BUFFERS);
assert(UPDATE_VELEMS);
while (mask) {
/* The attribute index to start pulling a binding */
const gl_vert_attrib i = (gl_vert_attrib)(ffs(mask) - 1);
@ -164,8 +234,6 @@ setup_arrays(struct gl_context *ctx,
/* We can assume that we have array for the binding */
assert(attrmask);
if (!UPDATE_VELEMS)
continue;
/* Walk attributes belonging to the binding */
do {
@ -173,10 +241,13 @@ setup_arrays(struct gl_context *ctx,
const struct gl_array_attributes *const attrib
= _mesa_draw_array_attrib(vao, attr);
const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
assert(POPCNT != POPCNT_INVALID);
init_velement(velements->velems, &attrib->Format, off,
binding->Stride, binding->InstanceDivisor, bufidx,
dual_slot_inputs & BITFIELD_BIT(attr),
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
util_bitcount_fast<POPCNT>(inputs_read &
BITFIELD_MASK(attr)));
} while (attrmask);
}
}
@ -192,7 +263,9 @@ st_setup_arrays(struct st_context *st,
struct gl_context *ctx = st->ctx;
GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
setup_arrays<POPCNT_NO, VAO_FAST_PATH_ON, UPDATE_VELEMS_ON>
setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_ON,
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
(ctx, ctx->Array._DrawVAO, vp->Base.DualSlotInputs,
vp_variant->vert_attrib_mask,
vp_variant->vert_attrib_mask & enabled_arrays,
@ -205,7 +278,9 @@ st_setup_arrays(struct st_context *st,
* Return the index of the vertex buffer where current attribs have been
* uploaded.
*/
template<util_popcnt POPCNT, st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
template<util_popcnt POPCNT,
st_fill_tc_set_vb FILL_TC_SET_VB,
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_setup_current(struct st_context *st,
const GLbitfield dual_slot_inputs,
const GLbitfield inputs_read,
@ -216,6 +291,7 @@ st_setup_current(struct st_context *st,
/* Process values that should have better been uniforms in the application */
if (curmask) {
struct gl_context *ctx = st->ctx;
assert(POPCNT != POPCNT_INVALID);
unsigned num_attribs = util_bitcount_fast<POPCNT>(curmask);
unsigned num_dual_attribs = util_bitcount_fast<POPCNT>(curmask &
dual_slot_inputs);
@ -245,6 +321,12 @@ st_setup_current(struct st_context *st,
&vbuffer[bufidx].buffer.resource, (void**)&ptr);
uint8_t *cursor = ptr;
if (FILL_TC_SET_VB) {
struct pipe_context *pipe = ctx->pipe;
tc_track_vertex_buffer(pipe, bufidx, vbuffer[bufidx].buffer.resource,
tc_get_next_buffer_list(pipe));
}
do {
const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
const struct gl_array_attributes *const attrib
@ -264,7 +346,8 @@ st_setup_current(struct st_context *st,
if (UPDATE_VELEMS) {
init_velement(velements->velems, &attrib->Format, cursor - ptr,
0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
util_bitcount_fast<POPCNT>(inputs_read &
BITFIELD_MASK(attr)));
}
cursor += size;
@ -308,7 +391,11 @@ st_setup_current_user(struct st_context *st,
}
template<util_popcnt POPCNT,
st_fill_tc_set_vb FILL_TC_SET_VB,
st_use_vao_fast_path USE_VAO_FAST_PATH,
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
st_allow_user_buffers ALLOW_USER_BUFFERS,
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_update_array_templ(struct st_context *st,
const GLbitfield enabled_arrays,
@ -324,47 +411,174 @@ st_update_array_templ(struct st_context *st,
const struct st_common_variant *vp_variant = st->vp_variant;
const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;
const GLbitfield userbuf_arrays = inputs_read & enabled_user_arrays;
const GLbitfield userbuf_arrays =
ALLOW_USER_BUFFERS ? inputs_read & enabled_user_arrays : 0;
bool uses_user_vertex_buffers = userbuf_arrays != 0;
st->draw_needs_minmax_index =
(userbuf_arrays & ~nonzero_divisor_arrays) != 0;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
struct pipe_vertex_buffer vbuffer_local[PIPE_MAX_ATTRIBS];
struct pipe_vertex_buffer *vbuffer;
unsigned num_vbuffers = 0, num_vbuffers_tc;
struct cso_velems_state velements;
if (FILL_TC_SET_VB) {
assert(!uses_user_vertex_buffers);
assert(POPCNT != POPCNT_INVALID);
num_vbuffers_tc = util_bitcount_fast<POPCNT>(inputs_read &
enabled_arrays);
/* Add up to 1 vertex buffer for zero-stride vertex attribs. */
num_vbuffers_tc += ALLOW_ZERO_STRIDE_ATTRIBS &&
inputs_read & ~enabled_arrays;
vbuffer = tc_add_set_vertex_buffers_call(st->pipe, num_vbuffers_tc);
} else {
vbuffer = vbuffer_local;
}
/* ST_NEW_VERTEX_ARRAYS */
/* Setup arrays */
setup_arrays<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS>
setup_arrays<POPCNT, FILL_TC_SET_VB, USE_VAO_FAST_PATH,
ALLOW_ZERO_STRIDE_ATTRIBS, HAS_IDENTITY_ATTRIB_MAPPING,
ALLOW_USER_BUFFERS, UPDATE_VELEMS>
(ctx, ctx->Array._DrawVAO, dual_slot_inputs, inputs_read,
inputs_read & enabled_arrays, &velements, vbuffer, &num_vbuffers);
/* _NEW_CURRENT_ATTRIB */
/* Setup zero-stride attribs. */
st_setup_current<POPCNT, UPDATE_VELEMS>
(st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
&velements, vbuffer, &num_vbuffers);
if (ALLOW_ZERO_STRIDE_ATTRIBS) {
st_setup_current<POPCNT, FILL_TC_SET_VB, UPDATE_VELEMS>
(st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
&velements, vbuffer, &num_vbuffers);
} else {
assert(!(inputs_read & ~enabled_arrays));
}
struct cso_context *cso = st->cso_context;
if (FILL_TC_SET_VB)
assert(num_vbuffers == num_vbuffers_tc);
if (UPDATE_VELEMS) {
struct cso_context *cso = st->cso_context;
velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
/* Set vertex buffers and elements. */
cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
uses_user_vertex_buffers, vbuffer);
if (FILL_TC_SET_VB) {
cso_set_vertex_elements(cso, &velements);
} else {
cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
uses_user_vertex_buffers, vbuffer);
}
/* The driver should clear this after it has processed the update. */
ctx->Array.NewVertexElements = false;
st->uses_user_vertex_buffers = uses_user_vertex_buffers;
} else {
/* Only vertex buffers. */
cso_set_vertex_buffers(cso, num_vbuffers, true, vbuffer);
if (!FILL_TC_SET_VB)
cso_set_vertex_buffers(st->cso_context, num_vbuffers, true, vbuffer);
/* This can change only when we update vertex elements. */
assert(st->uses_user_vertex_buffers == uses_user_vertex_buffers);
}
}
/* Signature shared by all st_update_array_templ instantiations stored in
 * st_update_array_table::funcs.
 */
typedef void (*update_array_func)(struct st_context *st,
const GLbitfield enabled_arrays,
const GLbitfield enabled_user_attribs,
const GLbitfield nonzero_divisor_attribs);
/* This just initializes the table of all st_update_array variants. */
struct st_update_array_table {
/* Indexed as [POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
 * [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS].
 * The index order must match the lookup in st_update_array_impl.
 * Some slots alias the same instantiation because of the constexpr
 * remapping in init_one(), which reduces the number of compiled variants.
 */
update_array_func funcs[2][2][2][2][2][2];
/* Store one st_update_array_templ instantiation into the table slot for
 * the given template-argument combination.
 */
template<util_popcnt POPCNT,
st_fill_tc_set_vb FILL_TC_SET_VB,
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
st_allow_user_buffers ALLOW_USER_BUFFERS,
st_update_velems UPDATE_VELEMS>
void init_one()
{
/* These conditions reduce the number of compiled variants. */
/* The TC path is only valid without user buffers.
 */
constexpr st_fill_tc_set_vb fill_tc_set_vb =
!ALLOW_USER_BUFFERS ? FILL_TC_SET_VB : FILL_TC_SET_VB_OFF;
/* POPCNT is unused without zero-stride attribs and without TC. */
constexpr util_popcnt popcnt =
!ALLOW_ZERO_STRIDE_ATTRIBS && !fill_tc_set_vb ?
POPCNT_INVALID : POPCNT;
/* Note: the slot is indexed with the ORIGINAL template args, while the
 * stored function uses the remapped popcnt/fill_tc_set_vb, so several
 * slots can point at the same variant.
 */
funcs[POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
[HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS] =
st_update_array_templ<
popcnt,
fill_tc_set_vb,
VAO_FAST_PATH_ON,
ALLOW_ZERO_STRIDE_ATTRIBS,
HAS_IDENTITY_ATTRIB_MAPPING,
ALLOW_USER_BUFFERS,
UPDATE_VELEMS>;
}
/* We have to do this in stages because of the combinatorial explosion of
 * variants.
 */
/* Instantiate all 8 combinations of the last three template arguments
 * (identity mapping x user buffers x update velems) for a fixed prefix.
 */
template<util_popcnt POPCNT,
st_fill_tc_set_vb FILL_TC_SET_VB,
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS>
void init_last_3_args()
{
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_OFF, USER_BUFFERS_OFF,
UPDATE_VELEMS_OFF>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_ON,
USER_BUFFERS_OFF, UPDATE_VELEMS_OFF>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_ON,
USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_ON,
USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
IDENTITY_ATTRIB_MAPPING_ON,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
}
/* Fill the whole table: all 8 combinations of the first three arguments
 * (POPCNT x FILL_TC_SET_VB x ALLOW_ZERO_STRIDE_ATTRIBS), each expanding
 * to 8 variants via init_last_3_args(). POPCNT_INVALID slots are never
 * indexed directly; init_one() remaps to it internally where popcount
 * is unused.
 */
st_update_array_table()
{
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
ZERO_STRIDE_ATTRIBS_OFF>();
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
ZERO_STRIDE_ATTRIBS_ON>();
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
ZERO_STRIDE_ATTRIBS_OFF>();
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
ZERO_STRIDE_ATTRIBS_ON>();
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
ZERO_STRIDE_ATTRIBS_OFF>();
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
ZERO_STRIDE_ATTRIBS_ON>();
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
ZERO_STRIDE_ATTRIBS_OFF>();
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
ZERO_STRIDE_ATTRIBS_ON>();
}
};
static st_update_array_table update_array_table;
template<util_popcnt POPCNT,
st_use_vao_fast_path USE_VAO_FAST_PATH> void ALWAYS_INLINE
st_update_array_impl(struct st_context *st)
@ -384,19 +598,42 @@ st_update_array_impl(struct st_context *st)
_mesa_get_derived_vao_masks(ctx, enabled_arrays, &enabled_user_arrays,
&nonzero_divisor_arrays);
/* Execute the slow path without using multiple C++ template variants. */
if (!USE_VAO_FAST_PATH) {
st_update_array_templ<POPCNT, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
return;
}
/* The fast path that selects from multiple C++ template variants. */
const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
const GLbitfield enabled_arrays_read = inputs_read & enabled_arrays;
/* Check cso_context whether it goes directly to TC. */
bool fill_tc_set_vbs = st->cso_context->draw_vbo == tc_draw_vbo;
bool has_zero_stride_attribs = inputs_read & ~enabled_arrays;
uint32_t non_identity_attrib_mapping =
vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_IDENTITY ? 0 :
vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_POSITION ? VERT_BIT_GENERIC0
: VERT_BIT_POS;
bool has_identity_mapping = !(enabled_arrays_read &
(vao->NonIdentityBufferAttribMapping |
non_identity_attrib_mapping));
/* has_user_buffers is always false with glthread. */
bool has_user_buffers = inputs_read & enabled_user_arrays;
/* Changing from user to non-user buffers and vice versa can switch between
* cso and u_vbuf, which means that we need to update vertex elements even
* when they have not changed.
*/
if (ctx->Array.NewVertexElements ||
st->uses_user_vertex_buffers !=
!!(st->vp_variant->vert_attrib_mask & enabled_user_arrays)) {
st_update_array_templ<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS_ON>
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
} else {
st_update_array_templ<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS_OFF>
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
}
bool update_velems = ctx->Array.NewVertexElements ||
st->uses_user_vertex_buffers != has_user_buffers;
update_array_table.funcs[POPCNT][fill_tc_set_vbs][has_zero_stride_attribs]
[has_identity_mapping][has_user_buffers]
[update_velems]
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
}
/* The default callback that must be present before st_init_update_array
@ -442,7 +679,9 @@ st_create_gallium_vertex_state(struct gl_context *ctx,
/* This should use the slow path because there is only 1 interleaved
* vertex buffers.
*/
setup_arrays<POPCNT_NO, VAO_FAST_PATH_OFF, UPDATE_VELEMS_ON>
setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
(ctx, vao, dual_slot_inputs, inputs_read, inputs_read, &velements,
vbuffer, &num_vbuffers);

View file

@ -399,6 +399,7 @@ util_widen_mask(uint32_t mask, unsigned multiplier)
/* Selects the bit-count implementation used by util_bitcount_fast<>.
 * POPCNT_YES presumably selects the native popcnt path — see
 * util_bitcount_fast (defined below this view).
 */
enum util_popcnt {
POPCNT_NO,
POPCNT_YES,
POPCNT_INVALID, /* for template variants that must never call popcount;
                 * guarded by assert(POPCNT != POPCNT_INVALID) at call sites
                 */
};
/* Convenient function to select popcnt through a C++ template argument.