mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-05 10:20:36 +02:00
st/mesa: optimize st_update_arrays using lots of C++ template variants
This adds the following template options:
- add an option to fill TC set_vertex_buffers from st_update_array directly
(always true without u_vbuf, so always used with radeonsi)
- add an option saying that there are no zero-stride attribs
- add an option saying that there are no user buffers
(always true with glthread, so always used with radeonsi)
- add an option saying that there is an identity mapping between vertex
buffers and vertex attribs
I have specifically chosen those options because they improve performance.
I also had other options that didn't, like unrolling the setup_arrays loop.
This adds a total of 42 variants of st_update_array_templ for various cases.
Usually only a few of them are used in practice.
Overhead of st_prepare_draw in VP2020/Catia:
Before: 8.5% of CPU used
After: 6.13% of CPU used
That's a 2.37% improvement. Since there are 4 threads using the CPU and
the percentage includes all threads in the system, the improvement for
the GL thread is about 9.5% (roughly 2.37% * 4; each thread at 25% of global
utilization means 100% utilization in 4 cores).
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27731>
This commit is contained in:
parent
045b1cda57
commit
11dbdedf46
2 changed files with 278 additions and 38 deletions
|
|
@ -44,16 +44,38 @@
|
|||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_threaded_context.h"
|
||||
#include "main/bufferobj.h"
|
||||
#include "main/glformats.h"
|
||||
#include "main/varray.h"
|
||||
#include "main/arrayobj.h"
|
||||
|
||||
enum st_fill_tc_set_vb {
|
||||
FILL_TC_SET_VB_OFF, /* always works */
|
||||
FILL_TC_SET_VB_ON, /* specialized version (faster) */
|
||||
};
|
||||
|
||||
enum st_use_vao_fast_path {
|
||||
VAO_FAST_PATH_OFF, /* more complicated version (slower) */
|
||||
VAO_FAST_PATH_ON, /* always works (faster) */
|
||||
};
|
||||
|
||||
enum st_allow_zero_stride_attribs {
|
||||
ZERO_STRIDE_ATTRIBS_OFF, /* specialized version (faster) */
|
||||
ZERO_STRIDE_ATTRIBS_ON, /* always works */
|
||||
};
|
||||
|
||||
/* Whether vertex attrib indices are equal to their vertex buffer indices. */
|
||||
enum st_identity_attrib_mapping {
|
||||
IDENTITY_ATTRIB_MAPPING_OFF, /* always works */
|
||||
IDENTITY_ATTRIB_MAPPING_ON, /* specialized version (faster) */
|
||||
};
|
||||
|
||||
enum st_allow_user_buffers {
|
||||
USER_BUFFERS_OFF, /* specialized version (faster) */
|
||||
USER_BUFFERS_ON, /* always works */
|
||||
};
|
||||
|
||||
enum st_update_velems {
|
||||
UPDATE_VELEMS_OFF, /* specialized version (faster) */
|
||||
UPDATE_VELEMS_ON, /* always works */
|
||||
|
|
@ -82,7 +104,11 @@ init_velement(struct pipe_vertex_element *velements,
|
|||
* on the stack.
|
||||
*/
|
||||
template<util_popcnt POPCNT,
|
||||
st_fill_tc_set_vb FILL_TC_SET_VB,
|
||||
st_use_vao_fast_path USE_VAO_FAST_PATH,
|
||||
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
|
||||
st_allow_user_buffers ALLOW_USER_BUFFERS,
|
||||
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
|
||||
setup_arrays(struct gl_context *ctx,
|
||||
const struct gl_vertex_array_object *vao,
|
||||
|
|
@ -95,38 +121,73 @@ setup_arrays(struct gl_context *ctx,
|
|||
/* Set up enabled vertex arrays. */
|
||||
if (USE_VAO_FAST_PATH) {
|
||||
const GLubyte *attribute_map =
|
||||
_mesa_vao_attribute_map[vao->_AttributeMapMode];
|
||||
!HAS_IDENTITY_ATTRIB_MAPPING ?
|
||||
_mesa_vao_attribute_map[vao->_AttributeMapMode] : NULL;
|
||||
struct pipe_context *pipe = ctx->pipe;
|
||||
struct tc_buffer_list *next_buffer_list = NULL;
|
||||
|
||||
if (FILL_TC_SET_VB)
|
||||
next_buffer_list = tc_get_next_buffer_list(pipe);
|
||||
|
||||
/* Note: I did try to unroll this loop by passing the number of
|
||||
* iterations as a template parameter, but it resulted in more overhead.
|
||||
*/
|
||||
while (mask) {
|
||||
const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&mask);
|
||||
const struct gl_array_attributes *const attrib =
|
||||
&vao->VertexAttrib[attribute_map[attr]];
|
||||
const struct gl_vertex_buffer_binding *const binding =
|
||||
&vao->BufferBinding[attrib->BufferBindingIndex];
|
||||
const struct gl_array_attributes *attrib;
|
||||
const struct gl_vertex_buffer_binding *binding;
|
||||
|
||||
if (HAS_IDENTITY_ATTRIB_MAPPING) {
|
||||
attrib = &vao->VertexAttrib[attr];
|
||||
binding = &vao->BufferBinding[attr];
|
||||
} else {
|
||||
attrib = &vao->VertexAttrib[attribute_map[attr]];
|
||||
binding = &vao->BufferBinding[attrib->BufferBindingIndex];
|
||||
}
|
||||
const unsigned bufidx = (*num_vbuffers)++;
|
||||
|
||||
/* Set the vertex buffer. */
|
||||
if (binding->BufferObj) {
|
||||
vbuffer[bufidx].buffer.resource =
|
||||
if (!ALLOW_USER_BUFFERS || binding->BufferObj) {
|
||||
assert(binding->BufferObj);
|
||||
struct pipe_resource *buf =
|
||||
_mesa_get_bufferobj_reference(ctx, binding->BufferObj);
|
||||
vbuffer[bufidx].buffer.resource = buf;
|
||||
vbuffer[bufidx].is_user_buffer = false;
|
||||
vbuffer[bufidx].buffer_offset = binding->Offset +
|
||||
attrib->RelativeOffset;
|
||||
if (FILL_TC_SET_VB)
|
||||
tc_track_vertex_buffer(pipe, bufidx, buf, next_buffer_list);
|
||||
} else {
|
||||
vbuffer[bufidx].buffer.user = attrib->Ptr;
|
||||
vbuffer[bufidx].is_user_buffer = true;
|
||||
vbuffer[bufidx].buffer_offset = 0;
|
||||
assert(!FILL_TC_SET_VB);
|
||||
}
|
||||
|
||||
if (!UPDATE_VELEMS)
|
||||
continue;
|
||||
|
||||
/* Determine the vertex element index without popcnt
|
||||
* if !ALLOW_ZERO_STRIDE_ATTRIBS, which means that we don't need
|
||||
* to leave any holes for zero-stride attribs, thus the mapping from
|
||||
* vertex elements to vertex buffers is identity.
|
||||
*/
|
||||
unsigned index;
|
||||
|
||||
if (ALLOW_ZERO_STRIDE_ATTRIBS) {
|
||||
assert(POPCNT != POPCNT_INVALID);
|
||||
index = util_bitcount_fast<POPCNT>(inputs_read &
|
||||
BITFIELD_MASK(attr));
|
||||
} else {
|
||||
index = bufidx;
|
||||
assert(index == util_bitcount(inputs_read &
|
||||
BITFIELD_MASK(attr)));
|
||||
}
|
||||
|
||||
/* Set the vertex element. */
|
||||
init_velement(velements->velems, &attrib->Format, 0,
|
||||
binding->Stride,
|
||||
init_velement(velements->velems, &attrib->Format, 0, binding->Stride,
|
||||
binding->InstanceDivisor, bufidx,
|
||||
dual_slot_inputs & BITFIELD_BIT(attr),
|
||||
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
|
||||
dual_slot_inputs & BITFIELD_BIT(attr), index);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -136,6 +197,15 @@ setup_arrays(struct gl_context *ctx,
|
|||
*/
|
||||
assert(!ctx->Const.UseVAOFastPath || vao->SharedAndImmutable);
|
||||
|
||||
/* Require these because we don't use them here and we don't want to
|
||||
* generate identical template variants.
|
||||
*/
|
||||
assert(!FILL_TC_SET_VB);
|
||||
assert(ALLOW_ZERO_STRIDE_ATTRIBS);
|
||||
assert(!HAS_IDENTITY_ATTRIB_MAPPING);
|
||||
assert(ALLOW_USER_BUFFERS);
|
||||
assert(UPDATE_VELEMS);
|
||||
|
||||
while (mask) {
|
||||
/* The attribute index to start pulling a binding */
|
||||
const gl_vert_attrib i = (gl_vert_attrib)(ffs(mask) - 1);
|
||||
|
|
@ -164,8 +234,6 @@ setup_arrays(struct gl_context *ctx,
|
|||
/* We can assume that we have array for the binding */
|
||||
assert(attrmask);
|
||||
|
||||
if (!UPDATE_VELEMS)
|
||||
continue;
|
||||
|
||||
/* Walk attributes belonging to the binding */
|
||||
do {
|
||||
|
|
@ -173,10 +241,13 @@ setup_arrays(struct gl_context *ctx,
|
|||
const struct gl_array_attributes *const attrib
|
||||
= _mesa_draw_array_attrib(vao, attr);
|
||||
const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
|
||||
assert(POPCNT != POPCNT_INVALID);
|
||||
|
||||
init_velement(velements->velems, &attrib->Format, off,
|
||||
binding->Stride, binding->InstanceDivisor, bufidx,
|
||||
dual_slot_inputs & BITFIELD_BIT(attr),
|
||||
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
|
||||
util_bitcount_fast<POPCNT>(inputs_read &
|
||||
BITFIELD_MASK(attr)));
|
||||
} while (attrmask);
|
||||
}
|
||||
}
|
||||
|
|
@ -192,7 +263,9 @@ st_setup_arrays(struct st_context *st,
|
|||
struct gl_context *ctx = st->ctx;
|
||||
GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
|
||||
|
||||
setup_arrays<POPCNT_NO, VAO_FAST_PATH_ON, UPDATE_VELEMS_ON>
|
||||
setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_ON,
|
||||
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
|
||||
(ctx, ctx->Array._DrawVAO, vp->Base.DualSlotInputs,
|
||||
vp_variant->vert_attrib_mask,
|
||||
vp_variant->vert_attrib_mask & enabled_arrays,
|
||||
|
|
@ -205,7 +278,9 @@ st_setup_arrays(struct st_context *st,
|
|||
* Return the index of the vertex buffer where current attribs have been
|
||||
* uploaded.
|
||||
*/
|
||||
template<util_popcnt POPCNT, st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
|
||||
template<util_popcnt POPCNT,
|
||||
st_fill_tc_set_vb FILL_TC_SET_VB,
|
||||
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
|
||||
st_setup_current(struct st_context *st,
|
||||
const GLbitfield dual_slot_inputs,
|
||||
const GLbitfield inputs_read,
|
||||
|
|
@ -216,6 +291,7 @@ st_setup_current(struct st_context *st,
|
|||
/* Process values that should have better been uniforms in the application */
|
||||
if (curmask) {
|
||||
struct gl_context *ctx = st->ctx;
|
||||
assert(POPCNT != POPCNT_INVALID);
|
||||
unsigned num_attribs = util_bitcount_fast<POPCNT>(curmask);
|
||||
unsigned num_dual_attribs = util_bitcount_fast<POPCNT>(curmask &
|
||||
dual_slot_inputs);
|
||||
|
|
@ -245,6 +321,12 @@ st_setup_current(struct st_context *st,
|
|||
&vbuffer[bufidx].buffer.resource, (void**)&ptr);
|
||||
uint8_t *cursor = ptr;
|
||||
|
||||
if (FILL_TC_SET_VB) {
|
||||
struct pipe_context *pipe = ctx->pipe;
|
||||
tc_track_vertex_buffer(pipe, bufidx, vbuffer[bufidx].buffer.resource,
|
||||
tc_get_next_buffer_list(pipe));
|
||||
}
|
||||
|
||||
do {
|
||||
const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
|
||||
const struct gl_array_attributes *const attrib
|
||||
|
|
@ -264,7 +346,8 @@ st_setup_current(struct st_context *st,
|
|||
if (UPDATE_VELEMS) {
|
||||
init_velement(velements->velems, &attrib->Format, cursor - ptr,
|
||||
0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
|
||||
util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
|
||||
util_bitcount_fast<POPCNT>(inputs_read &
|
||||
BITFIELD_MASK(attr)));
|
||||
}
|
||||
|
||||
cursor += size;
|
||||
|
|
@ -308,7 +391,11 @@ st_setup_current_user(struct st_context *st,
|
|||
}
|
||||
|
||||
template<util_popcnt POPCNT,
|
||||
st_fill_tc_set_vb FILL_TC_SET_VB,
|
||||
st_use_vao_fast_path USE_VAO_FAST_PATH,
|
||||
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
|
||||
st_allow_user_buffers ALLOW_USER_BUFFERS,
|
||||
st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
|
||||
st_update_array_templ(struct st_context *st,
|
||||
const GLbitfield enabled_arrays,
|
||||
|
|
@ -324,47 +411,174 @@ st_update_array_templ(struct st_context *st,
|
|||
const struct st_common_variant *vp_variant = st->vp_variant;
|
||||
const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
|
||||
const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;
|
||||
const GLbitfield userbuf_arrays = inputs_read & enabled_user_arrays;
|
||||
const GLbitfield userbuf_arrays =
|
||||
ALLOW_USER_BUFFERS ? inputs_read & enabled_user_arrays : 0;
|
||||
bool uses_user_vertex_buffers = userbuf_arrays != 0;
|
||||
|
||||
st->draw_needs_minmax_index =
|
||||
(userbuf_arrays & ~nonzero_divisor_arrays) != 0;
|
||||
|
||||
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vbuffers = 0;
|
||||
struct pipe_vertex_buffer vbuffer_local[PIPE_MAX_ATTRIBS];
|
||||
struct pipe_vertex_buffer *vbuffer;
|
||||
unsigned num_vbuffers = 0, num_vbuffers_tc;
|
||||
struct cso_velems_state velements;
|
||||
|
||||
if (FILL_TC_SET_VB) {
|
||||
assert(!uses_user_vertex_buffers);
|
||||
assert(POPCNT != POPCNT_INVALID);
|
||||
num_vbuffers_tc = util_bitcount_fast<POPCNT>(inputs_read &
|
||||
enabled_arrays);
|
||||
|
||||
/* Add up to 1 vertex buffer for zero-stride vertex attribs. */
|
||||
num_vbuffers_tc += ALLOW_ZERO_STRIDE_ATTRIBS &&
|
||||
inputs_read & ~enabled_arrays;
|
||||
vbuffer = tc_add_set_vertex_buffers_call(st->pipe, num_vbuffers_tc);
|
||||
} else {
|
||||
vbuffer = vbuffer_local;
|
||||
}
|
||||
|
||||
/* ST_NEW_VERTEX_ARRAYS */
|
||||
/* Setup arrays */
|
||||
setup_arrays<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS>
|
||||
setup_arrays<POPCNT, FILL_TC_SET_VB, USE_VAO_FAST_PATH,
|
||||
ALLOW_ZERO_STRIDE_ATTRIBS, HAS_IDENTITY_ATTRIB_MAPPING,
|
||||
ALLOW_USER_BUFFERS, UPDATE_VELEMS>
|
||||
(ctx, ctx->Array._DrawVAO, dual_slot_inputs, inputs_read,
|
||||
inputs_read & enabled_arrays, &velements, vbuffer, &num_vbuffers);
|
||||
|
||||
/* _NEW_CURRENT_ATTRIB */
|
||||
/* Setup zero-stride attribs. */
|
||||
st_setup_current<POPCNT, UPDATE_VELEMS>
|
||||
(st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
|
||||
&velements, vbuffer, &num_vbuffers);
|
||||
if (ALLOW_ZERO_STRIDE_ATTRIBS) {
|
||||
st_setup_current<POPCNT, FILL_TC_SET_VB, UPDATE_VELEMS>
|
||||
(st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
|
||||
&velements, vbuffer, &num_vbuffers);
|
||||
} else {
|
||||
assert(!(inputs_read & ~enabled_arrays));
|
||||
}
|
||||
|
||||
struct cso_context *cso = st->cso_context;
|
||||
if (FILL_TC_SET_VB)
|
||||
assert(num_vbuffers == num_vbuffers_tc);
|
||||
|
||||
if (UPDATE_VELEMS) {
|
||||
struct cso_context *cso = st->cso_context;
|
||||
velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
|
||||
|
||||
/* Set vertex buffers and elements. */
|
||||
cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
|
||||
uses_user_vertex_buffers, vbuffer);
|
||||
if (FILL_TC_SET_VB) {
|
||||
cso_set_vertex_elements(cso, &velements);
|
||||
} else {
|
||||
cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
|
||||
uses_user_vertex_buffers, vbuffer);
|
||||
}
|
||||
/* The driver should clear this after it has processed the update. */
|
||||
ctx->Array.NewVertexElements = false;
|
||||
st->uses_user_vertex_buffers = uses_user_vertex_buffers;
|
||||
} else {
|
||||
/* Only vertex buffers. */
|
||||
cso_set_vertex_buffers(cso, num_vbuffers, true, vbuffer);
|
||||
if (!FILL_TC_SET_VB)
|
||||
cso_set_vertex_buffers(st->cso_context, num_vbuffers, true, vbuffer);
|
||||
|
||||
/* This can change only when we update vertex elements. */
|
||||
assert(st->uses_user_vertex_buffers == uses_user_vertex_buffers);
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*update_array_func)(struct st_context *st,
|
||||
const GLbitfield enabled_arrays,
|
||||
const GLbitfield enabled_user_attribs,
|
||||
const GLbitfield nonzero_divisor_attribs);
|
||||
|
||||
/* This just initializes the table of all st_update_array variants. */
|
||||
struct st_update_array_table {
|
||||
update_array_func funcs[2][2][2][2][2][2];
|
||||
|
||||
template<util_popcnt POPCNT,
|
||||
st_fill_tc_set_vb FILL_TC_SET_VB,
|
||||
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
|
||||
st_allow_user_buffers ALLOW_USER_BUFFERS,
|
||||
st_update_velems UPDATE_VELEMS>
|
||||
void init_one()
|
||||
{
|
||||
/* These conditions reduce the number of compiled variants. */
|
||||
/* The TC path is only valid without user buffers.
|
||||
*/
|
||||
constexpr st_fill_tc_set_vb fill_tc_set_vb =
|
||||
!ALLOW_USER_BUFFERS ? FILL_TC_SET_VB : FILL_TC_SET_VB_OFF;
|
||||
|
||||
/* POPCNT is unused without zero-stride attribs and without TC. */
|
||||
constexpr util_popcnt popcnt =
|
||||
!ALLOW_ZERO_STRIDE_ATTRIBS && !fill_tc_set_vb ?
|
||||
POPCNT_INVALID : POPCNT;
|
||||
|
||||
funcs[POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
|
||||
[HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS] =
|
||||
st_update_array_templ<
|
||||
popcnt,
|
||||
fill_tc_set_vb,
|
||||
VAO_FAST_PATH_ON,
|
||||
ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
HAS_IDENTITY_ATTRIB_MAPPING,
|
||||
ALLOW_USER_BUFFERS,
|
||||
UPDATE_VELEMS>;
|
||||
}
|
||||
|
||||
/* We have to do this in stages because of the combinatorial explosion of
|
||||
* variants.
|
||||
*/
|
||||
template<util_popcnt POPCNT,
|
||||
st_fill_tc_set_vb FILL_TC_SET_VB,
|
||||
st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS>
|
||||
void init_last_3_args()
|
||||
{
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_OFF, USER_BUFFERS_OFF,
|
||||
UPDATE_VELEMS_OFF>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_ON,
|
||||
USER_BUFFERS_OFF, UPDATE_VELEMS_OFF>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_ON,
|
||||
USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_ON,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
|
||||
init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
|
||||
IDENTITY_ATTRIB_MAPPING_ON,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
|
||||
}
|
||||
|
||||
st_update_array_table()
|
||||
{
|
||||
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_OFF>();
|
||||
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_ON>();
|
||||
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
|
||||
ZERO_STRIDE_ATTRIBS_OFF>();
|
||||
init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
|
||||
ZERO_STRIDE_ATTRIBS_ON>();
|
||||
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_OFF>();
|
||||
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_ON>();
|
||||
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
|
||||
ZERO_STRIDE_ATTRIBS_OFF>();
|
||||
init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
|
||||
ZERO_STRIDE_ATTRIBS_ON>();
|
||||
}
|
||||
};
|
||||
|
||||
static st_update_array_table update_array_table;
|
||||
|
||||
template<util_popcnt POPCNT,
|
||||
st_use_vao_fast_path USE_VAO_FAST_PATH> void ALWAYS_INLINE
|
||||
st_update_array_impl(struct st_context *st)
|
||||
|
|
@ -384,19 +598,42 @@ st_update_array_impl(struct st_context *st)
|
|||
_mesa_get_derived_vao_masks(ctx, enabled_arrays, &enabled_user_arrays,
|
||||
&nonzero_divisor_arrays);
|
||||
|
||||
/* Execute the slow path without using multiple C++ template variants. */
|
||||
if (!USE_VAO_FAST_PATH) {
|
||||
st_update_array_templ<POPCNT, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
|
||||
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
|
||||
return;
|
||||
}
|
||||
|
||||
/* The fast path that selects from multiple C++ template variants. */
|
||||
const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
|
||||
const GLbitfield enabled_arrays_read = inputs_read & enabled_arrays;
|
||||
|
||||
/* Check cso_context whether it goes directly to TC. */
|
||||
bool fill_tc_set_vbs = st->cso_context->draw_vbo == tc_draw_vbo;
|
||||
bool has_zero_stride_attribs = inputs_read & ~enabled_arrays;
|
||||
uint32_t non_identity_attrib_mapping =
|
||||
vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_IDENTITY ? 0 :
|
||||
vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_POSITION ? VERT_BIT_GENERIC0
|
||||
: VERT_BIT_POS;
|
||||
bool has_identity_mapping = !(enabled_arrays_read &
|
||||
(vao->NonIdentityBufferAttribMapping |
|
||||
non_identity_attrib_mapping));
|
||||
/* has_user_buffers is always false with glthread. */
|
||||
bool has_user_buffers = inputs_read & enabled_user_arrays;
|
||||
/* Changing from user to non-user buffers and vice versa can switch between
|
||||
* cso and u_vbuf, which means that we need to update vertex elements even
|
||||
* when they have not changed.
|
||||
*/
|
||||
if (ctx->Array.NewVertexElements ||
|
||||
st->uses_user_vertex_buffers !=
|
||||
!!(st->vp_variant->vert_attrib_mask & enabled_user_arrays)) {
|
||||
st_update_array_templ<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS_ON>
|
||||
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
|
||||
} else {
|
||||
st_update_array_templ<POPCNT, USE_VAO_FAST_PATH, UPDATE_VELEMS_OFF>
|
||||
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
|
||||
}
|
||||
bool update_velems = ctx->Array.NewVertexElements ||
|
||||
st->uses_user_vertex_buffers != has_user_buffers;
|
||||
|
||||
update_array_table.funcs[POPCNT][fill_tc_set_vbs][has_zero_stride_attribs]
|
||||
[has_identity_mapping][has_user_buffers]
|
||||
[update_velems]
|
||||
(st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
|
||||
}
|
||||
|
||||
/* The default callback that must be present before st_init_update_array
|
||||
|
|
@ -442,7 +679,9 @@ st_create_gallium_vertex_state(struct gl_context *ctx,
|
|||
/* This should use the slow path because there is only 1 interleaved
|
||||
* vertex buffers.
|
||||
*/
|
||||
setup_arrays<POPCNT_NO, VAO_FAST_PATH_OFF, UPDATE_VELEMS_ON>
|
||||
setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
|
||||
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
|
||||
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
|
||||
(ctx, vao, dual_slot_inputs, inputs_read, inputs_read, &velements,
|
||||
vbuffer, &num_vbuffers);
|
||||
|
||||
|
|
|
|||
|
|
@ -399,6 +399,7 @@ util_widen_mask(uint32_t mask, unsigned multiplier)
|
|||
enum util_popcnt {
|
||||
POPCNT_NO,
|
||||
POPCNT_YES,
|
||||
POPCNT_INVALID,
|
||||
};
|
||||
|
||||
/* Convenient function to select popcnt through a C++ template argument.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue