nvc0: improve vertex state validation

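The vertex attribute formats are now re-emitted only when necessary, i.e. when the vertex state object or the set of constant (zero-stride) vertex buffers has changed.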
This commit is contained in:
Christoph Bumiller 2012-03-16 17:37:32 +01:00
parent 784f49e696
commit edbfeed56f
6 changed files with 157 additions and 95 deletions

View file

@ -1307,6 +1307,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800
#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820
#define NVC0_3D_BLEND_ENABLES 0x00003858

View file

@ -84,6 +84,8 @@ struct nvc0_context {
boolean prim_restart;
uint32_t instance_elts; /* bitmask of per-instance elements */
uint32_t instance_base;
uint32_t constant_vbos;
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
uint8_t num_vtxbufs;
@ -115,6 +117,7 @@ struct nvc0_context {
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
struct pipe_index_buffer idxbuf;
uint32_t constant_vbos;
uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
@ -240,7 +243,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
void
nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
void nvc0_vertex_arrays_validate(struct nvc0_context *);
void nvc0_idxbuf_validate(struct nvc0_context *);

View file

@ -8,6 +8,20 @@
* bra(n)z annul: no delay slot
*/
/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[].
 * Args: size, bitfield
 *
 * Going by the per-instruction comments below, the macro loops over the
 * bitfield one bit at a time and sends each bit to consecutive
 * VERTEX_ARRAY_PER_INSTANCE slots, starting at slot 0:
 *   $r1 - remaining entry count, decremented once per iteration
 *   $r2 - index of the bit currently being sent, starting at 0
 *   $r3 - the bitfield argument
 * NOTE(review): $r1 is presumably preloaded with the first argument (size);
 * confirm against the macro ISA documentation.
 */
static const uint32_t nvc0_9097_per_instance_bf[] =
{
0x00000301, /* parm $r3 (the bitfield) */
0x00000211, /* mov $r2 0 (bit index starts at 0) */
0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */
0xffffc911, /* mov $r1 (add $r1 -0x1) (loop head: one entry fewer to go) */
0x0040d043, /* send (extrshl $r3 $r2 0x1 0) (presumably 1 bit of $r3 at position $r2) */
0xffff8897, /* exit branz $r1 0x3 (back to the decrement above while $r1 != 0) */
0x00005211 /* mov $r2 (add $r2 0x1) (next bit; presumably runs in the branch delay slot - TODO confirm) */
};
/* The comments above the macros describe what they *should* be doing,
* but we use less functionality for now.
*/

View file

@ -623,6 +623,7 @@ nvc0_screen_create(struct nouveau_device *dev)
#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
i = 0;
MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf);
MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);

View file

@ -746,19 +746,44 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
const struct pipe_vertex_buffer *vb)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
uint32_t constant_vbos = 0;
unsigned i;
for (i = 0; i < count; ++i)
pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
for (; i < nvc0->num_vtxbufs; ++i)
pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
if (count != nvc0->num_vtxbufs) {
for (i = 0; i < count; ++i) {
pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
nvc0->vtxbuf[i].stride = vb[i].stride;
if (!vb[i].stride)
constant_vbos |= 1 << i;
}
for (; i < nvc0->num_vtxbufs; ++i)
pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
nvc0->num_vtxbufs = count;
nvc0->num_vtxbufs = count;
nvc0->dirty |= NVC0_NEW_ARRAYS;
} else {
for (i = 0; i < count; ++i) {
if (nvc0->vtxbuf[i].buffer == vb[i].buffer &&
nvc0->vtxbuf[i].buffer_offset == vb[i].buffer_offset &&
nvc0->vtxbuf[i].stride == vb[i].stride)
continue;
pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
nvc0->vtxbuf[i].stride = vb[i].stride;
if (likely(vb[i].stride))
nvc0->dirty |= NVC0_NEW_ARRAYS;
else
constant_vbos |= 1 << i;
}
}
if (constant_vbos != nvc0->constant_vbos) {
nvc0->constant_vbos = constant_vbos;
nvc0->dirty |= NVC0_NEW_ARRAYS;
}
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
nvc0->dirty |= NVC0_NEW_ARRAYS;
if (nvc0->dirty & NVC0_NEW_ARRAYS)
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
}
static void

View file

@ -126,26 +126,44 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
static void
nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
struct pipe_vertex_element *ve, unsigned attr)
nvc0_update_constant_vertex_attribs(struct nvc0_context *nvc0)
{
const void *data;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *res = nv04_resource(vb->buffer);
float v[4];
int i;
const unsigned nc = util_format_get_nr_components(ve->src_format);
uint32_t mask = nvc0->state.constant_elts;
data = nouveau_resource_map_offset(&nvc0->base, res, vb->buffer_offset +
ve->src_offset, NOUVEAU_BO_RD);
while (unlikely(mask)) {
const int i = ffs(mask) - 1;
uint32_t mode;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
const struct util_format_description *desc;
void *dst;
const void *src = nouveau_resource_map_offset(&nvc0->base,
nv04_resource(vb->buffer),
vb->buffer_offset + ve->src_offset, NOUVEAU_BO_RD);
util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
mask &= ~(1 << i);
PUSH_SPACE(push, 6);
BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), nc + 1);
PUSH_DATA (push, VTX_ATTR(attr, nc, FLOAT, 32));
for (i = 0; i < nc; ++i)
PUSH_DATAf(push, v[i]);
desc = util_format_description(ve->src_format);
PUSH_SPACE(push, 6);
BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5);
dst = push->cur + 1;
if (desc->channel[0].pure_integer) {
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
mode = VTX_ATTR(i, 4, SINT, 32);
desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1);
} else {
mode = VTX_ATTR(i, 4, UINT, 32);
desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1);
}
} else {
mode = VTX_ATTR(i, 4, FLOAT, 32);
desc->unpack_rgba_float(dst, 0, src, 0, 1, 1);
}
*push->cur = mode;
push->cur += 5;
}
}
static INLINE void
@ -225,13 +243,8 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
struct nv04_resource *buf = nv04_resource(vb->buffer);
if (!(nvc0->vbo_user & (1 << b)))
if (!(nvc0->vbo_user & (1 << b)) || !vb->stride)
continue;
if (!vb->stride) {
nvc0_emit_vtxattr(nvc0, vb, ve, i);
continue;
}
nvc0_vbuf_range(nvc0, b, &base, &size);
if (!(written & (1 << b))) {
@ -268,83 +281,88 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
struct pipe_vertex_buffer *vb;
struct nvc0_vertex_element *ve;
uint32_t const_vbos;
unsigned i;
boolean update_vertex;
if (unlikely(vertex->need_conversion) ||
unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
nvc0->vbo_fifo = ~0;
nvc0->vbo_user = 0;
nvc0->vbo_fifo = ~nvc0->constant_vbos;
} else {
nvc0_prevalidate_vbufs(nvc0);
nvc0->vbo_fifo &= ~nvc0->constant_vbos;
}
const_vbos = nvc0->vbo_fifo ? 0 : nvc0->constant_vbos;
PUSH_SPACE(push, vertex->num_elements + 1);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
for (i = 0; i < vertex->num_elements; ++i) {
ve = &vertex->element[i];
vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
(const_vbos != nvc0->state.constant_vbos);
if (update_vertex) {
uint32_t *restrict data;
const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
if (likely(vb->stride) || nvc0->vbo_fifo) {
PUSH_DATA(push, ve->state);
} else {
PUSH_DATA(push, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
nvc0->vbo_fifo &= ~(1 << i);
if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
nvc0->state.instance_elts = vertex->instance_elts;
assert(n); /* if (n == 0), both masks should be 0 */
PUSH_SPACE(push, 3);
BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
PUSH_DATA (push, n);
PUSH_DATA (push, vertex->instance_elts);
}
nvc0->state.num_vtxelts = vertex->num_elements;
nvc0->state.constant_vbos = const_vbos;
nvc0->state.constant_elts = 0;
PUSH_SPACE(push, n * 2 + 1);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
data = push->cur;
push->cur += n;
for (i = 0; i < vertex->num_elements; ++data, ++i) {
ve = &vertex->element[i];
*data = ve->state;
if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
*data |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
nvc0->state.constant_elts |= 1 << i;
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
}
}
for (; i < n; ++data, ++i) {
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
*data = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
}
}
PUSH_SPACE(push, vertex->num_elements * 16);
PUSH_SPACE(push, vertex->num_elements * 8);
for (i = 0; i < vertex->num_elements; ++i) {
struct nv04_resource *res;
unsigned size, offset;
if (nvc0->state.constant_elts & (1 << i))
continue;
ve = &vertex->element[i];
vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
if (unlikely(ve->pipe.instance_divisor)) {
if (!(nvc0->state.instance_elts & (1 << i))) {
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
}
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else
if (unlikely(nvc0->state.instance_elts & (1 << i))) {
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
}
res = nv04_resource(vb->buffer);
if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
if (!nvc0->vbo_fifo)
nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
PUSH_DATA (push, 0);
continue;
}
size = vb->buffer->width0;
offset = ve->pipe.src_offset + vb->buffer_offset;
size = vb->buffer->width0;
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
PUSH_DATA (push, (1 << 12) | vb->stride);
BEGIN_1IC0(push, NVC0_3D(VERTEX_ARRAY_SELECT), 5);
PUSH_DATA (push, i);
if (unlikely(ve->pipe.instance_divisor)) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
PUSH_DATA (push, (1 << 12) | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
PUSH_DATA (push, (1 << 12) | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
PUSH_DATAh(push, res->address + size - 1);
PUSH_DATA (push, res->address + size - 1);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
for (; i < nvc0->state.num_vtxelts; ++i) {
PUSH_SPACE(push, 5);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
PUSH_DATA (push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
if (unlikely(nvc0->state.instance_elts & (1 << i)))
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
PUSH_DATA (push, 0);
}
nvc0->state.num_vtxelts = vertex->num_elements;
nvc0->state.instance_elts = vertex->instance_elts;
}
void
@ -393,7 +411,6 @@ nvc0_prim_gl(unsigned prim)
NVC0_PRIM_GL_CASE(PATCHES); */
default:
return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
break;
}
}
@ -666,6 +683,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
push->kick_notify = nvc0_default_kick_notify;
return;
}
nvc0_update_constant_vertex_attribs(nvc0);
/* space for base instance, flush, and prim restart */
PUSH_SPACE(push, 8);
@ -678,19 +696,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
if (nvc0->base.vbo_dirty) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
nvc0->base.vbo_dirty = FALSE;
}
if (unlikely(info->count_from_stream_output)) {
nvc0_draw_stream_output(nvc0, info);
} else
if (!info->indexed) {
nvc0_draw_arrays(nvc0,
info->mode, info->start, info->count,
info->instance_count);
} else {
if (info->indexed) {
boolean shorten = info->max_index <= 65535;
assert(nvc0->idxbuf.buffer);
@ -719,6 +729,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_draw_elements(nvc0, shorten,
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
} else
if (unlikely(info->count_from_stream_output)) {
nvc0_draw_stream_output(nvc0, info);
} else {
nvc0_draw_arrays(nvc0,
info->mode, info->start, info->count,
info->instance_count);
}
push->kick_notify = nvc0_default_kick_notify;