nv50: implement stream output

This commit is contained in:
Christoph Bumiller 2012-05-03 12:50:08 +02:00
parent 8a44ecdae8
commit 02fac29305
12 changed files with 468 additions and 33 deletions

View file

@ -48,6 +48,7 @@
#define NV50_NEW_CONSTBUF (1 << 18)
#define NV50_NEW_TEXTURES (1 << 19)
#define NV50_NEW_SAMPLERS (1 << 20)
#define NV50_NEW_STRMOUT (1 << 21)
#define NV50_NEW_CONTEXT (1 << 31)
#define NV50_BIND_FB 0
@ -56,9 +57,10 @@
#define NV50_BIND_INDEX 3
#define NV50_BIND_TEXTURES 4
#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
#define NV50_BIND_SCREEN 53
#define NV50_BIND_TLS 54
#define NV50_BIND_COUNT 55
#define NV50_BIND_SO 53
#define NV50_BIND_SCREEN 54
#define NV50_BIND_TLS 55
#define NV50_BIND_COUNT 56
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@ -92,11 +94,13 @@ struct nv50_context {
boolean point_sprite;
boolean rt_serialize;
boolean flushed;
boolean rasterizer_discard;
uint8_t tls_required;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[3];
uint8_t num_samplers[3];
uint8_t prim_size;
uint16_t scissor;
} state;
@ -126,6 +130,10 @@ struct nv50_context {
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
uint8_t num_so_targets;
uint8_t so_targets_dirty;
struct pipe_stream_output_target *so_target[4];
struct pipe_framebuffer_state framebuffer;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
/* nv50_query.c */
void nv50_init_query_functions(struct nv50_context *);
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
struct pipe_query *, unsigned result_offset);
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
/* Record the current write offset of stream output target 'index' into the
 * target's offset query (NVA0+ only). When 'serialize' is TRUE a
 * NV50_GRAPH_SERIALIZE is emitted before the query is written.
 */
void nva0_so_target_save_offset(struct pipe_context *,
                                struct pipe_stream_output_target *,
                                unsigned index, boolean serialize);
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
/* nv50_shader_state.c */
void nv50_vertprog_validate(struct nv50_context *);
@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *);
void nv50_gp_linkage_validate(struct nv50_context *);
void nv50_constbufs_validate(struct nv50_context *);
void nv50_validate_derived_rs(struct nv50_context *);
void nv50_stream_output_validate(struct nv50_context *);
/* nv50_state.c */
extern void nv50_init_state_functions(struct nv50_context *);

View file

@ -235,6 +235,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
}
}
static struct nv50_stream_output_state *
nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
const struct pipe_stream_output_info *pso)
{
struct nv50_stream_output_state *so;
unsigned b, i, c;
unsigned base[4];
so = MALLOC_STRUCT(nv50_stream_output_state);
if (!so)
return NULL;
memset(so->map, 0xff, sizeof(so->map));
for (b = 0; b < 4; ++b)
so->num_attribs[b] = 0;
for (i = 0; i < pso->num_outputs; ++i) {
unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
b = pso->output[i].output_buffer;
assert(b < 4);
so->num_attribs[b] = MAX2(so->num_attribs[b], end);
}
so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
so->stride[0] = pso->stride[0] * 4;
base[0] = 0;
for (b = 1; b < 4; ++b) {
assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
so->stride[b] = so->num_attribs[b] * 4;
if (so->num_attribs[b])
so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
}
if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
}
so->map_size = base[3] + so->num_attribs[3];
for (i = 0; i < pso->num_outputs; ++i) {
const unsigned s = pso->output[i].start_component;
const unsigned p = pso->output[i].dst_offset;
const unsigned r = pso->output[i].register_index;
b = pso->output[i].output_buffer;
for (c = 0; c < pso->output[i].num_components; ++c)
so->map[base[b] + p + c] = info->out[r].slot[s + c];
}
return so;
}
boolean
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
@ -293,6 +346,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
}
if (prog->pipe.stream_output.num_outputs)
prog->so = nv50_program_create_strmout_state(info,
&prog->pipe.stream_output);
out:
FREE(info);
return !ret;

View file

@ -42,6 +42,15 @@ struct nv50_varying {
ubyte si; /* semantic index */
};
/* Per-program stream output (transform feedback) state, built by
 * nv50_program_create_strmout_state() from the pipe_stream_output_info.
 */
struct nv50_stream_output_state
{
/* value for STRMOUT_BUFFERS_CTRL: INTERLEAVED flag plus stride, or the
 * buffer count in the SEPARATE field
 */
uint32_t ctrl;
/* per-buffer stride in bytes (number of components * 4) */
uint16_t stride[4];
/* per-buffer number of components written (max dst_offset + num_components) */
uint8_t num_attribs[4];
/* number of map[] entries in use */
uint8_t map_size;
/* map[base of buffer + component offset] = shader output slot written at
 * that position; 0xff marks an unused entry
 */
uint8_t map[128];
};
struct nv50_program {
struct pipe_shader_state pipe;
@ -88,6 +97,8 @@ struct nv50_program {
void *fixups; /* relocation records */
struct nouveau_heap *mem;
struct nv50_stream_output_state *so;
};
boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);

View file

@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
unsigned inst = info->instance_count;
unsigned inst_count = info->instance_count;
unsigned vert_count = info->count;
boolean apply_bias = info->indexed && info->index_bias;
ctx.push = nv50->base.pushbuf;
@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.primitive_restart = info->primitive_restart;
ctx.restart_index = info->restart_index;
} else {
if (unlikely(info->count_from_stream_output)) {
struct pipe_context *pipe = &nv50->base.pipe;
struct nv50_so_target *targ;
targ = nv50_so_target(info->count_from_stream_output);
if (!targ->pq) {
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
return;
}
pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
vert_count /= targ->stride;
}
ctx.idxbuf = NULL;
index_size = 0;
ctx.primitive_restart = FALSE;
@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
}
nv50->state.prim_restart = info->primitive_restart;
while (inst--) {
while (inst_count--) {
BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (ctx.push, ctx.prim);
switch (index_size) {
case 0:
emit_vertices_seq(&ctx, info->start, info->count);
emit_vertices_seq(&ctx, info->start, vert_count);
break;
case 1:
emit_vertices_i08(&ctx, info->start, info->count);
emit_vertices_i08(&ctx, info->start, vert_count);
break;
case 2:
emit_vertices_i16(&ctx, info->start, info->count);
emit_vertices_i16(&ctx, info->start, vert_count);
break;
case 4:
emit_vertices_i32(&ctx, info->start, info->count);
emit_vertices_i32(&ctx, info->start, vert_count);
break;
default:
assert(0);

View file

@ -36,7 +36,8 @@
struct nv50_query {
uint32_t *data;
uint32_t type;
uint16_t type;
uint16_t index;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
PUSH_DATA (push, 1);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
PUSH_SPACE(push, 2);
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0x10, 0x06805002);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
PUSH_SPACE(push, 2);
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
nv50_query_get(push, q, 0x10, 0x05805002);
break;
case PIPE_QUERY_SO_STATISTICS:
PUSH_SPACE(push, 3);
BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2);
PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
nv50_query_get(push, q, 0x20, 0x05805002);
nv50_query_get(push, q, 0x30, 0x06805002);
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
case PIPE_QUERY_GPU_FINISHED:
nv50_query_get(push, q, 0, 0x1000f010);
break;
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
break;
default:
assert(0);
break;
@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q = nv50_query(pq);
uint64_t *res64 = (uint64_t *)result;
uint32_t *res32 = (uint32_t *)result;
boolean *res8 = (boolean *)result;
uint64_t *data64 = (uint64_t *)q->data;
@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
res64[0] = data64[0];
res64[0] = data64[0] - data64[2];
break;
case PIPE_QUERY_SO_STATISTICS:
res64[0] = data64[0];
res64[1] = data64[1];
res64[0] = data64[0] - data64[4];
res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
break;
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
res32[0] = q->data[1];
break;
default:
return FALSE;
}
@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return TRUE;
}
/* Emit a semaphore ACQUIRE_EQUAL on the query's result location.
 *
 * The semaphore address is the query buffer address plus q->offset and the
 * compared value is q->sequence; presumably this makes the channel wait
 * until the query result has been written — confirm against hw docs.
 */
void
nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
struct nv50_query *q = nv50_query(pq);
unsigned offset = q->offset;
/* 1 method header + 4 data words */
PUSH_SPACE(push, 5);
/* the query buffer is only read by this operation */
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
/* high and low part of the semaphore address */
PUSH_DATAh(push, q->bo->offset + offset);
PUSH_DATA (push, q->bo->offset + offset);
/* value to compare against, then the trigger */
PUSH_DATA (push, q->sequence);
PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
static void
nv50_render_condition(struct pipe_context *pipe,
struct pipe_query *pq, uint mode)
@ -324,6 +341,38 @@ nv50_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
}
/* Submit 4 bytes of the query's buffer, located at q->offset + result_offset,
 * as push buffer data.
 *
 * Callers in this driver emit a method header with BEGIN_NV04 immediately
 * before calling this, so the stored query word serves as that method's data.
 */
void
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
struct pipe_query *pq, unsigned result_offset)
{
struct nv50_query *q = nv50_query(pq);
/* XXX: does this exist ? */
/* NOTE: the flag below evaluates to 0, so OR-ing it in has no effect. */
#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
/* reserve room for one additional push (IB) entry */
nouveau_pushbuf_space(push, 0, 0, 1);
nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
NV50_IB_ENTRY_1_NO_PREFETCH);
}
/* Store the current write offset of a stream output target in its query.
 *
 * pipe:      context owning the target
 * ptarg:     target whose offset is saved; its 'pq' query is used
 * index:     stream output buffer slot the target is bound to
 * serialize: if TRUE, emit NV50_GRAPH_SERIALIZE before writing the query
 */
void
nva0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, boolean serialize)
{
   struct pipe_query *const offset_query = nv50_so_target(ptarg)->pq;

   if (serialize) {
      struct nouveau_pushbuf *pb = nv50_context(pipe)->base.pushbuf;

      PUSH_SPACE(pb, 2);
      BEGIN_NV04(pb, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (pb, 0);
   }

   /* The query end path uses q->index to select which buffer to report. */
   nv50_query(offset_query)->index = index;
   nv50_query_end(pipe, offset_query);
}
void
nv50_init_query_functions(struct nv50_context *nv50)
{

View file

@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
static int
nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 64;
@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_SCALED_RESOLVE:
return 1;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_OCCLUSION_QUERY:
return 1;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return 0;
return 4;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return 128;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
return 32;
return 64;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
return 1;

View file

@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
PUSH_DATA (push, gp->gp.vert_count);
BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
PUSH_DATA (push, gp->code_base);
nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
}
nv50_program_update_context_state(nv50, gp, 2);
@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
nv50_sprite_coords_validate(nv50);
if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
}
if (nv50->dirty & NV50_NEW_FRAGPROG)
return;
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
uint32_t colors = fp->fp.colors;
uint32_t lin[4];
uint8_t map[64];
uint8_t so_map[64];
if (!(nv50->dirty & (NV50_NEW_VERTPROG |
NV50_NEW_FRAGPROG |
@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
if (nv50->rast->pipe.clamp_vertex_color)
colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
if (unlikely(vp->so)) {
/* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
* gets written.
*
* TODO:
* Inverting vp->so->map (output -> offset) would probably speed this up.
*/
memset(so_map, 0, sizeof(so_map));
for (i = 0; i < vp->so->map_size; ++i) {
if (vp->so->map[i] == 0xff)
continue;
for (c = 0; c < m; ++c)
if (map[c] == vp->so->map[i] && !so_map[c])
break;
if (c == m) {
c = m;
map[m++] = vp->so->map[i];
}
so_map[c] = 0x80 | i;
}
for (c = m; c & 3; ++c)
so_map[c] = 0;
}
n = (m + 3) / 4;
assert(m <= 64);
@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
if (vp->so) {
BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
PUSH_DATAp(push, so_map, n);
}
}
static int
@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
PUSH_DATAp(push, map, n);
}
/**
 * Emit the stream output (transform feedback) hardware state.
 *
 * Uses the stream output state of the geometry program if one is bound,
 * otherwise that of the vertex program. If there is no such state or no
 * targets are bound, stream output is disabled. Otherwise the buffer control
 * word, the per-target addresses/sizes and (pre-NVA0) the primitive limit
 * are programmed and stream output is enabled.
 */
void
nv50_stream_output_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_stream_output_state *so;
   uint32_t ctrl;
   unsigned i;
   unsigned prims = ~0;

   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;

   if (!so || !nv50->num_so_targets) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
      PUSH_DATA (push, 0);
      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
         PUSH_DATA (push, 0);
      }
      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
      PUSH_DATA (push, 1);
      return;
   }

   ctrl = so->ctrl;
   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
   PUSH_DATA (push, ctrl);

   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);

   /* NOTE(review): nv50_set_stream_output_targets() tolerates NULL entries in
    * its target array, but this loop dereferences every bound slot — confirm
    * that NULL targets cannot reach this point.
    */
   for (i = 0; i < nv50->num_so_targets; ++i) {
      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);

      /* NVA0+ additionally takes the buffer size (offset-based limit). */
      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;

      /* Remember the stride (in bytes) the target is written with; it is
       * needed to turn the saved byte offset into a vertex count when drawing
       * from this target. It is not set anywhere else, and the target is not
       * zero-initialized on creation.
       */
      targ->stride = so->stride[i];

      /* Resuming: wait until the saved offset has actually been written. */
      if (n == 4 && !targ->clean)
         nv84_query_fifo_wait(push, targ->pq);
      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);

         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
         if (!targ->clean) {
            /* continue at the offset saved in the target's query */
            assert(targ->pq);
            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
         } else {
            PUSH_DATA(push, 0);
            targ->clean = FALSE;
         }
      } else {
         /* Pre-NVA0: limit the number of primitives so that no target
          * overflows. A target the program does not write to (stride 0), or
          * an unknown primitive size, imposes no limit; skipping it also
          * avoids a division by zero.
          */
         const unsigned prim_bytes = so->stride[i] * nv50->state.prim_size;

         if (prim_bytes) {
            const unsigned limit = targ->pipe.buffer_size / prim_bytes;
            prims = MIN2(prims, limit);
         }
      }
      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
   }
   if (prims != ~0) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
      PUSH_DATA (push, prims);
   }
   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
   PUSH_DATA (push, 1);
}

View file

@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
prog->type = type;
prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
return (void *)prog;
}
@ -909,6 +912,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_VERTEX;
}
/* Create a stream output target covering 'size' bytes of 'res' starting at
 * 'offset'.
 *
 * On NVA0+ a NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET query is created along
 * with the target; on older chipsets 'pq' is NULL. Returns NULL if either
 * allocation fails. The returned target holds a reference on 'res'.
 */
static struct pipe_stream_output_target *
nv50_so_target_create(struct pipe_context *pipe,
                      struct pipe_resource *res,
                      unsigned offset, unsigned size)
{
   struct nv50_so_target *so_targ = MALLOC_STRUCT(nv50_so_target);

   if (!so_targ)
      return NULL;

   so_targ->pq = NULL;
   if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
      so_targ->pq = pipe->create_query(pipe,
                                       NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
      if (!so_targ->pq) {
         FREE(so_targ);
         return NULL;
      }
   }

   pipe_reference_init(&so_targ->pipe.reference, 1);
   so_targ->pipe.context = pipe;

   /* must be NULL before taking the reference */
   so_targ->pipe.buffer = NULL;
   pipe_resource_reference(&so_targ->pipe.buffer, res);
   so_targ->pipe.buffer_offset = offset;
   so_targ->pipe.buffer_size = size;

   /* nothing has been written yet: writing starts at offset 0 */
   so_targ->clean = TRUE;

   return &so_targ->pipe;
}
static void
nv50_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)
{
struct nv50_so_target *targ = nv50_so_target(ptarg);
if (targ->pq)
pipe->destroy_query(pipe, targ->pq);
FREE(targ);
}
/* Bind 'num_targets' stream output targets (at most 4).
 *
 * Bit i of 'append_mask' requests that writing to target i continues where
 * it left off; without it a non-NULL target is marked clean so writing
 * restarts at offset 0. On NVA0+ the offsets of targets being unbound or
 * replaced are saved first, with a serialize emitted only before the first
 * save. Slots that changed are flagged in so_targets_dirty and
 * NV50_NEW_STRMOUT is raised if any flag is set.
 */
static void
nv50_set_stream_output_targets(struct pipe_context *pipe,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
                               unsigned append_mask)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
   boolean need_serialize = TRUE;
   unsigned slot;

   assert(num_targets <= 4);

   /* slots that receive a (possibly NULL) new target */
   for (slot = 0; slot < num_targets; ++slot) {
      const unsigned bit = 1 << slot;
      const boolean append = (append_mask & bit) != 0;
      const boolean replaced = nv50->so_target[slot] != targets[slot];

      /* same target, keep appending: nothing to do */
      if (append && !replaced)
         continue;
      nv50->so_targets_dirty |= bit;

      if (can_resume && replaced && nv50->so_target[slot]) {
         nva0_so_target_save_offset(pipe, nv50->so_target[slot], slot,
                                    need_serialize);
         need_serialize = FALSE;
      }

      if (!append && targets[slot])
         nv50_so_target(targets[slot])->clean = TRUE;

      pipe_so_target_reference(&nv50->so_target[slot], targets[slot]);
   }

   /* previously bound slots beyond the new count get unbound */
   for (; slot < nv50->num_so_targets; ++slot) {
      if (can_resume && nv50->so_target[slot]) {
         nva0_so_target_save_offset(pipe, nv50->so_target[slot], slot,
                                    need_serialize);
         need_serialize = FALSE;
      }
      pipe_so_target_reference(&nv50->so_target[slot], NULL);
      nv50->so_targets_dirty |= 1 << slot;
   }
   nv50->num_so_targets = num_targets;

   if (nv50->so_targets_dirty)
      nv50->dirty |= NV50_NEW_STRMOUT;
}
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@ -965,6 +1052,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->set_vertex_buffers = nv50_set_vertex_buffers;
pipe->set_index_buffer = nv50_set_index_buffer;
pipe->create_stream_output_target = nv50_so_target_create;
pipe->stream_output_target_destroy = nv50_so_target_destroy;
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}

View file

@ -360,6 +360,8 @@ static struct state_validate {
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
{ nv50_validate_textures, NV50_NEW_TEXTURES },
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))

View file

@ -51,4 +51,17 @@ struct nv50_vertex_stateobj {
struct nv50_vertex_element element[0];
};
/* Driver-side stream output target; 'pipe' must stay the first member
 * because nv50_so_target() casts the gallium pointer directly.
 */
struct nv50_so_target {
struct pipe_stream_output_target pipe;
/* offset query, created only on NVA0+ (NULL on older chipsets) */
struct pipe_query *pq;
/* divisor used to turn the saved byte offset into a vertex count when
 * drawing from this target
 * NOTE(review): no assignment to this field is visible in the stream output
 * code — confirm it is initialized before nv50_push_vbo reads it.
 */
unsigned stride;
/* TRUE until the target has been written; a clean target starts at offset 0 */
boolean clean;
};
/* Downcast a gallium stream output target to the driver's representation.
 * Valid because 'pipe' is the first member of struct nv50_so_target.
 */
static INLINE struct nv50_so_target *
nv50_so_target(struct pipe_stream_output_target *ptarg)
{
   struct nv50_so_target *targ = (struct nv50_so_target *)ptarg;

   return targ;
}
#endif

View file

@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim)
}
}
/* For pre-nva0 transform feedback.
 *
 * Maps a PIPE_PRIM_* draw mode to the number of vertices per primitive
 * (1 = points, 2 = lines, 3 = triangles). nv50_draw_vbo() stores the value in
 * state.prim_size when stream output targets are bound and no geometry
 * program is active; it is then used to compute STRMOUT_PRIMITIVE_LIMIT.
 * Quads, quad strips and polygons are listed with 3 vertices — presumably
 * because they are captured as triangles; confirm against hw behaviour.
 * Entries not listed (including index PIPE_PRIM_MAX) are 0.
 */
static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
{
[PIPE_PRIM_POINTS] = 1,
[PIPE_PRIM_LINES] = 2,
[PIPE_PRIM_LINE_LOOP] = 2,
[PIPE_PRIM_LINE_STRIP] = 2,
[PIPE_PRIM_TRIANGLES] = 3,
[PIPE_PRIM_TRIANGLE_STRIP] = 3,
[PIPE_PRIM_TRIANGLE_FAN] = 3,
[PIPE_PRIM_QUADS] = 3,
[PIPE_PRIM_QUAD_STRIP] = 3,
[PIPE_PRIM_POLYGON] = 3,
[PIPE_PRIM_LINES_ADJACENCY] = 2,
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
};
static void
nv50_draw_arrays(struct nv50_context *nv50,
unsigned mode, unsigned start, unsigned count,
@ -623,6 +642,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
}
}
/* Draw with the vertex count taken from a stream output target (NVA0+).
 *
 * The byte count is not read back by the CPU: the word stored by the target's
 * offset query is submitted as the data of the DRAW_TFB_BYTES method. One
 * begin/end pair is emitted per instance. On pre-NVA0 chipsets an error is
 * logged and nothing is drawn.
 */
static void
nva0_draw_stream_output(struct nv50_context *nv50,
const struct pipe_draw_info *info)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
struct nv04_resource *res = nv04_resource(so->pipe.buffer);
unsigned num_instances = info->instance_count;
unsigned mode = nv50_prim_gl(info->mode);
if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
/* A proper implementation without waiting doesn't seem possible,
* so don't bother.
*/
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
return;
}
/* If the buffer is flagged as being written by the GPU, clear the flag and
 * emit a serialize plus vertex array flush before it is used as input.
 */
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 4);
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);
}
/* the do/while below always runs once, so zero instances is not allowed */
assert(num_instances);
do {
PUSH_SPACE(push, 8);
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, mode);
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
PUSH_DATA (push, 0);
/* NOTE(review): DRAW_TFB_STRIDE is programmed with 0 although the target
 * carries a 'stride' field — confirm this is intended.
 */
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
PUSH_DATA (push, 0);
/* method data comes from the query buffer (word at result offset 0x4) */
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
nv50_query_pushbuf_submit(push, so->pq, 0x4);
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);
/* every begin after the first one advances to the next instance */
mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
} while (--num_instances);
}
static void
nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
{
@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
nv50_update_user_vbufs(nv50);
if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
push->kick_notify = nv50_draw_vbo_kick_notify;
@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50->base.vbo_dirty = FALSE;
}
if (!info->indexed) {
nv50_draw_arrays(nv50,
info->mode, info->start, info->count,
info->instance_count);
} else {
if (info->indexed) {
boolean shorten = info->max_index <= 65535;
assert(nv50->idxbuf.buffer);
@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50_draw_elements(nv50, shorten,
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
} else
if (unlikely(info->count_from_stream_output)) {
nva0_draw_stream_output(nv50, info);
} else {
nv50_draw_arrays(nv50,
info->mode, info->start, info->count,
info->instance_count);
}
push->kick_notify = nv50_default_kick_notify;

View file

@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_3D(m) 3, (m)
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
#define SUBC_2D(m) 4, (m)
#define NV50_2D(n) SUBC_2D(NV50_2D_##n)