mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
nv50: implement stream output
This commit is contained in:
parent
8a44ecdae8
commit
02fac29305
12 changed files with 468 additions and 33 deletions
|
|
@ -48,6 +48,7 @@
|
|||
#define NV50_NEW_CONSTBUF (1 << 18)
|
||||
#define NV50_NEW_TEXTURES (1 << 19)
|
||||
#define NV50_NEW_SAMPLERS (1 << 20)
|
||||
#define NV50_NEW_STRMOUT (1 << 21)
|
||||
#define NV50_NEW_CONTEXT (1 << 31)
|
||||
|
||||
#define NV50_BIND_FB 0
|
||||
|
|
@ -56,9 +57,10 @@
|
|||
#define NV50_BIND_INDEX 3
|
||||
#define NV50_BIND_TEXTURES 4
|
||||
#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
|
||||
#define NV50_BIND_SCREEN 53
|
||||
#define NV50_BIND_TLS 54
|
||||
#define NV50_BIND_COUNT 55
|
||||
#define NV50_BIND_SO 53
|
||||
#define NV50_BIND_SCREEN 54
|
||||
#define NV50_BIND_TLS 55
|
||||
#define NV50_BIND_COUNT 56
|
||||
#define NV50_BIND_2D 0
|
||||
#define NV50_BIND_M2MF 0
|
||||
#define NV50_BIND_FENCE 1
|
||||
|
|
@ -92,11 +94,13 @@ struct nv50_context {
|
|||
boolean point_sprite;
|
||||
boolean rt_serialize;
|
||||
boolean flushed;
|
||||
boolean rasterizer_discard;
|
||||
uint8_t tls_required;
|
||||
uint8_t num_vtxbufs;
|
||||
uint8_t num_vtxelts;
|
||||
uint8_t num_textures[3];
|
||||
uint8_t num_samplers[3];
|
||||
uint8_t prim_size;
|
||||
uint16_t scissor;
|
||||
} state;
|
||||
|
||||
|
|
@ -126,6 +130,10 @@ struct nv50_context {
|
|||
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers[3];
|
||||
|
||||
uint8_t num_so_targets;
|
||||
uint8_t so_targets_dirty;
|
||||
struct pipe_stream_output_target *so_target[4];
|
||||
|
||||
struct pipe_framebuffer_state framebuffer;
|
||||
struct pipe_blend_color blend_colour;
|
||||
struct pipe_stencil_ref stencil_ref;
|
||||
|
|
@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
|
|||
|
||||
/* nv50_query.c */
|
||||
void nv50_init_query_functions(struct nv50_context *);
|
||||
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
|
||||
struct pipe_query *, unsigned result_offset);
|
||||
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
|
||||
/* Ends the offset query attached to a stream output target for buffer slot
 * 'index'. If 'serialize' is TRUE, a GRAPH serialize is emitted first.
 */
void nva0_so_target_save_offset(struct pipe_context *,
                                struct pipe_stream_output_target *,
                                unsigned index, boolean serialize);
|
||||
|
||||
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
|
||||
|
||||
/* nv50_shader_state.c */
|
||||
void nv50_vertprog_validate(struct nv50_context *);
|
||||
|
|
@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *);
|
|||
void nv50_gp_linkage_validate(struct nv50_context *);
|
||||
void nv50_constbufs_validate(struct nv50_context *);
|
||||
void nv50_validate_derived_rs(struct nv50_context *);
|
||||
void nv50_stream_output_validate(struct nv50_context *);
|
||||
|
||||
/* nv50_state.c */
|
||||
extern void nv50_init_state_functions(struct nv50_context *);
|
||||
|
|
|
|||
|
|
@ -235,6 +235,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
|
|||
}
|
||||
}
|
||||
|
||||
static struct nv50_stream_output_state *
|
||||
nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
|
||||
const struct pipe_stream_output_info *pso)
|
||||
{
|
||||
struct nv50_stream_output_state *so;
|
||||
unsigned b, i, c;
|
||||
unsigned base[4];
|
||||
|
||||
so = MALLOC_STRUCT(nv50_stream_output_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
memset(so->map, 0xff, sizeof(so->map));
|
||||
|
||||
for (b = 0; b < 4; ++b)
|
||||
so->num_attribs[b] = 0;
|
||||
for (i = 0; i < pso->num_outputs; ++i) {
|
||||
unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
|
||||
b = pso->output[i].output_buffer;
|
||||
assert(b < 4);
|
||||
so->num_attribs[b] = MAX2(so->num_attribs[b], end);
|
||||
}
|
||||
|
||||
so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
|
||||
|
||||
so->stride[0] = pso->stride[0] * 4;
|
||||
base[0] = 0;
|
||||
for (b = 1; b < 4; ++b) {
|
||||
assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
|
||||
so->stride[b] = so->num_attribs[b] * 4;
|
||||
if (so->num_attribs[b])
|
||||
so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
|
||||
base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
|
||||
}
|
||||
if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
|
||||
assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
|
||||
so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
|
||||
}
|
||||
|
||||
so->map_size = base[3] + so->num_attribs[3];
|
||||
|
||||
for (i = 0; i < pso->num_outputs; ++i) {
|
||||
const unsigned s = pso->output[i].start_component;
|
||||
const unsigned p = pso->output[i].dst_offset;
|
||||
const unsigned r = pso->output[i].register_index;
|
||||
b = pso->output[i].output_buffer;
|
||||
|
||||
for (c = 0; c < pso->output[i].num_components; ++c)
|
||||
so->map[base[b] + p + c] = info->out[r].slot[s + c];
|
||||
}
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
boolean
|
||||
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
||||
{
|
||||
|
|
@ -293,6 +346,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
|||
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
|
||||
}
|
||||
|
||||
if (prog->pipe.stream_output.num_outputs)
|
||||
prog->so = nv50_program_create_strmout_state(info,
|
||||
&prog->pipe.stream_output);
|
||||
|
||||
out:
|
||||
FREE(info);
|
||||
return !ret;
|
||||
|
|
|
|||
|
|
@ -42,6 +42,15 @@ struct nv50_varying {
|
|||
ubyte si; /* semantic index */
|
||||
};
|
||||
|
||||
/* Per-program stream output state, filled in by
 * nv50_program_create_strmout_state().
 */
struct nv50_stream_output_state
|
||||
{
|
||||
/* Value written to STRMOUT_BUFFERS_CTRL (interleaved/separate mode bits). */
uint32_t ctrl;
|
||||
/* Per-buffer stride, computed as the component count times 4. */
uint16_t stride[4];
|
||||
/* Per-buffer component count (highest dst_offset + num_components). */
uint8_t num_attribs[4];
|
||||
/* Number of entries of map[] that are in use. */
uint8_t map_size;
|
||||
/* Output slot for each stream output component; 0xff means unused. */
uint8_t map[128];
|
||||
};
|
||||
|
||||
struct nv50_program {
|
||||
struct pipe_shader_state pipe;
|
||||
|
||||
|
|
@ -88,6 +97,8 @@ struct nv50_program {
|
|||
void *fixups; /* relocation records */
|
||||
|
||||
struct nouveau_heap *mem;
|
||||
|
||||
struct nv50_stream_output_state *so;
|
||||
};
|
||||
|
||||
boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
|
||||
|
|
|
|||
|
|
@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
|
|||
{
|
||||
struct push_context ctx;
|
||||
unsigned i, index_size;
|
||||
unsigned inst = info->instance_count;
|
||||
unsigned inst_count = info->instance_count;
|
||||
unsigned vert_count = info->count;
|
||||
boolean apply_bias = info->indexed && info->index_bias;
|
||||
|
||||
ctx.push = nv50->base.pushbuf;
|
||||
|
|
@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
|
|||
ctx.primitive_restart = info->primitive_restart;
|
||||
ctx.restart_index = info->restart_index;
|
||||
} else {
|
||||
if (unlikely(info->count_from_stream_output)) {
|
||||
struct pipe_context *pipe = &nv50->base.pipe;
|
||||
struct nv50_so_target *targ;
|
||||
targ = nv50_so_target(info->count_from_stream_output);
|
||||
if (!targ->pq) {
|
||||
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
|
||||
return;
|
||||
}
|
||||
pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
|
||||
vert_count /= targ->stride;
|
||||
}
|
||||
ctx.idxbuf = NULL;
|
||||
index_size = 0;
|
||||
ctx.primitive_restart = FALSE;
|
||||
|
|
@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
|
|||
}
|
||||
nv50->state.prim_restart = info->primitive_restart;
|
||||
|
||||
while (inst--) {
|
||||
while (inst_count--) {
|
||||
BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (ctx.push, ctx.prim);
|
||||
switch (index_size) {
|
||||
case 0:
|
||||
emit_vertices_seq(&ctx, info->start, info->count);
|
||||
emit_vertices_seq(&ctx, info->start, vert_count);
|
||||
break;
|
||||
case 1:
|
||||
emit_vertices_i08(&ctx, info->start, info->count);
|
||||
emit_vertices_i08(&ctx, info->start, vert_count);
|
||||
break;
|
||||
case 2:
|
||||
emit_vertices_i16(&ctx, info->start, info->count);
|
||||
emit_vertices_i16(&ctx, info->start, vert_count);
|
||||
break;
|
||||
case 4:
|
||||
emit_vertices_i32(&ctx, info->start, info->count);
|
||||
emit_vertices_i32(&ctx, info->start, vert_count);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
|
|||
|
|
@ -36,7 +36,8 @@
|
|||
|
||||
struct nv50_query {
|
||||
uint32_t *data;
|
||||
uint32_t type;
|
||||
uint16_t type;
|
||||
uint16_t index;
|
||||
uint32_t sequence;
|
||||
struct nouveau_bo *bo;
|
||||
uint32_t base;
|
||||
|
|
@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nv50_query_get(push, q, 0x10, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
|
||||
nv50_query_get(push, q, 0x10, 0x05805002);
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
PUSH_SPACE(push, 3);
|
||||
BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
|
||||
nv50_query_get(push, q, 0x20, 0x05805002);
|
||||
nv50_query_get(push, q, 0x30, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
|
|
@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
case PIPE_QUERY_GPU_FINISHED:
|
||||
nv50_query_get(push, q, 0, 0x1000f010);
|
||||
break;
|
||||
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
|
|
@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
uint64_t *res64 = (uint64_t *)result;
|
||||
uint32_t *res32 = (uint32_t *)result;
|
||||
boolean *res8 = (boolean *)result;
|
||||
uint64_t *data64 = (uint64_t *)q->data;
|
||||
|
||||
|
|
@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
|
||||
res64[0] = data64[0];
|
||||
res64[0] = data64[0] - data64[2];
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
res64[0] = data64[0];
|
||||
res64[1] = data64[1];
|
||||
res64[0] = data64[0] - data64[4];
|
||||
res64[1] = data64[2] - data64[6];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
|
||||
res64[0] = 1000000000;
|
||||
|
|
@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
res64[0] = data64[1] - data64[3];
|
||||
break;
|
||||
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
res32[0] = q->data[1];
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
|
@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
void
|
||||
nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
|
||||
{
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
unsigned offset = q->offset;
|
||||
|
||||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->sequence);
|
||||
PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_render_condition(struct pipe_context *pipe,
|
||||
struct pipe_query *pq, uint mode)
|
||||
|
|
@ -324,6 +341,38 @@ nv50_render_condition(struct pipe_context *pipe,
|
|||
PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
|
||||
struct pipe_query *pq, unsigned result_offset)
|
||||
{
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
|
||||
/* XXX: does this exist ? */
|
||||
#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
|
||||
|
||||
nouveau_pushbuf_space(push, 0, 0, 1);
|
||||
nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
|
||||
NV50_IB_ENTRY_1_NO_PREFETCH);
|
||||
}
|
||||
|
||||
void
|
||||
nva0_so_target_save_offset(struct pipe_context *pipe,
|
||||
struct pipe_stream_output_target *ptarg,
|
||||
unsigned index, boolean serialize)
|
||||
{
|
||||
struct nv50_so_target *targ = nv50_so_target(ptarg);
|
||||
|
||||
if (serialize) {
|
||||
struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
nv50_query(targ->pq)->index = index;
|
||||
nv50_query_end(pipe, targ->pq);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_init_query_functions(struct nv50_context *nv50)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
|
|||
static int
|
||||
nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
{
|
||||
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
|
||||
|
||||
switch (param) {
|
||||
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
|
||||
return 64;
|
||||
|
|
@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_ANISOTROPIC_FILTER:
|
||||
case PIPE_CAP_SCALED_RESOLVE:
|
||||
return 1;
|
||||
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP:
|
||||
return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
|
|
@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
return 1;
|
||||
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
|
||||
return 0;
|
||||
return 4;
|
||||
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
|
||||
return 128;
|
||||
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
|
||||
return 32;
|
||||
return 64;
|
||||
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
|
||||
return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
case PIPE_CAP_INDEP_BLEND_ENABLE:
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
|
|||
PUSH_DATA (push, gp->gp.vert_count);
|
||||
BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
|
||||
PUSH_DATA (push, gp->code_base);
|
||||
|
||||
nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
|
||||
}
|
||||
nv50_program_update_context_state(nv50, gp, 2);
|
||||
|
||||
|
|
@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
|
|||
|
||||
nv50_sprite_coords_validate(nv50);
|
||||
|
||||
if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
|
||||
nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
|
||||
BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
|
||||
PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
|
||||
}
|
||||
|
||||
if (nv50->dirty & NV50_NEW_FRAGPROG)
|
||||
return;
|
||||
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
|
||||
|
|
@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
|
|||
uint32_t colors = fp->fp.colors;
|
||||
uint32_t lin[4];
|
||||
uint8_t map[64];
|
||||
uint8_t so_map[64];
|
||||
|
||||
if (!(nv50->dirty & (NV50_NEW_VERTPROG |
|
||||
NV50_NEW_FRAGPROG |
|
||||
|
|
@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
|
|||
if (nv50->rast->pipe.clamp_vertex_color)
|
||||
colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
|
||||
|
||||
if (unlikely(vp->so)) {
|
||||
/* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
|
||||
* gets written.
|
||||
*
|
||||
* TODO:
|
||||
* Inverting vp->so->map (output -> offset) would probably speed this up.
|
||||
*/
|
||||
memset(so_map, 0, sizeof(so_map));
|
||||
for (i = 0; i < vp->so->map_size; ++i) {
|
||||
if (vp->so->map[i] == 0xff)
|
||||
continue;
|
||||
for (c = 0; c < m; ++c)
|
||||
if (map[c] == vp->so->map[i] && !so_map[c])
|
||||
break;
|
||||
if (c == m) {
|
||||
c = m;
|
||||
map[m++] = vp->so->map[i];
|
||||
}
|
||||
so_map[c] = 0x80 | i;
|
||||
}
|
||||
for (c = m; c & 3; ++c)
|
||||
so_map[c] = 0;
|
||||
}
|
||||
|
||||
n = (m + 3) / 4;
|
||||
assert(m <= 64);
|
||||
|
||||
|
|
@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
|
|||
|
||||
BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
|
||||
PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
|
||||
|
||||
if (vp->so) {
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
|
||||
PUSH_DATAp(push, so_map, n);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
|
|||
BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
|
||||
PUSH_DATAp(push, map, n);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_stream_output_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_stream_output_state *so;
|
||||
uint32_t ctrl;
|
||||
unsigned i;
|
||||
unsigned prims = ~0;
|
||||
|
||||
so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
|
||||
|
||||
if (!so || !nv50->num_so_targets) {
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
ctrl = so->ctrl;
|
||||
if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
|
||||
ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
|
||||
PUSH_DATA (push, ctrl);
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
|
||||
|
||||
for (i = 0; i < nv50->num_so_targets; ++i) {
|
||||
struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
|
||||
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
|
||||
|
||||
const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
|
||||
|
||||
if (n == 4 && !targ->clean)
|
||||
nv84_query_fifo_wait(push, targ->pq);
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
|
||||
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
|
||||
PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
|
||||
PUSH_DATA (push, so->num_attribs[i]);
|
||||
if (n == 4) {
|
||||
PUSH_DATA(push, targ->pipe.buffer_size);
|
||||
|
||||
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
|
||||
if (!targ->clean) {
|
||||
assert(targ->pq);
|
||||
nv50_query_pushbuf_submit(push, targ->pq, 0x4);
|
||||
} else {
|
||||
PUSH_DATA(push, 0);
|
||||
targ->clean = FALSE;
|
||||
}
|
||||
} else {
|
||||
const unsigned limit = targ->pipe.buffer_size /
|
||||
(so->stride[i] * nv50->state.prim_size);
|
||||
prims = MIN2(prims, limit);
|
||||
}
|
||||
BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
|
||||
}
|
||||
if (prims != ~0) {
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
|
||||
PUSH_DATA (push, prims);
|
||||
}
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
|
|||
prog->type = type;
|
||||
prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
|
||||
|
||||
if (cso->stream_output.num_outputs)
|
||||
prog->pipe.stream_output = cso->stream_output;
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
||||
|
|
@ -909,6 +912,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
|
|||
nv50->dirty |= NV50_NEW_VERTEX;
|
||||
}
|
||||
|
||||
static struct pipe_stream_output_target *
|
||||
nv50_so_target_create(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size)
|
||||
{
|
||||
struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target);
|
||||
if (!targ)
|
||||
return NULL;
|
||||
|
||||
if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
|
||||
targ->pq = pipe->create_query(pipe,
|
||||
NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
|
||||
if (!targ->pq) {
|
||||
FREE(targ);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
targ->pq = NULL;
|
||||
}
|
||||
targ->clean = TRUE;
|
||||
|
||||
targ->pipe.buffer_size = size;
|
||||
targ->pipe.buffer_offset = offset;
|
||||
targ->pipe.context = pipe;
|
||||
targ->pipe.buffer = NULL;
|
||||
pipe_resource_reference(&targ->pipe.buffer, res);
|
||||
pipe_reference_init(&targ->pipe.reference, 1);
|
||||
|
||||
return &targ->pipe;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_so_target_destroy(struct pipe_context *pipe,
|
||||
struct pipe_stream_output_target *ptarg)
|
||||
{
|
||||
struct nv50_so_target *targ = nv50_so_target(ptarg);
|
||||
if (targ->pq)
|
||||
pipe->destroy_query(pipe, targ->pq);
|
||||
FREE(targ);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_set_stream_output_targets(struct pipe_context *pipe,
|
||||
unsigned num_targets,
|
||||
struct pipe_stream_output_target **targets,
|
||||
unsigned append_mask)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
unsigned i;
|
||||
boolean serialize = TRUE;
|
||||
const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
|
||||
|
||||
assert(num_targets <= 4);
|
||||
|
||||
for (i = 0; i < num_targets; ++i) {
|
||||
const boolean changed = nv50->so_target[i] != targets[i];
|
||||
if (!changed && (append_mask & (1 << i)))
|
||||
continue;
|
||||
nv50->so_targets_dirty |= 1 << i;
|
||||
|
||||
if (can_resume && changed && nv50->so_target[i]) {
|
||||
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
|
||||
serialize = FALSE;
|
||||
}
|
||||
|
||||
if (targets[i] && !(append_mask & (1 << i)))
|
||||
nv50_so_target(targets[i])->clean = TRUE;
|
||||
|
||||
pipe_so_target_reference(&nv50->so_target[i], targets[i]);
|
||||
}
|
||||
for (; i < nv50->num_so_targets; ++i) {
|
||||
if (can_resume && nv50->so_target[i]) {
|
||||
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
|
||||
serialize = FALSE;
|
||||
}
|
||||
pipe_so_target_reference(&nv50->so_target[i], NULL);
|
||||
nv50->so_targets_dirty |= 1 << i;
|
||||
}
|
||||
nv50->num_so_targets = num_targets;
|
||||
|
||||
if (nv50->so_targets_dirty)
|
||||
nv50->dirty |= NV50_NEW_STRMOUT;
|
||||
}
|
||||
|
||||
void
|
||||
nv50_init_state_functions(struct nv50_context *nv50)
|
||||
{
|
||||
|
|
@ -965,6 +1052,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
|
|||
pipe->set_vertex_buffers = nv50_set_vertex_buffers;
|
||||
pipe->set_index_buffer = nv50_set_index_buffer;
|
||||
|
||||
pipe->create_stream_output_target = nv50_so_target_create;
|
||||
pipe->stream_output_target_destroy = nv50_so_target_destroy;
|
||||
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
|
||||
|
||||
pipe->redefine_user_buffer = u_default_redefine_user_buffer;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -360,6 +360,8 @@ static struct state_validate {
|
|||
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
|
||||
{ nv50_validate_textures, NV50_NEW_TEXTURES },
|
||||
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
|
||||
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
|
||||
};
|
||||
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
|
||||
|
|
|
|||
|
|
@ -51,4 +51,17 @@ struct nv50_vertex_stateobj {
|
|||
struct nv50_vertex_element element[0];
|
||||
};
|
||||
|
||||
/* Driver-side stream output target; wraps the gallium target object. */
struct nv50_so_target {
|
||||
/* Base object. Must stay the first member: nv50_so_target() casts a
 * pipe_stream_output_target pointer directly to this struct.
 */
struct pipe_stream_output_target pipe;
|
||||
/* Offset query created on NVA0 and later; NULL on older chips. */
struct pipe_query *pq;
|
||||
/* Divisor nv50_push_vbo applies to the queried offset to get a vertex
 * count for draws sourced from stream output.
 */
unsigned stride;
|
||||
/* TRUE: start writing at offset 0 on next bind instead of resuming from
 * the offset saved in pq.
 */
boolean clean;
|
||||
};
|
||||
|
||||
/* Downcasts a gallium stream output target to the driver's wrapper struct.
 * Valid because the gallium object is the wrapper's first member.
 */
static INLINE struct nv50_so_target *
nv50_so_target(struct pipe_stream_output_target *ptarg)
{
   struct nv50_so_target *targ = (struct nv50_so_target *)ptarg;

   return targ;
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim)
|
|||
}
|
||||
}
|
||||
|
||||
/* For pre-nva0 transform feedback. */
|
||||
static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
|
||||
{
|
||||
[PIPE_PRIM_POINTS] = 1,
|
||||
[PIPE_PRIM_LINES] = 2,
|
||||
[PIPE_PRIM_LINE_LOOP] = 2,
|
||||
[PIPE_PRIM_LINE_STRIP] = 2,
|
||||
[PIPE_PRIM_TRIANGLES] = 3,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP] = 3,
|
||||
[PIPE_PRIM_TRIANGLE_FAN] = 3,
|
||||
[PIPE_PRIM_QUADS] = 3,
|
||||
[PIPE_PRIM_QUAD_STRIP] = 3,
|
||||
[PIPE_PRIM_POLYGON] = 3,
|
||||
[PIPE_PRIM_LINES_ADJACENCY] = 2,
|
||||
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
|
||||
[PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
|
||||
};
|
||||
|
||||
static void
|
||||
nv50_draw_arrays(struct nv50_context *nv50,
|
||||
unsigned mode, unsigned start, unsigned count,
|
||||
|
|
@ -623,6 +642,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nva0_draw_stream_output(struct nv50_context *nv50,
|
||||
const struct pipe_draw_info *info)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
|
||||
struct nv04_resource *res = nv04_resource(so->pipe.buffer);
|
||||
unsigned num_instances = info->instance_count;
|
||||
unsigned mode = nv50_prim_gl(info->mode);
|
||||
|
||||
if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
|
||||
/* A proper implementation without waiting doesn't seem possible,
|
||||
* so don't bother.
|
||||
*/
|
||||
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
|
||||
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
PUSH_SPACE(push, 4);
|
||||
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
assert(num_instances);
|
||||
do {
|
||||
PUSH_SPACE(push, 8);
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, mode);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
|
||||
nv50_query_pushbuf_submit(push, so->pq, 0x4);
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
|
||||
} while (--num_instances);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
|
||||
{
|
||||
|
|
@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
|
||||
nv50_update_user_vbufs(nv50);
|
||||
|
||||
if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
|
||||
nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
|
||||
|
||||
nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
|
||||
|
||||
push->kick_notify = nv50_draw_vbo_kick_notify;
|
||||
|
|
@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
nv50->base.vbo_dirty = FALSE;
|
||||
}
|
||||
|
||||
if (!info->indexed) {
|
||||
nv50_draw_arrays(nv50,
|
||||
info->mode, info->start, info->count,
|
||||
info->instance_count);
|
||||
} else {
|
||||
if (info->indexed) {
|
||||
boolean shorten = info->max_index <= 65535;
|
||||
|
||||
assert(nv50->idxbuf.buffer);
|
||||
|
|
@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
nv50_draw_elements(nv50, shorten,
|
||||
info->mode, info->start, info->count,
|
||||
info->instance_count, info->index_bias);
|
||||
} else
|
||||
if (unlikely(info->count_from_stream_output)) {
|
||||
nva0_draw_stream_output(nv50, info);
|
||||
} else {
|
||||
nv50_draw_arrays(nv50,
|
||||
info->mode, info->start, info->count,
|
||||
info->instance_count);
|
||||
}
|
||||
push->kick_notify = nv50_default_kick_notify;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
|
|||
|
||||
#define SUBC_3D(m) 3, (m)
|
||||
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
|
||||
#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
|
||||
|
||||
#define SUBC_2D(m) 4, (m)
|
||||
#define NV50_2D(n) SUBC_2D(NV50_2D_##n)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue