nvc0: handle more query types

This commit is contained in:
Christoph Bumiller 2011-10-21 22:58:18 +02:00
parent 3cc1dd5b80
commit b4ecef4b1b
3 changed files with 204 additions and 49 deletions

View file

@ -168,6 +168,10 @@ void nvc0_program_library_upload(struct nvc0_context *);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
struct pipe_query *pq, unsigned result_offset);
#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
/* nvc0_shader_state.c */
void nvc0_vertprog_validate(struct nvc0_context *);

View file

@ -25,26 +25,22 @@
#include "nvc0_context.h"
#include "nouveau/nv_object.xml.h"
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
* (since we use only a single GPU channel per screen) will not work properly.
*
* The first is not that big of an issue because OpenGL does not allow nested
* queries anyway.
*/
struct nvc0_query {
uint32_t *data;
uint32_t type;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
uint32_t offset; /* base + i * 16 */
uint32_t offset; /* base + i * rotate */
boolean ready;
boolean active;
boolean is64bit;
uint8_t rotate;
int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
};
#define NVC0_QUERY_ALLOC_SPACE 128
#define NVC0_QUERY_ALLOC_SPACE 256
static INLINE struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
@ -64,7 +60,8 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
if (q->ready)
nouveau_mm_free(q->mm);
else
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
nouveau_fence_work(screen->base.fence.current,
nouveau_mm_free_work, q->mm);
}
}
if (size) {
@ -97,24 +94,53 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nvc0_query *q;
unsigned space = NVC0_QUERY_ALLOC_SPACE;
q = CALLOC_STRUCT(nvc0_query);
if (!q)
return NULL;
if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
switch (type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->rotate = 32;
space = NVC0_QUERY_ALLOC_SPACE;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
q->is64bit = TRUE;
space = 512;
break;
case PIPE_QUERY_SO_STATISTICS:
q->is64bit = TRUE;
space = 64;
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
space = 32;
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
space = 16;
break;
default:
FREE(q);
return NULL;
}
if (!nvc0_query_allocate(nvc0, q, space)) {
FREE(q);
return NULL;
}
q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
type == PIPE_QUERY_SO_STATISTICS);
q->type = type;
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset -= 16;
q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
if (q->rotate) {
/* we advance before query_begin ! */
q->offset -= q->rotate;
q->data -= q->rotate / sizeof(*q->data);
}
return (struct pipe_query *)q;
@ -134,6 +160,15 @@ nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
OUT_RING (chan, get);
}
/* Advance the query to its next result slot inside the current buffer
 * allocation (each slot is q->rotate bytes). q->data is a uint32_t view of
 * the same storage, hence the divide by sizeof(*q->data). When the rotation
 * has consumed the whole allocation, switch to a fresh buffer so older,
 * still-in-flight results are not overwritten.
 */
static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
q->offset += q->rotate;
q->data += q->rotate / sizeof(*q->data);
/* NOTE(review): assumes q->rotate evenly divides NVC0_QUERY_ALLOC_SPACE,
 * otherwise offset - base steps past the limit without ever equalling it. */
if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}
static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
@ -141,50 +176,68 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
const int index = 0; /* vertex stream */
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render condition to FALSE even *after* we re-
* initialized it to TRUE.
*/
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset += 16;
q->data += 16 / sizeof(*q->data);
if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
if (q->rotate) {
nvc0_query_rotate(nvc0, q);
/* XXX: can we do this with the GPU, and sync with respect to a previous
* query ?
*/
q->data[1] = 1; /* initial render condition = TRUE */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
if (!q->is64bit)
q->data[0] = q->sequence++; /* the previously used one */
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
case PIPE_QUERY_OCCLUSION_PREDICATE:
q->nesting = nvc0->screen->num_occlusion_queries_active++;
if (q->nesting) {
nvc0_query_get(chan, q, 0x10, 0x0100f002);
} else {
BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
OUT_RING (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
IMMED_RING(chan, RING_3D(COUNTER_RESET),
NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
case PIPE_QUERY_PRIMITIVES_GENERATED:
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
IMMED_RING(chan, RING_3D(COUNTER_RESET),
NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
nvc0_query_get(chan, q, 0x10, 0x00005002);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
break;
default:
break;
}
q->ready = FALSE;
q->active = TRUE;
}
static void
@ -196,22 +249,38 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
const int index = 0; /* for multiple vertex streams */
if (!q->active) {
/* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
if (q->rotate)
nvc0_query_rotate(nvc0, q);
else
if (!q->is64bit)
q->data[0] = q->sequence++;
}
q->ready = FALSE;
q->active = FALSE;
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
nvc0_query_get(chan, q, 0, 0x0100f002);
BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
OUT_RING (chan, 0);
if (--nvc0->screen->num_occlusion_queries_active == 0)
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
nvc0_query_get(chan, q, 0, 0x00005002);
@ -219,6 +288,24 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
case PIPE_QUERY_GPU_FINISHED:
nvc0_query_get(chan, q, 0, 0x1000f010);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
nvc0_query_get(chan, q, 0x04, 0x1d005022);
nvc0_query_get(chan, q, 0x08, 0x1d005042);
nvc0_query_get(chan, q, 0x0c, 0x1d005062);
break;
default:
assert(0);
break;
@ -250,11 +337,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
uint32_t *res32 = result;
boolean *res8 = result;
uint64_t *data64 = (uint64_t *)q->data;
if (q->type == PIPE_QUERY_GPU_FINISHED) {
res8[0] = nvc0_query_ready(q);
return TRUE;
}
unsigned i;
if (!q->ready) /* update ? */
q->ready = nvc0_query_ready(q);
@ -271,24 +354,50 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
q->ready = TRUE;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
res32[0] = 0;
res8[0] = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res32[0] = q->data[1];
res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
res32[0] = 0;
res8[0] = q->data[1] != q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
res64[0] = data64[0];
res64[0] = data64[0] - data64[2];
break;
case PIPE_QUERY_SO_STATISTICS:
res64[0] = data64[0];
res64[1] = data64[1];
res64[0] = data64[0] - data64[4];
res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
res32[0] = 0;
res8[0] = !q->data[1];
break;
case PIPE_QUERY_TIMESTAMP:
res64[0] = data64[1];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
res32[2] = 0;
res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
for (i = 0; i < 10; ++i)
res64[i] = data64[i * 2] - data64[24 + i * 2];
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
res32[0] = q->data[0];
res32[1] = q->data[1];
res32[2] = q->data[2];
res32[3] = q->data[3];
break;
default:
return FALSE;
}
@ -303,6 +412,11 @@ nvc0_render_condition(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q;
uint32_t cond;
boolean negated = FALSE;
boolean wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
if (!pq) {
IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
@ -310,8 +424,33 @@ nvc0_render_condition(struct pipe_context *pipe,
}
q = nvc0_query(pq);
if (mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* query writes 1 if there was no overflow */
cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
NVC0_3D_COND_MODE_EQUAL;
wait = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!negated)) {
if (unlikely(q->nesting))
cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
NVC0_3D_COND_MODE_ALWAYS;
else
cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
} else {
cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
}
break;
default:
assert(!"render condition query not a predicate");
mode = NVC0_3D_COND_MODE_ALWAYS;
break;
}
if (wait) {
MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@ -324,7 +463,17 @@ nvc0_render_condition(struct pipe_context *pipe,
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
OUT_RING (chan, cond);
}
/* Feed a query result directly into the command stream: submits the word at
 * result_offset bytes into the query's buffer object. The trailing 4 is
 * presumably the data size in bytes — confirm against
 * nouveau_pushbuf_submit's contract.
 */
void
nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
struct pipe_query *pq, unsigned result_offset)
{
struct nvc0_query *q = nvc0_query(pq);
nouveau_pushbuf_submit(nvc0->screen->base.channel,
q->bo, q->offset + result_offset, 4);
}
void

View file

@ -27,6 +27,8 @@ struct nvc0_screen {
struct nvc0_context *cur_ctx;
int num_occlusion_queries_active;
struct nouveau_bo *text;
struct nouveau_bo *uniforms;
struct nouveau_bo *tls;