mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
nvc0: handle more query types
This commit is contained in:
parent
3cc1dd5b80
commit
b4ecef4b1b
3 changed files with 204 additions and 49 deletions
|
|
@ -168,6 +168,10 @@ void nvc0_program_library_upload(struct nvc0_context *);
|
|||
|
||||
/* nvc0_query.c */
|
||||
void nvc0_init_query_functions(struct nvc0_context *);
|
||||
void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
|
||||
struct pipe_query *pq, unsigned result_offset);
|
||||
|
||||
#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
|
||||
|
||||
/* nvc0_shader_state.c */
|
||||
void nvc0_vertprog_validate(struct nvc0_context *);
|
||||
|
|
|
|||
|
|
@ -25,26 +25,22 @@
|
|||
#include "nvc0_context.h"
|
||||
#include "nouveau/nv_object.xml.h"
|
||||
|
||||
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
|
||||
* (since we use only a single GPU channel per screen) will not work properly.
|
||||
*
|
||||
* The first is not that big of an issue because OpenGL does not allow nested
|
||||
* queries anyway.
|
||||
*/
|
||||
|
||||
struct nvc0_query {
|
||||
uint32_t *data;
|
||||
uint32_t type;
|
||||
uint32_t sequence;
|
||||
struct nouveau_bo *bo;
|
||||
uint32_t base;
|
||||
uint32_t offset; /* base + i * 16 */
|
||||
uint32_t offset; /* base + i * rotate */
|
||||
boolean ready;
|
||||
boolean active;
|
||||
boolean is64bit;
|
||||
uint8_t rotate;
|
||||
int nesting; /* only used for occlusion queries */
|
||||
struct nouveau_mm_allocation *mm;
|
||||
};
|
||||
|
||||
#define NVC0_QUERY_ALLOC_SPACE 128
|
||||
#define NVC0_QUERY_ALLOC_SPACE 256
|
||||
|
||||
static INLINE struct nvc0_query *
|
||||
nvc0_query(struct pipe_query *pipe)
|
||||
|
|
@ -64,7 +60,8 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
|
|||
if (q->ready)
|
||||
nouveau_mm_free(q->mm);
|
||||
else
|
||||
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
|
||||
nouveau_fence_work(screen->base.fence.current,
|
||||
nouveau_mm_free_work, q->mm);
|
||||
}
|
||||
}
|
||||
if (size) {
|
||||
|
|
@ -97,24 +94,53 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
|
|||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nvc0_query *q;
|
||||
unsigned space = NVC0_QUERY_ALLOC_SPACE;
|
||||
|
||||
q = CALLOC_STRUCT(nvc0_query);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
|
||||
switch (type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
q->rotate = 32;
|
||||
space = NVC0_QUERY_ALLOC_SPACE;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
q->is64bit = TRUE;
|
||||
space = 512;
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
q->is64bit = TRUE;
|
||||
space = 64;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
space = 32;
|
||||
break;
|
||||
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
|
||||
space = 16;
|
||||
break;
|
||||
default:
|
||||
FREE(q);
|
||||
return NULL;
|
||||
}
|
||||
if (!nvc0_query_allocate(nvc0, q, space)) {
|
||||
FREE(q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
|
||||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
|
||||
type == PIPE_QUERY_SO_STATISTICS);
|
||||
q->type = type;
|
||||
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
|
||||
q->offset -= 16;
|
||||
q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
|
||||
if (q->rotate) {
|
||||
/* we advance before query_begin ! */
|
||||
q->offset -= q->rotate;
|
||||
q->data -= q->rotate / sizeof(*q->data);
|
||||
}
|
||||
|
||||
return (struct pipe_query *)q;
|
||||
|
|
@ -134,6 +160,15 @@ nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
|
|||
OUT_RING (chan, get);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
q->offset += q->rotate;
|
||||
q->data += q->rotate / sizeof(*q->data);
|
||||
if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
|
||||
nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
{
|
||||
|
|
@ -141,50 +176,68 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
struct nouveau_channel *chan = nvc0->screen->base.channel;
|
||||
struct nvc0_query *q = nvc0_query(pq);
|
||||
|
||||
const int index = 0; /* vertex stream */
|
||||
|
||||
/* For occlusion queries we have to change the storage, because a previous
|
||||
* query might set the initial render conition to FALSE even *after* we re-
|
||||
* initialized it to TRUE.
|
||||
*/
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
|
||||
q->offset += 16;
|
||||
q->data += 16 / sizeof(*q->data);
|
||||
if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
|
||||
nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
|
||||
if (q->rotate) {
|
||||
nvc0_query_rotate(nvc0, q);
|
||||
|
||||
/* XXX: can we do this with the GPU, and sync with respect to a previous
|
||||
* query ?
|
||||
*/
|
||||
q->data[1] = 1; /* initial render condition = TRUE */
|
||||
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
|
||||
q->data[5] = 0;
|
||||
}
|
||||
if (!q->is64bit)
|
||||
q->data[0] = q->sequence++; /* the previously used one */
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
|
||||
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
q->nesting = nvc0->screen->num_occlusion_queries_active++;
|
||||
if (q->nesting) {
|
||||
nvc0_query_get(chan, q, 0x10, 0x0100f002);
|
||||
} else {
|
||||
BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
|
||||
OUT_RING (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
|
||||
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
|
||||
IMMED_RING(chan, RING_3D(COUNTER_RESET),
|
||||
NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
IMMED_RING(chan, RING_3D(COUNTER_RESET),
|
||||
NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
|
||||
nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
|
||||
OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
|
||||
OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
|
||||
nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
|
||||
nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nvc0_query_get(chan, q, 0x10, 0x00005002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
q->ready = FALSE;
|
||||
q->active = TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -196,22 +249,38 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
|
||||
const int index = 0; /* for multiple vertex streams */
|
||||
|
||||
if (!q->active) {
|
||||
/* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
|
||||
if (q->rotate)
|
||||
nvc0_query_rotate(nvc0, q);
|
||||
else
|
||||
if (!q->is64bit)
|
||||
q->data[0] = q->sequence++;
|
||||
}
|
||||
q->ready = FALSE;
|
||||
q->active = FALSE;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
nvc0_query_get(chan, q, 0, 0x0100f002);
|
||||
BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
|
||||
OUT_RING (chan, 0);
|
||||
if (--nvc0->screen->num_occlusion_queries_active == 0)
|
||||
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
|
||||
nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
|
||||
nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
|
||||
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nvc0_query_get(chan, q, 0, 0x00005002);
|
||||
|
|
@ -219,6 +288,24 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
case PIPE_QUERY_GPU_FINISHED:
|
||||
nvc0_query_get(chan, q, 0, 0x1000f010);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
|
||||
nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
|
||||
nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
|
||||
nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
|
||||
nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
|
||||
break;
|
||||
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
|
||||
nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
|
||||
nvc0_query_get(chan, q, 0x04, 0x1d005022);
|
||||
nvc0_query_get(chan, q, 0x08, 0x1d005042);
|
||||
nvc0_query_get(chan, q, 0x0c, 0x1d005062);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
|
|
@ -250,11 +337,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
uint32_t *res32 = result;
|
||||
boolean *res8 = result;
|
||||
uint64_t *data64 = (uint64_t *)q->data;
|
||||
|
||||
if (q->type == PIPE_QUERY_GPU_FINISHED) {
|
||||
res8[0] = nvc0_query_ready(q);
|
||||
return TRUE;
|
||||
}
|
||||
unsigned i;
|
||||
|
||||
if (!q->ready) /* update ? */
|
||||
q->ready = nvc0_query_ready(q);
|
||||
|
|
@ -271,24 +354,50 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
q->ready = TRUE;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
res32[0] = 0;
|
||||
res8[0] = TRUE;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
|
||||
res32[0] = q->data[1];
|
||||
res64[0] = q->data[1] - q->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
res32[0] = 0;
|
||||
res8[0] = q->data[1] != q->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
|
||||
res64[0] = data64[0];
|
||||
res64[0] = data64[0] - data64[2];
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
res64[0] = data64[0];
|
||||
res64[1] = data64[1];
|
||||
res64[0] = data64[0] - data64[4];
|
||||
res64[1] = data64[2] - data64[6];
|
||||
break;
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
res32[0] = 0;
|
||||
res8[0] = !q->data[1];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
res64[0] = data64[1];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
|
||||
res64[0] = 1000000000;
|
||||
res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
|
||||
res32[2] = 0;
|
||||
res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
res64[0] = data64[1] - data64[3];
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (i = 0; i < 10; ++i)
|
||||
res64[i] = data64[i * 2] - data64[24 + i * 2];
|
||||
break;
|
||||
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
|
||||
res32[0] = q->data[0];
|
||||
res32[1] = q->data[1];
|
||||
res32[2] = q->data[2];
|
||||
res32[3] = q->data[3];
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
|
@ -303,6 +412,11 @@ nvc0_render_condition(struct pipe_context *pipe,
|
|||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_channel *chan = nvc0->screen->base.channel;
|
||||
struct nvc0_query *q;
|
||||
uint32_t cond;
|
||||
boolean negated = FALSE;
|
||||
boolean wait =
|
||||
mode != PIPE_RENDER_COND_NO_WAIT &&
|
||||
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
|
||||
|
||||
if (!pq) {
|
||||
IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
|
||||
|
|
@ -310,8 +424,33 @@ nvc0_render_condition(struct pipe_context *pipe,
|
|||
}
|
||||
q = nvc0_query(pq);
|
||||
|
||||
if (mode == PIPE_RENDER_COND_WAIT ||
|
||||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
|
||||
/* NOTE: comparison of 2 queries only works if both have completed */
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
/* query writes 1 if there was no overflow */
|
||||
cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
|
||||
NVC0_3D_COND_MODE_EQUAL;
|
||||
wait = TRUE;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
if (likely(!negated)) {
|
||||
if (unlikely(q->nesting))
|
||||
cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
|
||||
NVC0_3D_COND_MODE_ALWAYS;
|
||||
else
|
||||
cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
|
||||
} else {
|
||||
cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"render condition query not a predicate");
|
||||
mode = NVC0_3D_COND_MODE_ALWAYS;
|
||||
break;
|
||||
}
|
||||
|
||||
if (wait) {
|
||||
MARK_RING (chan, 5, 2);
|
||||
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
|
||||
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
|
|
@ -324,7 +463,17 @@ nvc0_render_condition(struct pipe_context *pipe,
|
|||
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
|
||||
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
|
||||
OUT_RING (chan, cond);
|
||||
}
|
||||
|
||||
void
|
||||
nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
|
||||
struct pipe_query *pq, unsigned result_offset)
|
||||
{
|
||||
struct nvc0_query *q = nvc0_query(pq);
|
||||
|
||||
nouveau_pushbuf_submit(nvc0->screen->base.channel,
|
||||
q->bo, q->offset + result_offset, 4);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ struct nvc0_screen {
|
|||
|
||||
struct nvc0_context *cur_ctx;
|
||||
|
||||
int num_occlusion_queries_active;
|
||||
|
||||
struct nouveau_bo *text;
|
||||
struct nouveau_bo *uniforms;
|
||||
struct nouveau_bo *tls;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue