diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c
index 1a44091f093..797e1677b60 100644
--- a/src/gallium/drivers/v3d/v3d_query.c
+++ b/src/gallium/drivers/v3d/v3d_query.c
@@ -149,6 +149,183 @@ v3d_render_condition(struct pipe_context *pipe,
         v3d->cond_mode = mode;
 }
 
+/* Fills in a drm_v3d_extension header and links it to the next extension in
+ * the chain.
+ */
+static void
+extension_set(struct drm_v3d_extension *ext, struct drm_v3d_extension *next,
+              uint32_t id, uintptr_t flags)
+{
+        ext->next = (uintptr_t)(void *)next;
+        ext->id = id;
+        ext->flags = flags;
+}
+
+/* Builds the array of syncobjs to wait on from sync_info->waits and stores
+ * its length in *count. The array is ralloc'ed with the context as parent,
+ * so it must be released with ralloc_free(). Returns NULL if the allocation
+ * fails.
+ */
+static struct drm_v3d_sem *
+in_syncs_set(struct v3d_context *v3d, uint32_t *count,
+             struct v3d_submit_sync_info *sync_info)
+{
+        *count = sync_info->wait_count;
+
+        struct drm_v3d_sem *syncs =
+                rzalloc_array(v3d, struct drm_v3d_sem, *count);
+
+        if (!syncs)
+                return NULL;
+
+        for (uint32_t i = 0; i < *count; i++)
+                syncs[i].handle = sync_info->waits[i];
+
+        return syncs;
+}
+
+/* Same as in_syncs_set(), but for the syncobjs listed in
+ * sync_info->signals.
+ */
+static struct drm_v3d_sem *
+out_syncs_set(struct v3d_context *v3d, uint32_t *count,
+              struct v3d_submit_sync_info *sync_info)
+{
+        *count = sync_info->signal_count;
+
+        struct drm_v3d_sem *syncs =
+                rzalloc_array(v3d, struct drm_v3d_sem, *count);
+
+        if (!syncs)
+                return NULL;
+
+        for (uint32_t i = 0; i < *count; i++)
+                syncs[i].handle = sync_info->signals[i];
+
+        return syncs;
+}
+
+/* Fills in a multisync extension with the wait and signal syncobjs described
+ * by sync_info and chains it in front of next.
+ *
+ * Returns false, without touching *ms, if one of the syncobj arrays could not
+ * be allocated. On success the arrays referenced by *ms have to be released
+ * with multisync_free().
+ */
+static bool
+multisync_set(struct v3d_context *v3d, struct drm_v3d_multi_sync *ms,
+              struct v3d_submit_sync_info *sync_info,
+              struct drm_v3d_extension *next, uint32_t wait_stage)
+{
+        uint32_t ocount = 0, icount = 0;
+        struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;
+
+        in_syncs = in_syncs_set(v3d, &icount, sync_info);
+        if (!in_syncs && icount)
+                goto fail;
+
+        out_syncs = out_syncs_set(v3d, &ocount, sync_info);
+        if (!out_syncs && ocount)
+                goto fail;
+
+        extension_set(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0);
+        ms->wait_stage = wait_stage;
+        ms->out_sync_count = ocount;
+        ms->out_syncs = (uintptr_t)(void *)out_syncs;
+        ms->in_sync_count = icount;
+        ms->in_syncs = (uintptr_t)(void *)in_syncs;
+
+        return true;
+
+fail:
+        fprintf(stderr, "Multisync Set Failed\n");
+        /* The arrays come from rzalloc_array(), so they have to be released
+         * with ralloc_free() rather than free().
+         */
+        ralloc_free(out_syncs);
+        ralloc_free(in_syncs);
+        return false;
+}
+
+/* Releases the syncobj arrays attached to ms by multisync_set(). */
+static void
+multisync_free(struct drm_v3d_multi_sync *ms)
+{
+        ralloc_free((void *)(uintptr_t)ms->out_syncs);
+        ralloc_free((void *)(uintptr_t)ms->in_syncs);
+}
+
+uint64_t
+v3d_get_timestamp(struct pipe_context *pctx)
+{
+        /* Calling glGetInteger64v with GL_TIMESTAMP will return the GPU
+         * timestamp when all previously given commands have issued, but not
+         * necessarily completed
+         */
+        v3d_flush(pctx);
+
+        /* Use os_time_get_nano as all of our timestamps come from the CPU clock */
+        return os_time_get_nano();
+}
+
+/* Submits a job to the CPU queue that writes a timestamp into bo at the given
+ * offset. The job waits on and signals v3d->out_sync; sync is the syncobj
+ * handed to the kernel along with the query.
+ */
+void
+v3d_submit_timestamp_query(struct pipe_context *pctx, struct v3d_bo *bo,
+                           uint32_t sync, uint32_t offset)
+{
+        struct v3d_context *v3d = v3d_context(pctx);
+        struct v3d_screen *screen = v3d->screen;
+        int ret;
+
+        /* check for multisync support */
+        assert(screen->has_multisync);
+
+        /* check for a valid bo to store the timestamp result */
+        assert(bo);
+
+        /* check for a valid syncobj */
+        assert(sync);
+
+        struct drm_v3d_timestamp_query timestamp = {0};
+
+        extension_set(&timestamp.base, NULL, DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY, 0);
+
+        timestamp.count = 1;
+        timestamp.offsets = (uintptr_t)(void *)&offset;
+        timestamp.syncs = (uintptr_t)(void *)&sync;
+
+        struct v3d_submit_sync_info sync_info = {
+                .wait_count = 1,
+                .waits = &v3d->out_sync,
+                .signal_count = 1,
+                .signals = &v3d->out_sync,
+        };
+
+        struct drm_v3d_multi_sync ms = {0};
+
+        /* Without the multisync extension the timestamp extension is not
+         * chained to the job either, so there is nothing useful to submit.
+         */
+        if (!multisync_set(v3d, &ms, &sync_info, (void *)&timestamp, V3D_CPU))
+                return;
+
+        struct drm_v3d_submit_cpu submit = {0};
+
+        submit.bo_handle_count = 1;
+        submit.bo_handles = (uintptr_t)(void *)&bo->handle;
+        submit.flags |= DRM_V3D_SUBMIT_EXTENSION;
+        submit.extensions = (uintptr_t)(void *)&ms;
+
+        ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_CPU, &submit);
+        if (ret)
+                fprintf(stderr, "Failed to submit cpu job: %s\n", strerror(errno));
+
+        multisync_free(&ms);
+}
+
 void
 v3d_query_init(struct pipe_context *pctx)
 {
@@ -160,4 +337,5 @@ v3d_query_init(struct pipe_context *pctx)
         pctx->get_query_result = v3d_get_query_result;
         pctx->set_active_query_state = v3d_set_active_query_state;
         pctx->render_condition = v3d_render_condition;
+        pctx->get_timestamp = v3d_get_timestamp;
 }
diff --git a/src/gallium/drivers/v3d/v3d_query.h b/src/gallium/drivers/v3d/v3d_query.h
index b2d8124a908..00af47d9659 100644
--- a/src/gallium/drivers/v3d/v3d_query.h
+++ b/src/gallium/drivers/v3d/v3d_query.h
@@ -41,8 +41,18 @@ struct v3d_query
         const struct v3d_query_funcs *funcs;
 };
 
+struct v3d_submit_sync_info
+{
+        uint32_t wait_count;
+        uint32_t *waits;
+        uint32_t signal_count;
+        uint32_t *signals;
+};
+
 struct pipe_query *v3d_create_query_pipe(struct v3d_context *v3d, unsigned query_type, unsigned index);
 struct pipe_query *v3d_create_batch_query_pipe(struct v3d_context *v3d, unsigned num_queries,
                                                unsigned *query_types);
+uint64_t v3d_get_timestamp(struct pipe_context *pctx);
+void v3d_submit_timestamp_query(struct pipe_context *pctx, struct v3d_bo *bo, uint32_t sync, uint32_t offset);
 
 #endif /* V3D_QUERY_H */
diff --git a/src/gallium/drivers/v3d/v3d_query_pipe.c b/src/gallium/drivers/v3d/v3d_query_pipe.c
index 7618daaba70..aa457cfa3fc 100644
--- a/src/gallium/drivers/v3d/v3d_query_pipe.c
+++ b/src/gallium/drivers/v3d/v3d_query_pipe.c
@@ -44,6 +44,10 @@ struct v3d_query_pipe
 
         uint32_t start, end;
         uint32_t result;
+
+        /* these fields are used for timestamp queries */
+        uint64_t time_result;
+        uint32_t sync[1];
 };
 
 static void
@@ -51,6 +55,8 @@ v3d_destroy_query_pipe(struct v3d_context *v3d, struct v3d_query *query)
 {
         struct v3d_query_pipe *pquery = (struct v3d_query_pipe *)query;
 
+        if (pquery->sync[0])
+                drmSyncobjDestroy(v3d->fd, pquery->sync[0]);
         v3d_bo_unreference(&pquery->bo);
         free(pquery);
 }
@@ -129,6 +135,21 @@ v3d_end_query_pipe(struct v3d_context *v3d, struct v3d_query *query)
                 v3d->current_oq = NULL;
                 v3d->dirty |= V3D_DIRTY_OQ;
                 break;
+        case PIPE_QUERY_TIMESTAMP:
+                /* Mesa only calls EndQuery and not BeginQuery for regular
+                 * timestamp queries
+                 *
+                 * This will store into the query object the time when the GPU
+                 * will have completed all previously issued commands.
+                 */
+                assert(pquery->bo);
+
+                /* flush any pending jobs */
+                v3d_flush(&v3d->base);
+
+                /* submit timestamp query to cpu queue */
+                v3d_submit_timestamp_query(&v3d->base, pquery->bo, pquery->sync[0], 0);
+                break;
         default:
                 unreachable("unsupported query type");
         }
@@ -143,21 +164,35 @@ v3d_get_query_result_pipe(struct v3d_context *v3d, struct v3d_query *query,
         struct v3d_query_pipe *pquery = (struct v3d_query_pipe *)query;
 
         if (pquery->bo) {
-                v3d_flush_jobs_using_bo(v3d, pquery->bo);
+                /* For timestamp queries we already flush relevant jobs
+                 * before submitting the query */
+                if (pquery->type != PIPE_QUERY_TIMESTAMP)
+                        v3d_flush_jobs_using_bo(v3d, pquery->bo);
 
                 if (wait) {
                         if (!v3d_bo_wait(pquery->bo, ~0ull, "query"))
                                 return false;
+                        assert(pquery->type != PIPE_QUERY_TIMESTAMP ||
+                               drmSyncobjWait(v3d->fd, &pquery->sync[0], 1, 0,
+                                              0, NULL) != -ETIME);
                 } else {
                         if (!v3d_bo_wait(pquery->bo, 0, "query"))
                                 return false;
                 }
 
-                /* XXX: Sum up per-core values. */
-                uint32_t *map = v3d_bo_map(pquery->bo);
-                pquery->result = *map;
+                if (pquery->type == PIPE_QUERY_TIMESTAMP) {
+                        uint64_t *map = v3d_bo_map(pquery->bo);
+                        pquery->time_result = *map;
+                } else {
+                        /* XXX: Sum up per-core values. */
+                        uint32_t *map = v3d_bo_map(pquery->bo);
+                        pquery->result = *map;
 
-                v3d_bo_unreference(&pquery->bo);
+                        /* FIXME: we should move creation and destruction of
+                         * the BO for all queries to query create/destruction,
+                         * like we do with timestamps */
+                        v3d_bo_unreference(&pquery->bo);
+                }
         }
 
         switch (pquery->type) {
@@ -172,6 +207,9 @@ v3d_get_query_result_pipe(struct v3d_context *v3d, struct v3d_query *query,
         case PIPE_QUERY_PRIMITIVES_EMITTED:
                 vresult->u64 = pquery->end - pquery->start;
                 break;
+        case PIPE_QUERY_TIMESTAMP:
+                vresult->u64 = pquery->time_result;
+                break;
         default:
                 unreachable("unsupported query type");
         }
@@ -198,6 +236,34 @@ v3d_create_query_pipe(struct v3d_context *v3d, unsigned query_type, unsigned ind
         pquery->type = query_type;
         query->funcs = &pipe_query_funcs;
 
+        /* FIXME: we should probably allocate BOs for occlusion queries here
+         * as well
+         */
+        switch (pquery->type) {
+        case PIPE_QUERY_TIMESTAMP: {
+                pquery->bo = v3d_bo_alloc(v3d->screen, 4096, "query");
+                if (!pquery->bo)
+                        goto fail;
+
+                /* The result is read back as a 64-bit value, so clear all
+                 * of it.
+                 */
+                uint64_t *map = v3d_bo_map(pquery->bo);
+                *map = 0;
+
+                if (drmSyncobjCreate(v3d->fd, 0, &pquery->sync[0]))
+                        goto fail;
+                break;
+        }
+        default:
+                break;
+        }
+
         /* Note that struct pipe_query isn't actually defined anywhere. */
         return (struct pipe_query *)query;
+
+fail:
+        /* Releases whatever was set up above along with the query itself */
+        v3d_destroy_query_pipe(v3d, query);
+        return NULL;
 }
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 8b0268b522a..40a82958795 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -159,7 +159,8 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
         case PIPE_CAP_POLYGON_OFFSET_CLAMP:
         case PIPE_CAP_TEXTURE_QUERY_LOD:
                 return 1;
-
+        case PIPE_CAP_QUERY_TIMESTAMP:
+                return screen->has_cpu_queue && screen->has_multisync;
         case PIPE_CAP_TEXTURE_SAMPLER_INDEPENDENT:
                 return 0;
 