mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
swr: Remove stall waiting for core query counters.
When gathering query results, swr_gather_stats was unnecessarily stalling the entire pipeline. Results are now collected asynchronously, with a fence marking completion. Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
This commit is contained in:
parent
76a36ac3ea
commit
9d86a5eea7
4 changed files with 83 additions and 126 deletions
|
|
@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
|
|||
swr_fence_destroy(old);
|
||||
}
|
||||
|
||||
/*
 * Report whether a fence is done.
 *
 * Converts the opaque pipe_fence_handle to the driver's swr_fence via
 * swr_fence() and returns nonzero when its `read` and `write` fields are
 * equal.
 * NOTE(review): presumably `write` advances on submission and `read` on
 * completion -- confirm against the definition of struct swr_fence.
 */
static INLINE boolean
|
||||
swr_is_fence_done(struct pipe_fence_handle *fence_handle)
|
||||
{
|
||||
struct swr_fence *fence = swr_fence(fence_handle);
|
||||
return (fence->read == fence->write);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the fence to finish.
|
||||
|
|
|
|||
|
|
@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
|
|||
return (struct swr_fence *)fence;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Report whether a fence is done.
 *
 * Converts the opaque pipe_fence_handle to the driver's swr_fence via
 * swr_fence() and returns nonzero when its `read` and `write` fields are
 * equal.  swr_get_query_result() uses this for its non-blocking (!wait)
 * early-out.
 * NOTE(review): presumably `write` advances on submission and `read` on
 * completion -- confirm against the definition of struct swr_fence.
 */
static INLINE boolean
|
||||
swr_is_fence_done(struct pipe_fence_handle *fence_handle)
|
||||
{
|
||||
struct swr_fence *fence = swr_fence(fence_handle);
|
||||
return (fence->read == fence->write);
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
|
|||
struct swr_query *pq = swr_query(q);
|
||||
|
||||
if (pq->fence) {
|
||||
if (!swr_is_fence_pending(pq->fence)) {
|
||||
swr_fence_submit(swr_context(pipe), pq->fence);
|
||||
if (swr_is_fence_pending(pq->fence))
|
||||
swr_fence_finish(pipe->screen, pq->fence, 0);
|
||||
}
|
||||
swr_fence_reference(pipe->screen, &pq->fence, NULL);
|
||||
}
|
||||
|
||||
|
|
@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
|
|||
}
|
||||
|
||||
|
||||
// XXX Create a fence callback, rather than stalling SwrWaitForIdle
|
||||
static void
|
||||
swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
|
||||
{
|
||||
struct swr_context *ctx = swr_context(pipe);
|
||||
|
||||
assert(pq->result);
|
||||
union pipe_query_result *result = pq->result;
|
||||
struct swr_query_result *result = pq->result;
|
||||
boolean enable_stats = pq->enable_stats;
|
||||
SWR_STATS swr_stats = {0};
|
||||
|
||||
if (pq->fence) {
|
||||
if (!swr_is_fence_pending(pq->fence)) {
|
||||
swr_fence_submit(ctx, pq->fence);
|
||||
swr_fence_finish(pipe->screen, pq->fence, 0);
|
||||
}
|
||||
swr_fence_reference(pipe->screen, &pq->fence, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* These queries don't need SWR Stats enabled in the core
|
||||
* Set and return.
|
||||
*/
|
||||
/* A few results don't require the core, so don't involve it */
|
||||
switch (pq->type) {
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
result->u64 = swr_get_timestamp(pipe->screen);
|
||||
return;
|
||||
result->timestamp = swr_get_timestamp(pipe->screen);
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
/* nothing to do here */
|
||||
return;
|
||||
break;
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
|
||||
vs LastRetiredId? */
|
||||
return;
|
||||
/* nothing to do here */
|
||||
break;
|
||||
default:
|
||||
/* Any query that needs SwrCore stats */
|
||||
/*
|
||||
* All other results are collected from SwrCore counters via
|
||||
* SwrGetStats. This returns immediately, but results are later filled
|
||||
* in by the backend. Fence status is the only indication of
|
||||
* completion. */
|
||||
SwrGetStats(ctx->swrContext, &result->core);
|
||||
|
||||
if (!pq->fence) {
|
||||
struct swr_screen *screen = swr_screen(pipe->screen);
|
||||
swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
|
||||
}
|
||||
swr_fence_submit(ctx, pq->fence);
|
||||
|
||||
/* Only change stat collection if there are no active queries */
|
||||
if (ctx->active_queries == 0)
|
||||
SwrEnableStats(ctx->swrContext, enable_stats);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* All other results are collected from SwrCore counters
|
||||
*/
|
||||
|
||||
/* XXX, Should turn this into a fence callback and skip the stall */
|
||||
SwrGetStats(ctx->swrContext, &swr_stats);
|
||||
/* SwrGetStats returns immediately, wait for collection */
|
||||
SwrWaitForIdle(ctx->swrContext);
|
||||
|
||||
switch (pq->type) {
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
result->u64 = swr_stats.DepthPassCount;
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
result->u64 = swr_stats.IaPrimitives;
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
|
||||
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
|
||||
so_stats->num_primitives_written =
|
||||
swr_stats.SoNumPrimsWritten[pq->index];
|
||||
so_stats->primitives_storage_needed =
|
||||
swr_stats.SoPrimStorageNeeded[pq->index];
|
||||
} break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS: {
|
||||
struct pipe_query_data_pipeline_statistics *p_stats =
|
||||
&result->pipeline_statistics;
|
||||
p_stats->ia_vertices = swr_stats.IaVertices;
|
||||
p_stats->ia_primitives = swr_stats.IaPrimitives;
|
||||
p_stats->vs_invocations = swr_stats.VsInvocations;
|
||||
p_stats->gs_invocations = swr_stats.GsInvocations;
|
||||
p_stats->gs_primitives = swr_stats.GsPrimitives;
|
||||
p_stats->c_invocations = swr_stats.CPrimitives;
|
||||
p_stats->c_primitives = swr_stats.CPrimitives;
|
||||
p_stats->ps_invocations = swr_stats.PsInvocations;
|
||||
p_stats->hs_invocations = swr_stats.HsInvocations;
|
||||
p_stats->ds_invocations = swr_stats.DsInvocations;
|
||||
p_stats->cs_invocations = swr_stats.CsInvocations;
|
||||
} break;
|
||||
default:
|
||||
assert(0 && "Unsupported query");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Only change stat collection if there are no active queries */
|
||||
if (ctx->active_queries == 0)
|
||||
SwrEnableStats(ctx->swrContext, enable_stats);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
|
|||
boolean wait,
|
||||
union pipe_query_result *result)
|
||||
{
|
||||
struct swr_context *ctx = swr_context(pipe);
|
||||
struct swr_query *pq = swr_query(q);
|
||||
struct swr_query_result *start = &pq->start;
|
||||
struct swr_query_result *end = &pq->end;
|
||||
unsigned index = pq->index;
|
||||
|
||||
if (pq->fence) {
|
||||
if (!swr_is_fence_pending(pq->fence)) {
|
||||
swr_fence_submit(ctx, pq->fence);
|
||||
if (!wait)
|
||||
return FALSE;
|
||||
swr_fence_finish(pipe->screen, pq->fence, 0);
|
||||
}
|
||||
if (!wait && !swr_is_fence_done(pq->fence))
|
||||
return FALSE;
|
||||
|
||||
swr_fence_finish(pipe->screen, pq->fence, 0);
|
||||
swr_fence_reference(pipe->screen, &pq->fence, NULL);
|
||||
}
|
||||
|
||||
|
|
@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
|
|||
switch (pq->type) {
|
||||
/* Booleans */
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
|
||||
result->b = end->core.DepthPassCount != start->core.DepthPassCount;
|
||||
break;
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
result->b = pq->end.b;
|
||||
result->b = TRUE;
|
||||
break;
|
||||
/* Counters */
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
result->u64 = end->timestamp - start->timestamp;
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
result->u64 = pq->end.u64 - pq->start.u64;
|
||||
result->u64 = end->core.SoNumPrimsWritten[index]
|
||||
- start->core.SoNumPrimsWritten[index];
|
||||
break;
|
||||
/* Structures */
|
||||
case PIPE_QUERY_SO_STATISTICS: {
|
||||
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
|
||||
struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
|
||||
struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
|
||||
struct SWR_STATS *start = &pq->start.core;
|
||||
struct SWR_STATS *end = &pq->end.core;
|
||||
so_stats->num_primitives_written =
|
||||
end->num_primitives_written - start->num_primitives_written;
|
||||
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
|
||||
so_stats->primitives_storage_needed =
|
||||
end->primitives_storage_needed - start->primitives_storage_needed;
|
||||
end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
|
||||
} break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
/* os_get_time_nano returns nanoseconds */
|
||||
result->timestamp_disjoint.frequency = UINT64_C(1000000000);
|
||||
result->timestamp_disjoint.disjoint = FALSE;
|
||||
} break;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS: {
|
||||
struct pipe_query_data_pipeline_statistics *p_stats =
|
||||
&result->pipeline_statistics;
|
||||
struct pipe_query_data_pipeline_statistics *start =
|
||||
&pq->start.pipeline_statistics;
|
||||
struct pipe_query_data_pipeline_statistics *end =
|
||||
&pq->end.pipeline_statistics;
|
||||
p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
|
||||
p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
|
||||
p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
|
||||
p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
|
||||
p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
|
||||
p_stats->c_invocations = end->c_invocations - start->c_invocations;
|
||||
p_stats->c_primitives = end->c_primitives - start->c_primitives;
|
||||
p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
|
||||
p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
|
||||
p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
|
||||
p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
|
||||
} break;
|
||||
struct SWR_STATS *start = &pq->start.core;
|
||||
struct SWR_STATS *end = &pq->end.core;
|
||||
p_stats->ia_vertices = end->IaVertices - start->IaVertices;
|
||||
p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
|
||||
p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
|
||||
p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
|
||||
p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
|
||||
p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
|
||||
p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
|
||||
p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
|
||||
p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
|
||||
p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
|
||||
p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
|
||||
} break;
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
|
||||
struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
|
||||
struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
|
||||
struct SWR_STATS *start = &pq->start.core;
|
||||
struct SWR_STATS *end = &pq->end.core;
|
||||
uint64_t num_primitives_written =
|
||||
end->num_primitives_written - start->num_primitives_written;
|
||||
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
|
||||
uint64_t primitives_storage_needed =
|
||||
end->primitives_storage_needed - start->primitives_storage_needed;
|
||||
end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
|
||||
result->b = num_primitives_written > primitives_storage_needed;
|
||||
} break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unsupported query");
|
||||
break;
|
||||
|
|
@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
|
|||
struct swr_context *ctx = swr_context(pipe);
|
||||
struct swr_query *pq = swr_query(q);
|
||||
|
||||
assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
|
||||
|
||||
/* Initialize Results */
|
||||
memset(&pq->start, 0, sizeof(pq->start));
|
||||
memset(&pq->end, 0, sizeof(pq->end));
|
||||
|
|
@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
|
|||
|
||||
/* override start timestamp to 0 for TIMESTAMP query */
|
||||
if (pq->type == PIPE_QUERY_TIMESTAMP)
|
||||
pq->start.u64 = 0;
|
||||
pq->start.timestamp = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,13 +27,18 @@
|
|||
|
||||
#include <limits.h>
|
||||
|
||||
/*
 * One snapshot of query data.  struct swr_query keeps two of these
 * (`start` and `end`); swr_get_query_result() reports the difference
 * between them for counter-style queries.
 */
struct swr_query_result {
|
||||
SWR_STATS core; /* core counters, requested via SwrGetStats(); filled in later by the backend */
|
||||
uint64_t timestamp; /* value of swr_get_timestamp() for TIMESTAMP / TIME_ELAPSED queries */
|
||||
};
|
||||
|
||||
struct swr_query {
|
||||
unsigned type; /* PIPE_QUERY_* */
|
||||
unsigned index;
|
||||
|
||||
union pipe_query_result *result;
|
||||
union pipe_query_result start;
|
||||
union pipe_query_result end;
|
||||
struct swr_query_result *result;
|
||||
struct swr_query_result start;
|
||||
struct swr_query_result end;
|
||||
|
||||
struct pipe_fence_handle *fence;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue