mesa/src/gallium/drivers/llvmpipe/lp_query.c
Qiang Yu a4b6e8b1aa mesa: implement mesh shader queries
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36751>
2025-08-22 10:01:57 +00:00

592 lines
20 KiB
C

/**************************************************************************
*
* Copyright 2007 VMware, Inc.
* Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Authors:
* Keith Whitwell, Qicheng Christopher Li, Brian Paul
*/
#include "draw/draw_context.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/os_time.h"
#include "lp_context.h"
#include "lp_flush.h"
#include "lp_fence.h"
#include "lp_query.h"
#include "lp_screen.h"
#include "lp_state.h"
#include "lp_rast.h"
static struct llvmpipe_query *
llvmpipe_query(struct pipe_query *p)
{
return (struct llvmpipe_query *) p;
}
static struct pipe_query *
llvmpipe_create_query(struct pipe_context *pipe,
unsigned type,
unsigned index)
{
assert(type < PIPE_QUERY_TYPES);
struct llvmpipe_query *pq = CALLOC_STRUCT(llvmpipe_query);
if (pq) {
pq->type = type;
pq->index = index;
}
return (struct pipe_query *) pq;
}
static void
llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_query *pq = llvmpipe_query(q);
/* Ideally we would refcount queries & not get destroyed until the
* last scene had finished with us.
*/
if (pq->fence) {
if (!lp_fence_issued(pq->fence))
llvmpipe_flush(pipe, NULL, __func__);
if (!lp_fence_signalled(pq->fence))
lp_fence_wait(pq->fence);
lp_fence_reference(&pq->fence, NULL);
}
FREE(pq);
}
static bool
llvmpipe_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
bool wait,
union pipe_query_result *result)
{
const struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
const unsigned num_threads = MAX2(1, screen->num_threads);
struct llvmpipe_query *pq = llvmpipe_query(q);
if (pq->fence) {
/* only have a fence if there was a scene */
if (!lp_fence_signalled(pq->fence)) {
if (!lp_fence_issued(pq->fence))
llvmpipe_flush(pipe, NULL, __func__);
if (!wait)
return false;
lp_fence_wait(pq->fence);
}
}
/* Always initialize the first 64-bit result word to zero since some
* callers don't consider whether the result is actually a 1-byte or 4-byte
* quantity.
*/
result->u64 = 0;
/* Combine the per-thread results */
switch (pq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
{
uint64_t sum = 0;
for (unsigned i = 0; i < num_threads; i++) {
sum += pq->end[i];
}
result->u64 = sum;
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
result->b = false;
for (unsigned i = 0; i < num_threads; i++) {
/* safer (still not guaranteed) when there's an overflow */
if (pq->end[i] > 0) {
result->b = true;
break;
}
}
break;
case PIPE_QUERY_TIMESTAMP:
{
uint64_t max_time = 0;
for (unsigned i = 0; i < num_threads; i++) {
max_time = MAX2(max_time, pq->end[i]);
}
result->u64 = max_time;
}
break;
case PIPE_QUERY_TIME_ELAPSED:
{
uint64_t start = UINT64_MAX, end = 0;
for (unsigned i = 0; i < num_threads; i++) {
if (pq->start[i]) {
start = MIN2(start, pq->start[i]);
}
if (pq->end[i]) {
end = MAX2(end, pq->end[i]);
}
}
result->u64 = end - start;
}
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* os_get_time_nano return nanoseconds */
result->timestamp_disjoint.frequency = UINT64_C(1000000000);
result->timestamp_disjoint.disjoint = false;
break;
case PIPE_QUERY_GPU_FINISHED:
result->b = true;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
result->u64 = pq->num_primitives_generated[0];
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
result->u64 = pq->num_primitives_written[0];
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result->b = false;
for (unsigned s = 0; s < PIPE_MAX_VERTEX_STREAMS; s++) {
if (pq->num_primitives_generated[s] > pq->num_primitives_written[s]) {
result->b = true;
break;
}
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
result->b = pq->num_primitives_generated[0] > pq->num_primitives_written[0];
break;
case PIPE_QUERY_SO_STATISTICS:
result->so_statistics.num_primitives_written = pq->num_primitives_written[0];
result->so_statistics.primitives_storage_needed = pq->num_primitives_generated[0];
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
{
/* only ps_invocations are per-bin/thread */
uint64_t sum = 0;
for (unsigned i = 0; i < num_threads; i++) {
sum += pq->end[i];
}
pq->stats.ps_invocations = sum;
result->pipeline_statistics = pq->stats;
}
break;
default:
assert(0);
break;
}
return true;
}
static void
llvmpipe_get_query_result_resource(struct pipe_context *pipe,
struct pipe_query *q,
enum pipe_query_flags flags,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset)
{
const struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
const unsigned num_threads = MAX2(1, screen->num_threads);
const struct llvmpipe_query *pq = llvmpipe_query(q);
const struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
uint64_t ready;
if (pq->fence) {
/* only have a fence if there was a scene */
if (!lp_fence_signalled(pq->fence)) {
if (!lp_fence_issued(pq->fence))
llvmpipe_flush(pipe, NULL, __func__);
if (flags & PIPE_QUERY_WAIT)
lp_fence_wait(pq->fence);
}
ready = lp_fence_signalled(pq->fence);
} else {
ready = 1;
}
uint64_t value = 0, value2 = 0;
unsigned num_values = 1;
if (index == -1) {
value = ready;
} else {
/* don't write a value if fence hasn't signalled and partial isn't set */
if (!ready && !(flags & PIPE_QUERY_PARTIAL))
return;
switch (pq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
for (unsigned i = 0; i < num_threads; i++) {
value += pq->end[i];
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
for (unsigned i = 0; i < num_threads; i++) {
/* safer (still not guaranteed) when there's an overflow */
value = value || pq->end[i];
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
value = pq->num_primitives_generated[0];
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
value = pq->num_primitives_written[0];
break;
case PIPE_QUERY_TIMESTAMP:
for (unsigned i = 0; i < num_threads; i++) {
if (pq->end[i] > value) {
value = pq->end[i];
}
}
break;
case PIPE_QUERY_TIME_ELAPSED: {
uint64_t start = (uint64_t)-1, end = 0;
for (unsigned i = 0; i < num_threads; i++) {
if (pq->start[i] && pq->start[i] < start)
start = pq->start[i];
if (pq->end[i] && pq->end[i] > end)
end = pq->end[i];
}
value = end - start;
break;
}
case PIPE_QUERY_SO_STATISTICS:
value = pq->num_primitives_written[0];
value2 = pq->num_primitives_generated[0];
num_values = 2;
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
value = 0;
for (unsigned s = 0; s < PIPE_MAX_VERTEX_STREAMS; s++)
value |= (pq->num_primitives_generated[s] > pq->num_primitives_written[s]);
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
value = (pq->num_primitives_generated[0] > pq->num_primitives_written[0]);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
switch ((enum pipe_statistics_query_index)index) {
case PIPE_STAT_QUERY_IA_VERTICES:
value = pq->stats.ia_vertices;
break;
case PIPE_STAT_QUERY_IA_PRIMITIVES:
value = pq->stats.ia_primitives;
break;
case PIPE_STAT_QUERY_VS_INVOCATIONS:
value = pq->stats.vs_invocations;
break;
case PIPE_STAT_QUERY_GS_INVOCATIONS:
value = pq->stats.gs_invocations;
break;
case PIPE_STAT_QUERY_GS_PRIMITIVES:
value = pq->stats.gs_primitives;
break;
case PIPE_STAT_QUERY_C_INVOCATIONS:
value = pq->stats.c_invocations;
break;
case PIPE_STAT_QUERY_C_PRIMITIVES:
value = pq->stats.c_primitives;
break;
case PIPE_STAT_QUERY_PS_INVOCATIONS:
value = 0;
for (unsigned i = 0; i < num_threads; i++) {
value += pq->end[i];
}
break;
case PIPE_STAT_QUERY_HS_INVOCATIONS:
value = pq->stats.hs_invocations;
break;
case PIPE_STAT_QUERY_DS_INVOCATIONS:
value = pq->stats.ds_invocations;
break;
case PIPE_STAT_QUERY_CS_INVOCATIONS:
value = pq->stats.cs_invocations;
break;
case PIPE_STAT_QUERY_TS_INVOCATIONS:
value = pq->stats.ts_invocations;
break;
case PIPE_STAT_QUERY_MS_INVOCATIONS:
value = pq->stats.ms_invocations;
break;
case PIPE_STAT_QUERY_MS_PRIMITIVES:
value = pq->stats.ms_primitives;
break;
default:
UNREACHABLE("Invalid pipe_statistics_query_index");
}
break;
default:
fprintf(stderr, "Unknown query type %d\n", pq->type);
break;
}
}
uint8_t *dst = (uint8_t *) lpr->data + offset;
/* Write 1 or 2 result values */
for (unsigned i = 0; i < num_values; i++) {
if (i == 1) {
value = value2;
// advance dst pointer by 4 or 8 bytes
dst += (result_type == PIPE_QUERY_TYPE_I64 ||
result_type == PIPE_QUERY_TYPE_U64) ? 8 : 4;
}
switch (result_type) {
case PIPE_QUERY_TYPE_I32: {
int32_t *iptr = (int32_t *)dst;
*iptr = (int32_t) (value & INT32_MAX);
break;
}
case PIPE_QUERY_TYPE_U32: {
uint32_t *uptr = (uint32_t *)dst;
*uptr = (uint32_t) (value & UINT32_MAX);
break;
}
case PIPE_QUERY_TYPE_I64: {
int64_t *iptr = (int64_t *)dst;
*iptr = (int64_t)value;
break;
}
case PIPE_QUERY_TYPE_U64: {
uint64_t *uptr = (uint64_t *)dst;
*uptr = (uint64_t)value;
break;
}
}
}
}
static bool
llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct llvmpipe_query *pq = llvmpipe_query(q);
/* Check if the query is already in the scene. If so, we need to
* flush the scene now. Real apps shouldn't re-use a query in a
* frame of rendering.
*/
if (pq->fence && !lp_fence_issued(pq->fence)) {
llvmpipe_finish(pipe, __func__);
}
memset(pq->start, 0, sizeof(pq->start));
memset(pq->end, 0, sizeof(pq->end));
lp_setup_begin_query(llvmpipe->setup, pq);
switch (pq->type) {
case PIPE_QUERY_PRIMITIVES_EMITTED:
pq->num_primitives_written[0] = llvmpipe->so_stats[pq->index].num_primitives_written;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
pq->num_primitives_generated[0] = llvmpipe->so_stats[pq->index].primitives_storage_needed;
llvmpipe->active_primgen_queries++;
break;
case PIPE_QUERY_SO_STATISTICS:
pq->num_primitives_written[0] = llvmpipe->so_stats[pq->index].num_primitives_written;
pq->num_primitives_generated[0] = llvmpipe->so_stats[pq->index].primitives_storage_needed;
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
for (unsigned s = 0; s < PIPE_MAX_VERTEX_STREAMS; s++) {
pq->num_primitives_written[s] = llvmpipe->so_stats[s].num_primitives_written;
pq->num_primitives_generated[s] = llvmpipe->so_stats[s].primitives_storage_needed;
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
pq->num_primitives_written[0] = llvmpipe->so_stats[pq->index].num_primitives_written;
pq->num_primitives_generated[0] = llvmpipe->so_stats[pq->index].primitives_storage_needed;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* reset our cache */
if (llvmpipe->active_statistics_queries == 0) {
memset(&llvmpipe->pipeline_statistics, 0,
sizeof(llvmpipe->pipeline_statistics));
}
memcpy(&pq->stats, &llvmpipe->pipeline_statistics, sizeof(pq->stats));
llvmpipe->active_statistics_queries++;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
llvmpipe->active_occlusion_queries++;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
break;
default:
break;
}
return true;
}
static bool
llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct llvmpipe_query *pq = llvmpipe_query(q);
lp_setup_end_query(llvmpipe->setup, pq);
switch (pq->type) {
case PIPE_QUERY_PRIMITIVES_EMITTED:
pq->num_primitives_written[0] =
llvmpipe->so_stats[pq->index].num_primitives_written - pq->num_primitives_written[0];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
assert(llvmpipe->active_primgen_queries);
llvmpipe->active_primgen_queries--;
pq->num_primitives_generated[0] =
llvmpipe->so_stats[pq->index].primitives_storage_needed - pq->num_primitives_generated[0];
break;
case PIPE_QUERY_SO_STATISTICS:
pq->num_primitives_written[0] =
llvmpipe->so_stats[pq->index].num_primitives_written - pq->num_primitives_written[0];
pq->num_primitives_generated[0] =
llvmpipe->so_stats[pq->index].primitives_storage_needed - pq->num_primitives_generated[0];
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
for (unsigned s = 0; s < PIPE_MAX_VERTEX_STREAMS; s++) {
pq->num_primitives_written[s] =
llvmpipe->so_stats[s].num_primitives_written - pq->num_primitives_written[s];
pq->num_primitives_generated[s] =
llvmpipe->so_stats[s].primitives_storage_needed - pq->num_primitives_generated[s];
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
pq->num_primitives_written[0] =
llvmpipe->so_stats[pq->index].num_primitives_written - pq->num_primitives_written[0];
pq->num_primitives_generated[0] =
llvmpipe->so_stats[pq->index].primitives_storage_needed - pq->num_primitives_generated[0];
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
pq->stats.ia_vertices =
llvmpipe->pipeline_statistics.ia_vertices - pq->stats.ia_vertices;
pq->stats.ia_primitives =
llvmpipe->pipeline_statistics.ia_primitives - pq->stats.ia_primitives;
pq->stats.vs_invocations =
llvmpipe->pipeline_statistics.vs_invocations - pq->stats.vs_invocations;
pq->stats.gs_invocations =
llvmpipe->pipeline_statistics.gs_invocations - pq->stats.gs_invocations;
pq->stats.gs_primitives =
llvmpipe->pipeline_statistics.gs_primitives - pq->stats.gs_primitives;
pq->stats.c_invocations =
llvmpipe->pipeline_statistics.c_invocations - pq->stats.c_invocations;
pq->stats.c_primitives =
llvmpipe->pipeline_statistics.c_primitives - pq->stats.c_primitives;
pq->stats.ps_invocations =
llvmpipe->pipeline_statistics.ps_invocations - pq->stats.ps_invocations;
pq->stats.cs_invocations =
llvmpipe->pipeline_statistics.cs_invocations - pq->stats.cs_invocations;
pq->stats.hs_invocations =
llvmpipe->pipeline_statistics.hs_invocations - pq->stats.hs_invocations;
pq->stats.ds_invocations =
llvmpipe->pipeline_statistics.ds_invocations - pq->stats.ds_invocations;
pq->stats.ts_invocations =
llvmpipe->pipeline_statistics.ts_invocations - pq->stats.ts_invocations;
pq->stats.ms_invocations =
llvmpipe->pipeline_statistics.ms_invocations - pq->stats.ms_invocations;
llvmpipe->active_statistics_queries--;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
assert(llvmpipe->active_occlusion_queries);
llvmpipe->active_occlusion_queries--;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
break;
default:
break;
}
return true;
}
bool
llvmpipe_check_render_cond(struct llvmpipe_context *lp)
{
struct pipe_context *pipe = &lp->pipe;
if (lp->render_cond_buffer) {
uint32_t data = *(uint32_t *)((char *)lp->render_cond_buffer->data
+ lp->render_cond_offset);
return (!data) == lp->render_cond_cond;
}
if (!lp->render_cond_query)
return true; /* no query predicate, draw normally */
bool wait = (lp->render_cond_mode == PIPE_RENDER_COND_WAIT ||
lp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
uint64_t result;
bool b = pipe->get_query_result(pipe, lp->render_cond_query, wait,
(void*)&result);
if (b)
return ((!result) == lp->render_cond_cond);
else
return true;
}
static void
llvmpipe_set_active_query_state(struct pipe_context *pipe, bool enable)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
llvmpipe->queries_disabled = !enable;
/* for OQs we need to regenerate the fragment shader */
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
}
void
llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe)
{
llvmpipe->pipe.create_query = llvmpipe_create_query;
llvmpipe->pipe.destroy_query = llvmpipe_destroy_query;
llvmpipe->pipe.begin_query = llvmpipe_begin_query;
llvmpipe->pipe.end_query = llvmpipe_end_query;
llvmpipe->pipe.get_query_result = llvmpipe_get_query_result;
llvmpipe->pipe.get_query_result_resource = llvmpipe_get_query_result_resource;
llvmpipe->pipe.set_active_query_state = llvmpipe_set_active_query_state;
}