freedreno/a5xx+a6xx: Add base class for query samples

For PIPE_CAP_QUERY_BUFFER_OBJECT we'll need to write on the GPU a flag
when the query result is available, which means the buffers used for
query results should have a header with availability flag.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19400>
This commit is contained in:
Rob Clark 2022-10-30 09:23:52 -07:00 committed by Marge Bot
parent 46f84ce20a
commit c9b0cd6e80
6 changed files with 118 additions and 27 deletions

View file

@@ -37,9 +37,11 @@
#include "fd2_query.h"
struct PACKED fd2_query_sample {
struct fd_acc_query_sample base;
uint32_t start;
uint32_t stop;
};
DEFINE_CAST(fd_acc_query_sample, fd2_query_sample);
/* offset of a single field of an array of fd2_query_sample: */
#define query_sample_idx(aq, idx, field) \
@@ -138,11 +140,12 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
perfcntr_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd_batch_query_data *data = aq->query_data;
struct fd2_query_sample *sp = buf;
struct fd2_query_sample *sp = fd2_query_sample(s);
for (unsigned i = 0; i < data->num_query_entries; i++)
result->batch[i].u64 = sp[i].stop - sp[i].start;

View file

@@ -35,10 +35,16 @@
#include "fd5_query.h"
struct PACKED fd5_query_sample {
struct fd_acc_query_sample base;
/* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
uint64_t pad;
uint64_t start;
uint64_t result;
uint64_t stop;
};
DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
/* offset of a single field of an array of fd5_query_sample: */
#define query_sample_idx(aq, idx, field) \
@@ -65,6 +71,8 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
ASSERT_ALIGNED(struct fd5_query_sample, start, 16);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, start));
@@ -89,6 +97,8 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
ASSERT_ALIGNED(struct fd5_query_sample, stop, 16);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, stop));
@@ -114,18 +124,20 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
}
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
occlusion_counter_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
struct fd5_query_sample *sp = fd5_query_sample(s);
result->u64 = sp->result;
}
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
occlusion_predicate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
struct fd5_query_sample *sp = fd5_query_sample(s);
result->b = !!sp->result;
}
@@ -205,18 +217,20 @@ ticks_to_ns(uint32_t ts)
}
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
time_elapsed_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
struct fd5_query_sample *sp = fd5_query_sample(s);
result->u64 = ticks_to_ns(sp->result);
}
static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
timestamp_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
struct fd5_query_sample *sp = fd5_query_sample(s);
result->u64 = ticks_to_ns(sp->result);
}
@@ -345,11 +359,12 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
perfcntr_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd_batch_query_data *data = aq->query_data;
struct fd5_query_sample *sp = buf;
struct fd5_query_sample *sp = fd5_query_sample(s);
for (unsigned i = 0; i < data->num_query_entries; i++) {
result->batch[i].u64 = sp[i].result;

View file

@@ -37,10 +37,16 @@
#include "fd6_query.h"
struct PACKED fd6_query_sample {
struct fd_acc_query_sample base;
/* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
uint64_t pad;
uint64_t start;
uint64_t result;
uint64_t stop;
};
DEFINE_CAST(fd_acc_query_sample, fd6_query_sample);
/* offset of a single field of an array of fd6_query_sample: */
#define query_sample_idx(aq, idx, field) \
@@ -64,6 +70,8 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
struct fd_ringbuffer *ring = batch->draw;
ASSERT_ALIGNED(struct fd6_query_sample, start, 16);
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
@@ -88,6 +96,8 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
ASSERT_ALIGNED(struct fd6_query_sample, stop, 16);
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
OUT_RELOC(ring, query_sample(aq, stop));
@@ -116,18 +126,20 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
}
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
occlusion_counter_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_query_sample *sp = buf;
struct fd6_query_sample *sp = fd6_query_sample(s);
result->u64 = sp->result;
}
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
occlusion_predicate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_query_sample *sp = buf;
struct fd6_query_sample *sp = fd6_query_sample(s);
result->b = !!sp->result;
}
@@ -224,18 +236,20 @@ ticks_to_ns(uint64_t ts)
}
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
time_elapsed_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_query_sample *sp = buf;
struct fd6_query_sample *sp = fd6_query_sample(s);
result->u64 = ticks_to_ns(sp->result);
}
static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
timestamp_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_query_sample *sp = buf;
struct fd6_query_sample *sp = fd6_query_sample(s);
result->u64 = ticks_to_ns(sp->start);
}
@@ -265,12 +279,18 @@ static const struct fd_acc_sample_provider timestamp = {
};
struct PACKED fd6_primitives_sample {
struct fd_acc_query_sample base;
/* VPC_SO_STREAM_COUNTS dest address must be 32b aligned: */
uint64_t pad[3];
struct {
uint64_t emitted, generated;
} start[4], stop[4], result;
uint64_t prim_start[16], prim_stop[16], prim_emitted;
};
DEFINE_CAST(fd_acc_query_sample, fd6_primitives_sample);
#define primitives_relocw(ring, aq, field) \
OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, \
@@ -375,10 +395,11 @@ primitives_generated_pause(struct fd_acc_query *aq,
}
static void
primitives_generated_result(struct fd_acc_query *aq, void *buf,
primitives_generated_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_primitives_sample *ps = buf;
struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
log_counters(ps);
@@ -400,6 +421,9 @@ primitives_emitted_resume(struct fd_acc_query *aq,
struct fd_ringbuffer *ring = batch->draw;
fd_wfi(batch, ring);
ASSERT_ALIGNED(struct fd6_primitives_sample, start[0], 32);
OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
primitives_relocw(ring, aq, start[0]);
@@ -414,8 +438,11 @@ primitives_emitted_pause(struct fd_acc_query *aq,
fd_wfi(batch, ring);
ASSERT_ALIGNED(struct fd6_primitives_sample, stop[0], 32);
OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
primitives_relocw(ring, aq, stop[0]);
fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
@@ -430,10 +457,11 @@ primitives_emitted_pause(struct fd_acc_query *aq,
}
static void
primitives_emitted_result(struct fd_acc_query *aq, void *buf,
primitives_emitted_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd6_primitives_sample *ps = buf;
struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
log_counters(ps);
@@ -548,11 +576,12 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
perfcntr_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
union pipe_query_result *result)
{
struct fd_batch_query_data *data = aq->query_data;
struct fd6_query_sample *sp = buf;
struct fd6_query_sample *sp = fd6_query_sample(s);
for (unsigned i = 0; i < data->num_query_entries; i++) {
result->batch[i].u64 = sp[i].result;

View file

@@ -133,6 +133,26 @@ fd_acc_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
/* remove from active list: */
list_delinit(&aq->node);
/* mark the result available: */
struct fd_batch *batch = fd_context_batch_locked(ctx);
struct fd_ringbuffer *ring = batch->draw;
struct fd_resource *rsc = fd_resource(aq->prsc);
if (ctx->screen->gen < 5) {
OUT_PKT3(ring, CP_MEM_WRITE, 3);
OUT_RELOC(ring, rsc->bo, 0, 0, 0);
OUT_RING(ring, 1); /* low 32b */
OUT_RING(ring, 0); /* high 32b */
} else {
OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RELOC(ring, rsc->bo, 0, 0, 0);
OUT_RING(ring, 1); /* low 32b */
OUT_RING(ring, 0); /* high 32b */
}
fd_batch_unlock_submit(batch);
fd_batch_reference(&batch, NULL);
}
static bool

View file

@@ -54,6 +54,24 @@
struct fd_acc_query;
/**
* Base class for all query samples, on the GPU 'avail' is written to
* one when the query result is available.
*/
struct PACKED fd_acc_query_sample {
uint32_t avail;
uint32_t pad;
};
/**
* Helper to assert sample struct field has required alignment (ie. to
* catch issues at compile time if struct fd_acc_query_sample header
* ever changed, and to make the hw requirements more obvious)
*/
#define ASSERT_ALIGNED(type, field, nbytes) \
STATIC_ASSERT((offsetof(type, field) % nbytes) == 0)
struct fd_acc_sample_provider {
unsigned query_type;
@@ -65,7 +83,7 @@ struct fd_acc_sample_provider {
void (*resume)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
void (*pause)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
void (*result)(struct fd_acc_query *aq, void *buf,
void (*result)(struct fd_acc_query *aq, struct fd_acc_query_sample *s,
union pipe_query_result *result);
};

View file

@@ -169,6 +169,12 @@ struct __perf_time_state {
? os_time_get_nano() \
: 0)
#define DEFINE_CAST(parent, child) \
static inline struct child *child(struct parent *x) \
{ \
return (struct child *)x; \
}
struct fd_context;
/**