mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 22:10:10 +01:00
freedreno/a4xx: implement time-elapsed query
Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
62fa868728
commit
ba194630cc
2 changed files with 133 additions and 0 deletions
|
|
@ -49,6 +49,8 @@ struct fd4_context {
|
|||
|
||||
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
|
||||
* could combine it with another allocation.
|
||||
*
|
||||
* (upper area used as scratch bo.. see fd4_query)
|
||||
*/
|
||||
struct fd_bo *vsc_size_mem;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "freedreno_util.h"
|
||||
|
||||
#include "fd4_query.h"
|
||||
#include "fd4_context.h"
|
||||
#include "fd4_draw.h"
|
||||
#include "fd4_format.h"
|
||||
|
||||
|
|
@ -107,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
|
|||
result->b |= (n > 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Time Elapsed Query:
|
||||
*
|
||||
* Note: we could in theory support timestamp queries, but they
|
||||
* won't give sensible results for tilers.
|
||||
*/
|
||||
|
||||
static void
|
||||
time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
/* Right now, the assignment of countable to counter register is
|
||||
* just hard coded. If we start exposing more countables than we
|
||||
* have counters, we will need to be more clever.
|
||||
*/
|
||||
fd_wfi(ctx, ring);
|
||||
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
|
||||
OUT_RING(ring, CP_ALWAYS_COUNT);
|
||||
}
|
||||
|
||||
static struct fd_hw_sample *
|
||||
time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
|
||||
|
||||
/* use unused part of vsc_size_mem as scratch space, to avoid
|
||||
* extra allocation:
|
||||
*/
|
||||
struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
|
||||
const int sample_off = 128;
|
||||
const int addr_off = sample_off + 8;
|
||||
|
||||
debug_assert(ctx->screen->max_freq > 0);
|
||||
|
||||
/* Basic issue is that we need to read counter value to a relative
|
||||
* destination (with per-tile offset) rather than absolute dest
|
||||
* addr. But there is no pm4 packet that can do that. This is
|
||||
* where it would be *really* nice if we could write our own fw
|
||||
* since afaict implementing the sort of packet we need would be
|
||||
* trivial.
|
||||
*
|
||||
* Instead, we:
|
||||
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
|
||||
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
|
||||
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
|
||||
* address to the per-sample offset in the scratch buffer
|
||||
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
|
||||
* to CP_ME_NRT_ADDR
|
||||
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch
|
||||
* buffer to CP_ME_NRT_DATA to trigger the write out to query
|
||||
* result buffer
|
||||
*
|
||||
* Straightforward, right?
|
||||
*
|
||||
* Maybe could swap the order of things in the scratch buffer to
|
||||
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
|
||||
* shot, but that's really just polishing a turd..
|
||||
*/
|
||||
|
||||
fd_wfi(ctx, ring);
|
||||
|
||||
/* copy sample counter _LO and _HI to scratch: */
|
||||
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
|
||||
CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
|
||||
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
|
||||
|
||||
/* ok... here we really *would* like to use the CP_SET_CONSTANT
|
||||
* mode which can add a constant to value in reg2 and write to
|
||||
* reg1... *but* that only works for banked/context registers,
|
||||
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some
|
||||
* CP math to the scratch buffer instead:
|
||||
*
|
||||
* (note first 8 bytes are counter value, use offset 0x8 for
|
||||
* address calculation)
|
||||
*/
|
||||
|
||||
/* per-sample offset to scratch bo: */
|
||||
OUT_PKT3(ring, CP_MEM_WRITE, 2);
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
OUT_RING(ring, samp->offset);
|
||||
|
||||
/* now add to that the per-tile base: */
|
||||
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
|
||||
CP_REG_TO_MEM_0_ACCUMULATE |
|
||||
CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
|
||||
/* now copy that back to CP_ME_NRT_ADDR: */
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
|
||||
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
|
||||
* to trigger the write to result buffer
|
||||
*/
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
|
||||
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
|
||||
|
||||
/* and again to get the value of the _HI reg from scratch: */
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
|
||||
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
|
||||
|
||||
/* Sigh.. */
|
||||
|
||||
return samp;
|
||||
}
|
||||
|
||||
static void
|
||||
time_elapsed_accumulate_result(struct fd_context *ctx,
|
||||
const void *start, const void *end,
|
||||
union pipe_query_result *result)
|
||||
{
|
||||
uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
|
||||
/* max_freq is in Hz, convert cycle count to ns: */
|
||||
result->u64 += n * 1000000000 / ctx->screen->max_freq;
|
||||
}
|
||||
|
||||
static const struct fd_hw_sample_provider occlusion_counter = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
|
||||
.active = FD_STAGE_DRAW,
|
||||
|
|
@ -121,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
|
|||
.accumulate_result = occlusion_predicate_accumulate_result,
|
||||
};
|
||||
|
||||
static const struct fd_hw_sample_provider time_elapsed = {
|
||||
.query_type = PIPE_QUERY_TIME_ELAPSED,
|
||||
.active = FD_STAGE_DRAW,
|
||||
.enable = time_elapsed_enable,
|
||||
.get_sample = time_elapsed_get_sample,
|
||||
.accumulate_result = time_elapsed_accumulate_result,
|
||||
};
|
||||
|
||||
void fd4_query_context_init(struct pipe_context *pctx)
|
||||
{
|
||||
fd_hw_query_register_provider(pctx, &occlusion_counter);
|
||||
fd_hw_query_register_provider(pctx, &occlusion_predicate);
|
||||
fd_hw_query_register_provider(pctx, &time_elapsed);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue