mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 12:28:07 +02:00
radeonsi: expose performance counters as 64 bit
This is useful for shader-related counters, since they tend to quickly exceed 32 bits.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
f096096b77
commit
ad1782cfb5
2 changed files with 19 additions and 16 deletions
|
|
@ -84,8 +84,8 @@ struct r600_pc_group {
|
|||
|
||||
struct r600_pc_counter {
|
||||
unsigned base;
|
||||
unsigned dwords;
|
||||
unsigned stride;
|
||||
unsigned qwords;
|
||||
unsigned stride; /* in uint64s */
|
||||
};
|
||||
|
||||
#define R600_PC_SHADERS_WINDOWING (1 << 31)
|
||||
|
|
@ -172,7 +172,7 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
|
|||
pc->emit_read(ctx, block,
|
||||
group->num_counters, group->selectors,
|
||||
buffer, va);
|
||||
va += 4 * group->num_counters;
|
||||
va += sizeof(uint64_t) * group->num_counters;
|
||||
} while (group->instance < 0 && ++instance < block->num_instances);
|
||||
} while (++se < se_end);
|
||||
}
|
||||
|
|
@ -194,15 +194,15 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
|
|||
union pipe_query_result *result)
|
||||
{
|
||||
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
|
||||
uint32_t *results = buffer;
|
||||
uint64_t *results = buffer;
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < query->num_counters; ++i) {
|
||||
struct r600_pc_counter *counter = &query->counters[i];
|
||||
|
||||
for (j = 0; j < counter->dwords; ++j) {
|
||||
for (j = 0; j < counter->qwords; ++j) {
|
||||
uint32_t value = results[counter->base + j * counter->stride];
|
||||
result->batch[i].u32 += value;
|
||||
result->batch[i].u64 += value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -361,7 +361,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
|||
instances *= block->num_instances;
|
||||
|
||||
group->result_base = i;
|
||||
query->b.result_size += 4 * instances * group->num_counters;
|
||||
query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
|
||||
i += instances * group->num_counters;
|
||||
|
||||
pc->get_size(block, group->num_counters, group->selectors,
|
||||
|
|
@ -401,11 +401,11 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
|||
counter->base = group->result_base + j;
|
||||
counter->stride = group->num_counters;
|
||||
|
||||
counter->dwords = 1;
|
||||
counter->qwords = 1;
|
||||
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
|
||||
counter->dwords = screen->info.max_se;
|
||||
counter->qwords = screen->info.max_se;
|
||||
if (group->instance < 0)
|
||||
counter->dwords *= block->num_instances;
|
||||
counter->qwords *= block->num_instances;
|
||||
}
|
||||
|
||||
if (!r600_query_hw_init(rctx, &query->b))
|
||||
|
|
@ -535,7 +535,7 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
|
|||
info->name = block->selector_names + sub * block->selector_name_stride;
|
||||
info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
|
||||
info->max_value.u64 = 0;
|
||||
info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
|
||||
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
|
||||
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
|
||||
info->group_id = base_gid + sub / block->num_selectors;
|
||||
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
|
||||
|
|
|
|||
|
|
@ -208,6 +208,7 @@ static struct si_pc_block_base cik_PA_SC = {
|
|||
.layout = SI_PC_MULTI_ALTERNATE,
|
||||
};
|
||||
|
||||
/* According to docs, PA_SU counters are only 48 bits wide. */
|
||||
static struct si_pc_block_base cik_PA_SU = {
|
||||
.name = "PA_SU",
|
||||
.num_counters = 4,
|
||||
|
|
@ -651,24 +652,26 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
|
||||
COPY_DATA_DST_SEL(COPY_DATA_MEM));
|
||||
COPY_DATA_DST_SEL(COPY_DATA_MEM) |
|
||||
COPY_DATA_COUNT_SEL); /* 64 bits */
|
||||
radeon_emit(cs, reg >> 2);
|
||||
radeon_emit(cs, 0); /* unused */
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
va += 4;
|
||||
va += sizeof(uint64_t);
|
||||
reg += reg_delta;
|
||||
}
|
||||
} else {
|
||||
for (idx = 0; idx < count; ++idx) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
|
||||
COPY_DATA_DST_SEL(COPY_DATA_MEM));
|
||||
COPY_DATA_DST_SEL(COPY_DATA_MEM) |
|
||||
COPY_DATA_COUNT_SEL);
|
||||
radeon_emit(cs, 0); /* immediate */
|
||||
radeon_emit(cs, 0); /* unused */
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
va += 4;
|
||||
va += sizeof(uint64_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue