intel/anv: batch stats util

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24536>
Lionel Landwerlin 2023-06-07 00:37:03 +03:00 committed by Marge Bot
parent 1fdc089e9c
commit 2c3a51573a
7 changed files with 193 additions and 11 deletions


@@ -25,6 +25,7 @@
#include "intel_disasm.h"
#include "util/macros.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
#include <string.h>
@@ -67,11 +68,18 @@ intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
ctx->spec = intel_spec_load(devinfo);
else
ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
ctx->commands =
_mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->stats =
_mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
}
void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
{
_mesa_hash_table_destroy(ctx->commands, NULL);
_mesa_hash_table_destroy(ctx->stats, NULL);
intel_spec_destroy(ctx->spec);
}
@@ -1602,3 +1610,133 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx,
ctx->n_batch_buffer_start--;
}
void
intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
{
_mesa_hash_table_clear(ctx->stats, NULL);
}
void
intel_batch_stats(struct intel_batch_decode_ctx *ctx,
const uint32_t *batch, uint32_t batch_size,
uint64_t batch_addr, bool from_ring)
{
const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
int length;
struct intel_group *inst;
if (ctx->n_batch_buffer_start >= 100) {
fprintf(stderr, "Max batch buffer jumps exceeded\n");
return;
}
ctx->n_batch_buffer_start++;
for (p = batch; p < end; p += length) {
inst = intel_ctx_find_instruction(ctx, p);
length = intel_group_get_length(inst, p);
assert(inst == NULL || length > 0);
length = MAX2(1, length);
const char *name =
inst != NULL ? inst->name : "unknown";
struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
if (entry != NULL) {
entry->data = (void *)((uintptr_t)entry->data + 1);
} else {
_mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
}
if (inst == NULL)
continue;
if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
uint64_t next_batch_addr = 0;
bool ppgtt = false;
bool second_level = false;
bool predicate = false;
struct intel_field_iterator iter;
intel_field_iterator_init(&iter, inst, p, 0, false);
while (intel_field_iterator_next(&iter)) {
if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
next_batch_addr = iter.raw_value;
} else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
second_level = iter.raw_value;
} else if (strcmp(iter.name, "Address Space Indicator") == 0) {
ppgtt = iter.raw_value;
} else if (strcmp(iter.name, "Predication Enable") == 0) {
predicate = iter.raw_value;
}
}
if (!predicate) {
struct intel_batch_decode_bo next_batch =
ctx_get_bo(ctx, ppgtt, next_batch_addr);
if (next_batch.map == NULL) {
fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
next_batch_addr);
} else {
intel_batch_stats(ctx, next_batch.map, next_batch.size,
next_batch.addr, false);
}
if (second_level) {
/* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
* like a subroutine call. Commands that come afterwards get
* processed once the 2nd level batch buffer returns with
* MI_BATCH_BUFFER_END.
*/
continue;
} else if (!from_ring) {
/* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
* like a goto. Nothing after it will ever get processed. In
order to prevent the recursion from growing, we just stop
walking this batch here.
*/
break;
}
}
} else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
break;
}
}
ctx->n_batch_buffer_start--;
}
struct inst_stat {
const char *name;
uint32_t count;
};
static int
compare_inst_stat(const void *v1, const void *v2)
{
const struct inst_stat *i1 = v1, *i2 = v2;
return strcmp(i1->name, i2->name);
}
void
intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
{
struct util_dynarray arr;
util_dynarray_init(&arr, NULL);
hash_table_foreach(ctx->stats, entry) {
struct inst_stat inst = {
.name = (const char *)entry->key,
.count = (uintptr_t)entry->data,
};
util_dynarray_append(&arr, struct inst_stat, inst);
}
qsort(util_dynarray_begin(&arr),
util_dynarray_num_elements(&arr, struct inst_stat),
sizeof(struct inst_stat),
compare_inst_stat);
util_dynarray_foreach(&arr, struct inst_stat, i)
fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
util_dynarray_fini(&arr);
}
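
Taken together, the new entry points follow a reset/accumulate/print pattern: the counts live in ctx->stats until the next intel_batch_stats_reset(), so several batches can be fed before printing. A minimal sketch of how a caller might drive them, assuming a decoder context already set up with intel_batch_decode_ctx_init() and a mapped batch to analyze; the helper name and include path are illustrative, not part of this patch:

#include "common/intel_decoder.h"

/* Illustrative helper (not in the patch): count the instructions in one
 * batch and print the accumulated per-instruction totals. */
static void
dump_batch_stats(struct intel_batch_decode_ctx *ctx,
                 const uint32_t *batch, uint32_t batch_size,
                 uint64_t batch_addr)
{
   intel_batch_stats_reset(ctx);    /* start counting from zero */
   intel_batch_stats(ctx, batch, batch_size, batch_addr,
                     false /* from_ring */);
   intel_batch_print_stats(ctx);    /* one "name: count" line per opcode */
}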


@@ -53,3 +53,21 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx,
{
mesa_logw("Batch logging not supported on Android.");
}
void
intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
{
}
void
intel_batch_stats(struct intel_batch_decode_ctx *ctx,
const uint32_t *batch, uint32_t batch_size,
uint64_t batch_addr, bool from_ring)
{
}
void
intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
{
mesa_logw("Batch logging not supported on Android.");
}


@@ -273,6 +273,9 @@ struct intel_batch_decode_ctx {
int n_batch_buffer_start;
uint64_t acthd;
struct hash_table *commands;
struct hash_table *stats;
};
void intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
@@ -293,6 +296,14 @@ void intel_print_batch(struct intel_batch_decode_ctx *ctx,
const uint32_t *batch, uint32_t batch_size,
uint64_t batch_addr, bool from_ring);
void intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx);
void intel_batch_stats(struct intel_batch_decode_ctx *ctx,
const uint32_t *batch, uint32_t batch_size,
uint64_t batch_addr, bool from_ring);
void intel_batch_print_stats(struct intel_batch_decode_ctx *ctx);
#ifdef __cplusplus
}
#endif


@@ -105,6 +105,7 @@ static const struct debug_control debug_control[] = {
{ "isl", DEBUG_ISL },
{ "sparse", DEBUG_SPARSE },
{ "draw_bkp", DEBUG_DRAW_BKP },
{ "bat-stats", DEBUG_BATCH_STATS },
{ NULL, 0 }
};


@@ -95,6 +95,7 @@ extern uint64_t intel_debug;
#define DEBUG_ISL (1ull << 47)
#define DEBUG_SPARSE (1ull << 48)
#define DEBUG_DRAW_BKP (1ull << 49)
#define DEBUG_BATCH_STATS (1ull << 50)
#define DEBUG_ANY (~0ull)
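
The new bit plugs into the existing INTEL_DEBUG machinery, so it is enabled at runtime with INTEL_DEBUG=bat-stats (per the debug_control entry above) and tested with the usual INTEL_DEBUG() macro. A hedged sketch of the guard pattern the anv hunks below rely on; the helper name, variables, and include paths are assumptions for illustration:

#include "dev/intel_debug.h"      /* INTEL_DEBUG(), DEBUG_BATCH_STATS */
#include "common/intel_decoder.h" /* intel_print_batch(), intel_batch_stats() */

/* Illustrative helper (not in the patch): decode and/or count one batch
 * depending on which INTEL_DEBUG flags are set. */
static void
debug_dump_batch(struct intel_batch_decode_ctx *decoder,
                 const uint32_t *batch, uint32_t batch_size,
                 uint64_t batch_addr)
{
   if (INTEL_DEBUG(DEBUG_BATCH))
      intel_print_batch(decoder, batch, batch_size, batch_addr, false);
   if (INTEL_DEBUG(DEBUG_BATCH_STATS))
      intel_batch_stats(decoder, batch, batch_size, batch_addr, false);
}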


@@ -1190,7 +1190,7 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass)
{
- if (!INTEL_DEBUG(DEBUG_BATCH))
+ if (!INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS))
return;
struct anv_device *device = queue->device;
@@ -1209,19 +1209,28 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
uint64_t pass_batch_offset =
khr_perf_query_preamble_offset(perf_query_pool, perf_query_pass);
- intel_print_batch(queue->decoder,
- pass_batch_bo->map + pass_batch_offset, 64,
- pass_batch_bo->offset + pass_batch_offset, false);
+ if (INTEL_DEBUG(DEBUG_BATCH)) {
+ intel_print_batch(queue->decoder,
+ pass_batch_bo->map + pass_batch_offset, 64,
+ pass_batch_bo->offset + pass_batch_offset, false);
+ }
}
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
- struct anv_batch_bo **bo = u_vector_tail(&cmd_buffers[i]->seen_bbos);
+ struct anv_batch_bo *bbo =
+ list_first_entry(&cmd_buffers[i]->batch_bos, struct anv_batch_bo, link);
device->cmd_buffer_being_decoded = cmd_buffers[i];
- intel_print_batch(queue->decoder, (*bo)->bo->map,
- (*bo)->bo->size, (*bo)->bo->offset, false);
+ if (INTEL_DEBUG(DEBUG_BATCH)) {
+ intel_print_batch(queue->decoder, bbo->bo->map,
+ bbo->bo->size, bbo->bo->offset, false);
+ }
+ if (INTEL_DEBUG(DEBUG_BATCH_STATS)) {
+ intel_batch_stats(queue->decoder, bbo->bo->map,
+ bbo->bo->size, bbo->bo->offset, false);
+ }
device->cmd_buffer_being_decoded = NULL;
}
- } else {
+ } else if (INTEL_DEBUG(DEBUG_BATCH)) {
intel_print_batch(queue->decoder, device->trivial_batch_bo->map,
device->trivial_batch_bo->size,
device->trivial_batch_bo->offset, false);


@@ -3071,7 +3071,7 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_alloc;
- if (INTEL_DEBUG(DEBUG_BATCH)) {
+ if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
struct intel_batch_decode_ctx *decoder = &device->decoder[i];
@@ -3082,6 +3082,7 @@
&physical_device->info,
stderr, decode_flags, NULL,
decode_get_bo, NULL, device);
intel_batch_stats_reset(decoder);
decoder->engine = physical_device->queue.families[i].engine_class;
decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
@@ -3644,9 +3645,12 @@ void anv_DestroyDevice(
anv_device_destroy_context_or_vm(device);
- if (INTEL_DEBUG(DEBUG_BATCH)) {
- for (unsigned i = 0; i < pdevice->queue.family_count; i++)
+ if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
+ for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
+ if (INTEL_DEBUG(DEBUG_BATCH_STATS))
+ intel_batch_print_stats(&device->decoder[i]);
intel_batch_decode_ctx_finish(&device->decoder[i]);
+ }
}
close(device->fd);