/*
 * Patch overview (review notes; this text sits ahead of the first
 * "diff --git" header and is not part of any hunk).
 *
 * Adds per-instruction batch statistics to the Intel batch decoder and wires
 * them into anv behind a new "bat-stats" debug option, DEBUG_BATCH_STATS
 * (1ull << 50).
 *
 * intel_batch_decoder.c
 *   - intel_batch_decode_ctx_init() now creates two hash tables on the
 *     context: `commands` (pointer keys) and `stats` (string keys).
 *     intel_batch_decode_ctx_finish() destroys both before destroying the
 *     spec.  `commands` is only created and destroyed by this patch; nothing
 *     else here reads or writes it.
 *   - intel_batch_stats_reset(ctx): clears ctx->stats.
 *   - intel_batch_stats(ctx, batch, batch_size, batch_addr, from_ring):
 *     walks `batch` one instruction at a time (batch_size is a byte count, it
 *     is divided by sizeof(uint32_t) to find the end) and bumps a counter in
 *     ctx->stats keyed by the instruction name, or by "unknown" when no
 *     instruction is found.  Unpredicated MI_BATCH_BUFFER_START commands are
 *     followed recursively through ctx_get_bo(); MI_BATCH_BUFFER_END stops
 *     the walk.  Recursion is refused once ctx->n_batch_buffer_start reaches
 *     100.  `batch_addr` is accepted but never read by the body.
 *   - intel_batch_print_stats(ctx): copies the table into a temporary array,
 *     sorts it by instruction name and writes one "name: count" line per
 *     entry to ctx->fp.
 *
 * intel_batch_decoder_stub.c
 *   - Empty stubs for the reset and stats entry points; the print stub only
 *     logs a warning.
 *
 * anv_batch_chain.c / anv_device.c
 *   - anv_cmd_buffer_exec_batch_debug() now runs when either DEBUG_BATCH or
 *     DEBUG_BATCH_STATS is set.  Printing stays under DEBUG_BATCH; counting
 *     is done under DEBUG_BATCH_STATS, starting from the first entry of each
 *     command buffer's batch_bos list (this replaces the
 *     u_vector_tail(seen_bbos) lookup for printing as well).  The perf-query
 *     preamble and the trivial batch are printed only, never counted.
 *   - anv_CreateDevice() sets up the decoders when either flag is set and
 *     resets the stats table; anv_DestroyDevice() prints the stats of every
 *     queue family's decoder (only under DEBUG_BATCH_STATS) right before
 *     finishing it.
 *
 * NOTE(review): this copy of the patch had its line breaks collapsed; the
 * layout below was restored from the +/- markers and the hunk header counts.
 * Blank context lines and column alignment are reconstructed, not original.
 * NOTE(review): the last context line of the first hunk is a bare "#include"
 * whose header name is missing from this copy (it looks like an
 * angle-bracket include that was stripped) -- restore it from the tree
 * before trying to apply the patch.
 */
diff --git a/src/intel/common/intel_batch_decoder.c b/src/intel/common/intel_batch_decoder.c
index afe0f39f887..8ec3be21c37 100644
--- a/src/intel/common/intel_batch_decoder.c
+++ b/src/intel/common/intel_batch_decoder.c
@@ -25,6 +25,7 @@
 #include "intel_disasm.h"
 #include "util/macros.h"
 #include "util/u_debug.h"
+#include "util/u_dynarray.h"
 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
 
 #include
@@ -67,11 +68,18 @@ intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
       ctx->spec = intel_spec_load(devinfo);
    else
       ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
+
+   ctx->commands =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   ctx->stats =
+      _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
 }
 
 void
 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
 {
+   _mesa_hash_table_destroy(ctx->commands, NULL);
+   _mesa_hash_table_destroy(ctx->stats, NULL);
    intel_spec_destroy(ctx->spec);
 }
 
@@ -1602,3 +1610,133 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx,
 
    ctx->n_batch_buffer_start--;
 }
+
+void
+intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
+{
+   _mesa_hash_table_clear(ctx->stats, NULL);
+}
+
+void
+intel_batch_stats(struct intel_batch_decode_ctx *ctx,
+                  const uint32_t *batch, uint32_t batch_size,
+                  uint64_t batch_addr, bool from_ring)
+{
+   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
+   int length;
+   struct intel_group *inst;
+
+   if (ctx->n_batch_buffer_start >= 100) { /* cap on nested buffer jumps */
+      fprintf(stderr, "Max batch buffer jumps exceeded\n");
+      return;
+   }
+
+   ctx->n_batch_buffer_start++;
+
+   for (p = batch; p < end; p += length) {
+      inst = intel_ctx_find_instruction(ctx, p);
+      length = intel_group_get_length(inst, p);
+      assert(inst == NULL || length > 0);
+      length = MAX2(1, length); /* always advance by at least one dword */
+
+      const char *name =
+         inst != NULL ? inst->name : "unknown";
+
+      struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
+      if (entry != NULL) { /* the count is stored in the data pointer itself */
+         entry->data = (void *)((uintptr_t)entry->data + 1);
+      } else {
+         _mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
+      }
+
+      if (inst == NULL)
+         continue;
+
+      if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
+         uint64_t next_batch_addr = 0;
+         bool ppgtt = false;
+         bool second_level = false;
+         bool predicate = false;
+         struct intel_field_iterator iter;
+         intel_field_iterator_init(&iter, inst, p, 0, false);
+         while (intel_field_iterator_next(&iter)) {
+            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
+               next_batch_addr = iter.raw_value;
+            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
+               second_level = iter.raw_value;
+            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
+               ppgtt = iter.raw_value;
+            } else if (strcmp(iter.name, "Predication Enable") == 0) {
+               predicate = iter.raw_value;
+            }
+         }
+
+         if (!predicate) { /* predicated jumps are counted but not followed */
+            struct intel_batch_decode_bo next_batch =
+               ctx_get_bo(ctx, ppgtt, next_batch_addr);
+
+            if (next_batch.map == NULL) {
+               fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
+                       next_batch_addr);
+            } else {
+               intel_batch_stats(ctx, next_batch.map, next_batch.size,
+                                 next_batch.addr, false);
+            }
+            if (second_level) {
+               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
+                * like a subroutine call.  Commands that come afterwards get
+                * processed once the 2nd level batch buffer returns with
+                * MI_BATCH_BUFFER_END.
+                */
+               continue;
+            } else if (!from_ring) {
+               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
+                * like a goto.  Nothing after it will ever get processed.  The
+                * chained buffer was already handed to the recursive call above
+                * (when it could be mapped), so stop scanning this buffer here.
+                */
+               break;
+            }
+         }
+      } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
+         break;
+      }
+   }
+
+   ctx->n_batch_buffer_start--;
+}
+
+struct inst_stat { /* one row of the printed report */
+   const char *name;
+   uint32_t count;
+};
+
+static int
+compare_inst_stat(const void *v1, const void *v2)
+{
+   const struct inst_stat *i1 = v1, *i2 = v2;
+   return strcmp(i1->name, i2->name); /* order rows by instruction name */
+}
+
+void
+intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
+{
+   struct util_dynarray arr;
+   util_dynarray_init(&arr, NULL);
+
+   hash_table_foreach(ctx->stats, entry) {
+      struct inst_stat inst = {
+         .name = (const char *)entry->key,
+         .count = (uintptr_t)entry->data, /* narrowed to the 32-bit field */
+      };
+      util_dynarray_append(&arr, struct inst_stat, inst);
+   }
+   qsort(util_dynarray_begin(&arr),
+         util_dynarray_num_elements(&arr, struct inst_stat),
+         sizeof(struct inst_stat),
+         compare_inst_stat);
+   util_dynarray_foreach(&arr, struct inst_stat, i)
+      fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
+
+   util_dynarray_fini(&arr);
+}
diff --git a/src/intel/common/intel_batch_decoder_stub.c b/src/intel/common/intel_batch_decoder_stub.c
index 27bfeb42946..808755d7f63 100644
--- a/src/intel/common/intel_batch_decoder_stub.c
+++ b/src/intel/common/intel_batch_decoder_stub.c
@@ -53,3 +53,21 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx,
 {
    mesa_logw("Batch logging not supported on Android.");
 }
+
+void
+intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
+{ /* stub: nothing to reset */
+}
+
+void
+intel_batch_stats(struct intel_batch_decode_ctx *ctx,
+                  const uint32_t *batch, uint32_t batch_size,
+                  uint64_t batch_addr, bool from_ring)
+{ /* stub: nothing is counted */
+}
+
+void
+intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
+{
+   mesa_logw("Batch logging not supported on Android.");
+}
diff --git a/src/intel/common/intel_decoder.h b/src/intel/common/intel_decoder.h
index 09c38a73133..9025c8a384a 100644
--- a/src/intel/common/intel_decoder.h
+++ b/src/intel/common/intel_decoder.h
@@ -273,6 +273,9 @@ struct intel_batch_decode_ctx {
 
    int n_batch_buffer_start;
    uint64_t acthd;
+
+   struct hash_table *commands; /* pointer-keyed; only created/destroyed so far */
+   struct hash_table *stats; /* instruction name -> occurrence count */
 };
 
 void intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
@@ -293,6 +296,14 @@ void intel_print_batch(struct intel_batch_decode_ctx *ctx,
                        const uint32_t *batch, uint32_t batch_size,
                        uint64_t batch_addr, bool from_ring);
 
+void intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx);
+
+void intel_batch_stats(struct intel_batch_decode_ctx *ctx,
+                       const uint32_t *batch, uint32_t batch_size,
+                       uint64_t batch_addr, bool from_ring);
+
+void intel_batch_print_stats(struct intel_batch_decode_ctx *ctx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c
index 70f340eb487..e7c643131c9 100644
--- a/src/intel/dev/intel_debug.c
+++ b/src/intel/dev/intel_debug.c
@@ -105,6 +105,7 @@ static const struct debug_control debug_control[] = {
    { "isl", DEBUG_ISL },
    { "sparse", DEBUG_SPARSE },
    { "draw_bkp", DEBUG_DRAW_BKP },
+   { "bat-stats", DEBUG_BATCH_STATS },
    { NULL, 0 }
 };
 
diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h
index dde6b15f8d0..727d77d04a6 100644
--- a/src/intel/dev/intel_debug.h
+++ b/src/intel/dev/intel_debug.h
@@ -95,6 +95,7 @@ extern uint64_t intel_debug;
 #define DEBUG_ISL (1ull << 47)
 #define DEBUG_SPARSE (1ull << 48)
 #define DEBUG_DRAW_BKP (1ull << 49)
+#define DEBUG_BATCH_STATS (1ull << 50)
 
 #define DEBUG_ANY (~0ull)
 
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index b3afc5ebd93..2f8039e0bc0 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1190,7 +1190,7 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
                                 struct anv_query_pool *perf_query_pool,
                                 uint32_t perf_query_pass)
 {
-   if (!INTEL_DEBUG(DEBUG_BATCH))
+   if (!INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS))
       return;
 
    struct anv_device *device = queue->device;
@@ -1209,19 +1209,28 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
          uint64_t pass_batch_offset =
             khr_perf_query_preamble_offset(perf_query_pool, perf_query_pass);
 
-         intel_print_batch(queue->decoder,
-                           pass_batch_bo->map + pass_batch_offset, 64,
-                           pass_batch_bo->offset + pass_batch_offset, false);
+         if (INTEL_DEBUG(DEBUG_BATCH)) { /* preamble is printed, never counted */
+            intel_print_batch(queue->decoder,
+                              pass_batch_bo->map + pass_batch_offset, 64,
+                              pass_batch_bo->offset + pass_batch_offset, false);
+         }
       }
 
       for (uint32_t i = 0; i < cmd_buffer_count; i++) {
-         struct anv_batch_bo **bo = u_vector_tail(&cmd_buffers[i]->seen_bbos);
+         struct anv_batch_bo *bbo =
+            list_first_entry(&cmd_buffers[i]->batch_bos, struct anv_batch_bo, link);
          device->cmd_buffer_being_decoded = cmd_buffers[i];
-         intel_print_batch(queue->decoder, (*bo)->bo->map,
-                           (*bo)->bo->size, (*bo)->bo->offset, false);
+         if (INTEL_DEBUG(DEBUG_BATCH)) {
+            intel_print_batch(queue->decoder, bbo->bo->map,
+                              bbo->bo->size, bbo->bo->offset, false);
+         }
+         if (INTEL_DEBUG(DEBUG_BATCH_STATS)) {
+            intel_batch_stats(queue->decoder, bbo->bo->map,
+                              bbo->bo->size, bbo->bo->offset, false);
+         }
          device->cmd_buffer_being_decoded = NULL;
       }
-   } else {
+   } else if (INTEL_DEBUG(DEBUG_BATCH)) { /* trivial batch: print only */
       intel_print_batch(queue->decoder, device->trivial_batch_bo->map,
                         device->trivial_batch_bo->size,
                         device->trivial_batch_bo->offset, false);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 55e95569b18..0ae707e6cec 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -3071,7 +3071,7 @@ VkResult anv_CreateDevice(
    if (result != VK_SUCCESS)
       goto fail_alloc;
 
-   if (INTEL_DEBUG(DEBUG_BATCH)) {
+   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
       for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
          struct intel_batch_decode_ctx *decoder = &device->decoder[i];
 
@@ -3082,6 +3082,7 @@ VkResult anv_CreateDevice(
                                      &physical_device->info,
                                      stderr, decode_flags, NULL,
                                      decode_get_bo, NULL, device);
+         intel_batch_stats_reset(decoder);
 
          decoder->engine = physical_device->queue.families[i].engine_class;
          decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
@@ -3644,9 +3645,12 @@ void anv_DestroyDevice(
 
    anv_device_destroy_context_or_vm(device);
 
-   if (INTEL_DEBUG(DEBUG_BATCH)) {
-      for (unsigned i = 0; i < pdevice->queue.family_count; i++)
+   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
+      for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
+         if (INTEL_DEBUG(DEBUG_BATCH_STATS)) /* report before the tables go away */
+            intel_batch_print_stats(&device->decoder[i]);
          intel_batch_decode_ctx_finish(&device->decoder[i]);
+      }
    }
 
    close(device->fd);