brw,anv: use XML-based stats

I didn't bother switching either iris or elk/hasvk but one could.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37517>
This commit is contained in:
Alyssa Rosenzweig 2025-09-22 13:00:49 -04:00 committed by Marge Bot
parent b575b0954a
commit c2ae207e80
13 changed files with 116 additions and 192 deletions

View file

@ -462,7 +462,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);
struct brw_compile_stats stats[3];
struct genisa_stats stats[3];
struct brw_compile_fs_params params = {
.base = {
.nir = nir,

View file

@ -69,7 +69,7 @@ compile_single_bs(const struct brw_compiler *compiler,
struct brw_bs_prog_data *prog_data,
nir_shader *shader,
brw_generator *g,
struct brw_compile_stats *stats,
struct genisa_stats *stats,
int *prog_offset,
uint64_t *bsr)
{

View file

@ -291,7 +291,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
struct brw_compile_stats *stats = params->base.stats;
struct genisa_stats *stats = params->base.stats;
for (unsigned simd = 0; simd < 3; simd++) {
if (prog_data->prog_mask & (1u << simd)) {
assert(v[simd]);

View file

@ -1917,7 +1917,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
nir->info.name));
}
struct brw_compile_stats *stats = params->base.stats;
struct genisa_stats *stats = params->base.stats;
uint32_t max_dispatch_width = 0;
if (vmulti) {
@ -1947,7 +1947,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
max_dispatch_width = 32;
}
for (struct brw_compile_stats *s = params->base.stats; s != NULL && s != stats; s++)
for (struct genisa_stats *s = params->base.stats; s != NULL && s != stats; s++)
s->max_dispatch_width = max_dispatch_width;
g.add_const_data(nir->constant_data, nir->constant_data_size);

View file

@ -33,6 +33,7 @@
#include "util/mesa-sha1.h"
#include "util/enum_operators.h"
#include "util/ralloc.h"
#include "util/shader_stats.h"
#include "util/u_math.h"
#include "brw_isa_info.h"
#include "intel_shader_enums.h"
@ -1427,20 +1428,6 @@ DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH)
#undef DEFINE_PROG_DATA_DOWNCAST
/**
 * Statistics for one compiled shader variant, filled in by
 * brw_generator::generate_code() when the caller supplies a stats pointer.
 * Callers that compile several SIMD variants keep an array of these.
 */
struct brw_compile_stats {
uint32_t dispatch_width; /**< 0 for vec4 */
uint32_t max_polygons;
uint32_t max_dispatch_width; /**< Largest SIMD dispatch width */
uint32_t instructions; /**< Instructions in the final executable, NOPs excluded */
uint32_t sends; /**< Number of SEND messages */
uint32_t loops; /**< Number of loops (not unrolled) */
uint32_t cycles; /**< Estimated EU cycles; an estimate only */
uint32_t spills; /**< Number of scratch spill operations */
uint32_t fills; /**< Number of scratch fill operations */
uint32_t max_live_registers; /**< Maximum register pressure across the shader */
uint32_t non_ssa_registers_after_nir; /**< Non SSA regs after NIR translation to BRW */
};
/** @} */
struct brw_compiler *
@ -1483,7 +1470,7 @@ struct brw_compile_params {
nir_shader *nir;
struct brw_compile_stats *stats;
struct genisa_stats *stats;
void *log_data;

View file

@ -733,7 +733,7 @@ brw_generator::enable_debug(const char *shader_name)
int
brw_generator::generate_code(const brw_shader &s,
struct brw_compile_stats *stats)
struct genisa_stats *stats)
{
const int dispatch_width = s.dispatch_width;
struct brw_shader_stats shader_stats = s.shader_stats;
@ -1512,15 +1512,30 @@ brw_generator::generate_code(const brw_shader &s,
if (stats) {
stats->dispatch_width = dispatch_width;
stats->max_polygons = s.max_polygons;
stats->max_dispatch_width = dispatch_width;
stats->instructions = before_size / 16 - nop_count - sync_nop_count;
stats->sends = send_count;
stats->loops = loop_count;
stats->cycles = perf.latency;
stats->spills = shader_stats.spill_count;
stats->fills = shader_stats.fill_count;
stats->instrs = before_size / 16 - nop_count - sync_nop_count;
stats->send_messages = send_count;
stats->loop_count = loop_count;
stats->cycle_count = perf.latency;
stats->spill_count = shader_stats.spill_count;
stats->fill_count = shader_stats.fill_count;
stats->max_live_registers = shader_stats.max_register_pressure;
stats->non_ssa_registers_after_nir = shader_stats.non_ssa_registers_after_nir;
stats->non_ssa_regs_after_nir = shader_stats.non_ssa_registers_after_nir;
stats->source_hash = prog_data->source_hash;
stats->grf_registers = devinfo->ver >= 30 ? prog_data->grf_used : 0;
/* Report the max dispatch width only on the smallest SIMD variant.
*
* XXX: SIMD8 is not the smallest on Xe2. This logic should be adjusted.
*/
if (stage != MESA_SHADER_FRAGMENT || dispatch_width == 8)
stats->max_dispatch_width = dispatch_width;
else
stats->max_dispatch_width = 0;
if (mesa_shader_stage_uses_workgroup(stage))
stats->workgroup_memory_size = prog_data->total_shared;
else
stats->workgroup_memory_size = 0;
}
return start_offset;

View file

@ -19,7 +19,7 @@ public:
void enable_debug(const char *shader_name);
int generate_code(const brw_shader &s,
struct brw_compile_stats *stats);
struct genisa_stats *stats);
void add_const_data(void *data, unsigned size);
void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
const unsigned *get_assembly();

View file

@ -48,6 +48,14 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
return true;
}
/*
 * Sanity-check the compile statistics of an internal kernel: the shader must
 * not have spilled or filled, and it must contain exactly send_count SEND
 * messages.
 *
 * All checks are assert()s, so this function does nothing when NDEBUG is
 * defined.
 */
static void
check_sends(struct genisa_stats *stats, unsigned send_count)
{
assert(stats->spill_count == 0);
assert(stats->fill_count == 0);
assert(stats->send_messages == send_count);
}
static struct anv_shader_bin *
compile_shader(struct anv_device *device,
enum anv_internal_kernel_name shader_name,
@ -153,7 +161,7 @@ compile_shader(struct anv_device *device,
const unsigned *program;
if (stage == MESA_SHADER_FRAGMENT) {
struct brw_compile_stats stats[3];
struct genisa_stats stats[3];
struct brw_compile_fs_params params = {
.base = {
.nir = nir,
@ -170,28 +178,18 @@ compile_shader(struct anv_device *device,
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
unsigned stat_idx = 0;
if (prog_data.wm.dispatch_8) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
stat_idx++;
check_sends(&stats[stat_idx++], sends_count_expectation);
}
if (prog_data.wm.dispatch_16) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
stat_idx++;
check_sends(&stats[stat_idx++], sends_count_expectation);
}
if (prog_data.wm.dispatch_32) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends ==
sends_count_expectation *
(device->info->ver < 20 ? 2 : 1));
stat_idx++;
check_sends(&stats[stat_idx++], sends_count_expectation *
(device->info->ver < 20 ? 2 : 1));
}
}
} else {
struct brw_compile_stats stats;
struct genisa_stats stats;
struct brw_compile_cs_params params = {
.base = {
.nir = nir,
@ -206,9 +204,7 @@ compile_shader(struct anv_device *device,
program = brw_compile_cs(compiler, &params);
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
assert(stats.spills == 0);
assert(stats.fills == 0);
assert(stats.sends == sends_count_expectation);
check_sends(&stats, sends_count_expectation);
}
}

View file

@ -103,7 +103,7 @@ anv_shader_bin_create(struct anv_device *device,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_in,
uint32_t prog_data_size,
const struct brw_compile_stats *stats, uint32_t num_stats,
const struct genisa_stats *stats, uint32_t num_stats,
const nir_xfb_info *xfb_info_in,
const struct anv_pipeline_bind_map *bind_map,
const struct anv_push_descriptor_info *push_desc_info)
@ -381,7 +381,7 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
void *mem_ctx = ralloc_context(NULL);
uint32_t num_stats = blob_read_uint32(blob);
const struct brw_compile_stats *stats =
const struct genisa_stats *stats =
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
const nir_xfb_info *xfb_info = NULL;

View file

@ -1225,7 +1225,7 @@ struct anv_shader {
const struct brw_stage_prog_data *prog_data;
struct brw_compile_stats stats[3];
struct genisa_stats stats[3];
uint32_t num_stats;
char *nir_str;
@ -5097,7 +5097,7 @@ struct anv_shader_upload_params {
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
const struct brw_compile_stats *stats;
const struct genisa_stats *stats;
uint32_t num_stats;
const struct nir_xfb_info *xfb_info;
@ -5145,7 +5145,7 @@ struct anv_shader_bin {
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
struct brw_compile_stats stats[3];
struct genisa_stats stats[3];
uint32_t num_stats;
struct nir_xfb_info *xfb_info;
@ -5178,7 +5178,7 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
struct anv_pipeline_executable {
mesa_shader_stage stage;
struct brw_compile_stats stats;
struct genisa_stats stats;
char *nir;
char *disasm;

View file

@ -8,6 +8,7 @@
#include "nir/nir_serialize.h"
#include "compiler/brw_disasm.h"
#include "util/shader_stats.h"
static void
anv_shader_destroy(struct vk_device *vk_device,
@ -174,7 +175,7 @@ anv_shader_get_executable_properties(struct vk_device *device,
container_of(vk_shader, struct anv_shader, vk);
for (uint32_t i = 0; i < shader->num_stats; i++) {
const struct brw_compile_stats *stats = &shader->stats[i];
const struct genisa_stats *stats = &shader->stats[i];
vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
mesa_shader_stage stage = vk_shader->stage;
@ -219,145 +220,11 @@ anv_shader_get_executable_statistics(struct vk_device *vk_device,
{
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
statistics, statistic_count);
struct anv_device *device =
container_of(vk_device, struct anv_device, vk);
struct anv_shader *shader =
container_of(vk_shader, struct anv_shader, vk);
assert(executable_index < shader->num_stats);
const struct brw_compile_stats *stats = &shader->stats[executable_index];
const struct brw_stage_prog_data *prog_data = shader->prog_data;
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Instruction Count");
VK_COPY_STR(stat->description,
"Number of GEN instructions in the final generated "
"shader executable.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->instructions;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "SEND Count");
VK_COPY_STR(stat->description,
"Number of instructions in the final generated shader "
"executable which access external units such as the "
"constant cache or the sampler.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->sends;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Loop Count");
VK_COPY_STR(stat->description,
"Number of loops (not unrolled) in the final generated "
"shader executable.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->loops;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Cycle Count");
VK_COPY_STR(stat->description,
"Estimate of the number of EU cycles required to execute "
"the final generated executable. This is an estimate only "
"and may vary greatly from actual run-time performance.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->cycles;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Spill Count");
VK_COPY_STR(stat->description,
"Number of scratch spill operations. This gives a rough "
"estimate of the cost incurred due to spilling temporary "
"values to memory. If this is non-zero, you may want to "
"adjust your shader to reduce register pressure.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->spills;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Fill Count");
VK_COPY_STR(stat->description,
"Number of scratch fill operations. This gives a rough "
"estimate of the cost incurred due to spilling temporary "
"values to memory. If this is non-zero, you may want to "
"adjust your shader to reduce register pressure.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->fills;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Scratch Memory Size");
VK_COPY_STR(stat->description,
"Number of bytes of scratch memory required by the "
"generated shader executable. If this is non-zero, you "
"may want to adjust your shader to reduce register "
"pressure.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = prog_data->total_scratch;
}
if (device->info->ver >= 30) {
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "GRF registers");
VK_COPY_STR(stat->description,
"Number of GRF registers required by the shader.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = prog_data->grf_used;
}
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Max dispatch width");
VK_COPY_STR(stat->description,
"Largest SIMD dispatch width.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
/* Report the max dispatch width only on the smallest SIMD variant */
if (vk_shader->stage != MESA_SHADER_FRAGMENT || stats->dispatch_width == 8)
stat->value.u64 = stats->max_dispatch_width;
else
stat->value.u64 = 0;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Max live registers");
VK_COPY_STR(stat->description,
"Maximum number of registers used across the entire shader.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->max_live_registers;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Workgroup Memory Size");
VK_COPY_STR(stat->description,
"Number of bytes of workgroup shared memory used by this "
"shader including any padding.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
if (mesa_shader_stage_uses_workgroup(vk_shader->stage))
stat->value.u64 = prog_data->total_shared;
else
stat->value.u64 = 0;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Source hash");
VK_PRINT_STR(stat->description,
"hash = 0x%08x. Hash generated from shader source.",
prog_data->source_hash);
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = prog_data->source_hash;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
VK_COPY_STR(stat->name, "Non SSA regs after NIR");
VK_COPY_STR(stat->description, "Non SSA regs after NIR translation to BRW.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = stats->non_ssa_registers_after_nir;
}
vk_add_genisa_stats(out, &shader->stats[executable_index]);
return VK_SUCCESS;
}

View file

@ -93,7 +93,7 @@ struct anv_shader_data {
const nir_xfb_info *xfb_info;
uint32_t num_stats;
struct brw_compile_stats stats[3];
struct genisa_stats stats[3];
char *disasm[3];
bool use_primitive_replication;

View file

@ -121,4 +121,63 @@
<stat name="SMEM">Number of SMEM instructions</stat>
<stat name="VOPD" more="better">Number of VOPD instructions</stat>
</isa>
<!-- Shader statistics for Intel GPUs. -->
<family name="Intel">
<!--
Statistics for the GenISA backend. The stat names and descriptions are the
strings the anv driver used to hard-code when reporting Vulkan pipeline
executable statistics.
NOTE(review): presumably each stat becomes a field of struct genisa_stats,
named after the "display" attribute when present (e.g. "Send messages" maps
to send_messages) and after "name" otherwise; confirm against the generator.
-->
<isa name="GenISA">
<!--
Bookkeeping values that were not reported as statistics before.
NOTE(review): hidden="true" presumably keeps them out of the reported
list; confirm against the generator.
-->
<stat name="Dispatch width" hidden="true">0 for vec4</stat>
<stat name="Max polygons" hidden="true"/>
<stat name="Instruction Count" display="Instrs">
Number of GEN instructions in the final generated shader executable.
</stat>
<stat name="SEND Count" display="Send messages">
Number of instructions in the final generated shader executable
which access external units such as the constant cache or the sampler.
</stat>
<stat name="Loop Count" display="Loop count">
Number of loops (not unrolled) in the final generated shader
executable.
</stat>
<stat name="Cycle Count" display="Cycle count">
Estimate of the number of EU cycles required to execute the final
generated executable. This is an estimate only and may vary greatly
from actual run-time performance.
</stat>
<stat name="Spill Count" display="Spill count">
Number of scratch spill operations. This gives a rough estimate of
the cost incurred due to spilling temporary values to memory. If
this is non-zero, you may want to adjust your shader to reduce
register pressure.
</stat>
<stat name="Fill Count" display="Fill count">
Number of scratch fill operations. This gives a rough estimate of
the cost incurred due to spilling temporary values to memory. If
this is non-zero, you may want to adjust your shader to reduce
register pressure.
</stat>
<stat name="Scratch Memory Size">
Number of bytes of scratch memory required by the generated shader
executable. If this is non-zero, you may want to adjust your shader
to reduce register pressure.
</stat>
<!-- The code generator reports 0 here on hardware older than ver 30. -->
<stat name="GRF registers">
Number of GRF registers required by the shader.
</stat>
<!--
For fragment shaders the code generator fills this in only on the SIMD8
variant and writes 0 on the others.
-->
<stat name="Max dispatch width" more="better">
Largest SIMD dispatch width.
</stat>
<stat name="Max live registers">
Maximum number of registers used across the entire shader.
</stat>
<!-- The code generator reports 0 for stages that do not use a workgroup. -->
<stat name="Workgroup Memory Size">
Number of bytes of workgroup shared memory used by this shader
including any padding.
</stat>
<stat name="Non SSA regs after NIR">
Non SSA regs after NIR translation to BRW.
</stat>
<stat name="Source hash" hash="true">
Hash generated from shader source.
</stat>
</isa>
</family>
</shaderdb>