From 5acedf5b31f475ffa2e16532ad8fbb3914feb7c2 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Tue, 19 Aug 2025 13:02:21 +0000 Subject: [PATCH] pan/bi: Prioritize consts moved to the FAU Instead of allocating constants to FAU entries on a first-come, first-served basis, it is more efficient to put the most frequently used constants in the FAU, because that saves the largest number of ADD_IMM instructions otherwise needed to push constants into registers. This commit does so with a simple counting pass that runs before the main constant lowering pass. Reviewed-by: Mary Guillemard Part-of: --- src/panfrost/compiler/bifrost_compile.c | 38 +++++++++++- .../valhall/test/test-lower-constants.cpp | 4 +- src/panfrost/compiler/valhall/va_compiler.h | 3 +- .../compiler/valhall/va_lower_constants.c | 62 ++++++++++++++++--- 4 files changed, 94 insertions(+), 13 deletions(-) diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index ef04834f22b..b6192359f39 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2020 Collabora Ltd. * Copyright (C) 2022 Alyssa Rosenzweig + * Copyright (C) 2025 Arm Ltd. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +32,7 @@ #include "panfrost/util/pan_ir.h" #include "util/perf/cpu_trace.h" #include "util/u_debug.h" +#include "util/u_qsort.h" #include "bifrost/disassemble.h" #include "panfrost/lib/pan_props.h" @@ -6092,6 +6094,14 @@ void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id) } } +static int +compare_u32(const void* a, const void* b, void* _) +{ + const uint32_t va = *(const uint32_t *)a; /* a, b point at elements */ + const uint32_t vb = *(const uint32_t *)b; + return (va < vb) - (va > vb); /* descending: largest count first */ +} + static bi_context * bi_compile_variant_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, @@ -6236,17 +6246,43 @@ bi_compile_variant_nir(nir_shader *nir, va_lower_isel(ctx); va_optimize(ctx); + /* Count how often a specific constant appears. */ + struct hash_table_u64 *const_hist = _mesa_hash_table_u64_create(ctx); bi_foreach_instr_global_safe(ctx, I) { /* Phis become single moves so shouldn't be affected */ if (I->op == BI_OPCODE_PHI) continue; - va_lower_constants(ctx, I); + va_count_constants(ctx, I, const_hist); + } + + uint32_t const_amount = _mesa_hash_table_u64_num_entries(const_hist); + uint32_t *sorted = rzalloc_array(ctx, uint32_t, const_amount); + + uint32_t idx = 0; + hash_table_u64_foreach(const_hist, entry) + { + sorted[idx++] = (uintptr_t)entry.data; + } + + util_qsort_r(sorted, const_amount, sizeof(uint32_t), compare_u32, NULL); + uint32_t max_amount = MIN2(const_amount, ctx->inputs->fau_consts.max_amount); + uint32_t min_count_for_fau = max_amount > 0 ? 
sorted[max_amount - 1] : 0; + ralloc_free(sorted); /* only the threshold is needed from here on */ + + bi_foreach_instr_global_safe(ctx, I) { + /* Phis become single moves so shouldn't be affected */ + if (I->op == BI_OPCODE_PHI) + continue; + + va_lower_constants(ctx, I, const_hist, min_count_for_fau); bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); va_repair_fau(&b, I); } + _mesa_hash_table_u64_destroy(const_hist); + /* We need to clean up after constant lowering */ if (likely(optimize)) { bi_opt_cse(ctx); diff --git a/src/panfrost/compiler/valhall/test/test-lower-constants.cpp b/src/panfrost/compiler/valhall/test/test-lower-constants.cpp index d58805392fd..b409fe3bca6 100644 --- a/src/panfrost/compiler/valhall/test/test-lower-constants.cpp +++ b/src/panfrost/compiler/valhall/test/test-lower-constants.cpp @@ -30,9 +30,11 @@ static inline void add_imm(bi_context *ctx) { + struct hash_table_u64 *stats = _mesa_hash_table_u64_create(ctx); bi_foreach_instr_global(ctx, I) { - va_lower_constants(ctx, I); + va_lower_constants(ctx, I, stats, UINT32_MAX); /* empty stats: FAU never tried */ } + _mesa_hash_table_u64_destroy(stats); } #define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm) diff --git a/src/panfrost/compiler/valhall/va_compiler.h b/src/panfrost/compiler/valhall/va_compiler.h index 22f58955e28..00650110539 100644 --- a/src/panfrost/compiler/valhall/va_compiler.h +++ b/src/panfrost/compiler/valhall/va_compiler.h @@ -38,7 +38,8 @@ bool va_validate_fau(bi_instr *I); void va_validate(FILE *fp, bi_context *ctx); void va_repair_fau(bi_builder *b, bi_instr *I); void va_fuse_add_imm(bi_instr *I); -void va_lower_constants(bi_context *ctx, bi_instr *I); +void va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count); +void va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts); void va_lower_isel(bi_context *ctx); void va_assign_slots(bi_context *ctx); void va_insert_flow_control_nops(bi_context *ctx); diff --git 
a/src/panfrost/compiler/valhall/va_lower_constants.c b/src/panfrost/compiler/valhall/va_lower_constants.c index 18db8403af4..dc4c489d857 100644 --- a/src/panfrost/compiler/valhall/va_lower_constants.c +++ b/src/panfrost/compiler/valhall/va_lower_constants.c @@ -136,11 +136,10 @@ va_move_const_to_fau(bi_builder *b, uint32_t value) } static bi_index -va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, - bool is_signed, bool staging) +va_lookup_constant(uint32_t value, struct va_src_info info, bool is_signed) { /* Try the constant as-is */ - if (!staging) { + { bi_index lut = va_lut_index_32(value); if (!bi_is_null(lut)) return lut; @@ -162,8 +161,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, /* Try using a single half of a FP16 constant */ bool replicated_halves = (value & 0xFFFF) == (value >> 16); - if (!staging && info.swizzle && info.size == VA_SIZE_16 && - replicated_halves) { + if (info.swizzle && info.size == VA_SIZE_16 && replicated_halves) { bi_index lut = va_lut_index_16(value & 0xFFFF); if (!bi_is_null(lut)) return lut; @@ -177,7 +175,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, } /* Try extending a byte */ - if (!staging && (info.widen || info.lanes || info.lane) && + if ((info.widen || info.lanes || info.lane) && is_extension_of_8(value, is_signed)) { bi_index lut = va_lut_index_8(value & 0xFF); @@ -186,7 +184,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, } /* Try extending a halfword */ - if (!staging && info.widen && is_extension_of_16(value, is_signed)) { + if (info.widen && is_extension_of_16(value, is_signed)) { bi_index lut = va_lut_index_16(value & 0xFFFF); if (!bi_is_null(lut)) @@ -194,7 +192,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, } /* Try demoting the constant to FP16 */ - if (!staging && info.swizzle && info.size == VA_SIZE_32) { + if (info.swizzle && info.size == VA_SIZE_32) 
{ bi_index lut = va_demote_constant_fp16(value); if (!bi_is_null(lut)) return lut; @@ -206,7 +204,20 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, } } + return bi_null(); +} + +static bi_index +va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, + bool is_signed, bool staging, bool try_move_to_fau) +{ if (!staging) { + bi_index lut = va_lookup_constant(value, info, is_signed); + if (!bi_is_null(lut)) + return lut; + } + + if (!staging && try_move_to_fau) { bi_index c = va_move_const_to_fau(b, value); if (!bi_is_null(c)) return c; @@ -255,7 +266,7 @@ va_resolve_swizzles(bi_context *ctx, bi_instr *I, unsigned s) } void -va_lower_constants(bi_context *ctx, bi_instr *I) +va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count) { bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); @@ -269,8 +280,11 @@ va_lower_constants(bi_context *ctx, bi_instr *I) struct va_src_info info = va_src_info(I->op, s); const uint32_t value = va_resolve_swizzles(ctx, I, s); + const uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value); + const bool move_to_fau = count >= min_fau_count; /* tally meets threshold? */ + bi_index cons = - va_resolve_constant(&b, value, info, is_signed, staging); + va_resolve_constant(&b, value, info, is_signed, staging, move_to_fau); cons.neg ^= I->src[s].neg; I->src[s] = cons; @@ -290,3 +304,31 @@ va_lower_constants(bi_context *ctx, bi_instr *I) } } } + +void +va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts) +{ + bi_foreach_src(I, s) { + if (I->src[s].type != BI_INDEX_CONSTANT) + continue; + + const bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs); + if (staging) + continue; + + bool is_signed = valhall_opcodes[I->op].is_signed; + struct va_src_info info = va_src_info(I->op, s); + uint32_t value = va_resolve_swizzles(ctx, I, s); + + bi_index cons = va_lookup_constant(value, info, is_signed); + + const bool can_lut = 
!bi_is_null(cons); + + /* Tally only constants the built-in LUT cannot provide, since only those + * compete for FAU slots; relies on a missing key reading back as 0. */ + if (!can_lut) { + uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value); + _mesa_hash_table_u64_insert(counts, value, (void*)(uintptr_t)(count + 1)); + } + } +}