mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-08 21:30:23 +01:00
pan/bi: Prioritize consts moved to the FAU
Instead of allocating constants to FAU entries on a first-come-first-served basis, it is more efficient to place the most frequently used constants in the FAU, since that saves the greatest number of ADD_IMM instructions otherwise needed to push constants into registers. This commit does so using a simple counting pass run before the main constant lowering pass. Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36872>
This commit is contained in:
parent
e83ca0e954
commit
5acedf5b31
4 changed files with 94 additions and 13 deletions
|
|
@ -1,6 +1,7 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
* Copyright (C) 2022 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
* Copyright (C) 2025 Arm Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
|
@ -31,6 +32,7 @@
|
|||
#include "panfrost/util/pan_ir.h"
|
||||
#include "util/perf/cpu_trace.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_qsort.h"
|
||||
|
||||
#include "bifrost/disassemble.h"
|
||||
#include "panfrost/lib/pan_props.h"
|
||||
|
|
@ -6092,6 +6094,14 @@ void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
|
|||
}
|
||||
}
|
||||
|
||||
/* util_qsort_r comparator ordering 32-bit use counts in DESCENDING order, so
 * the most frequently used constants end up at the front of the array and
 * sorted[max_amount - 1] is a valid "minimum count that still earns an FAU
 * slot" threshold.
 *
 * NOTE(review): the previous `return va - vb;` subtracted unsigned values:
 * the wrap-around (and the implementation-defined int conversion of results
 * above INT_MAX) breaks the comparator's total order, and for small counts
 * it sorted ascending, which made the threshold pick admit far more than
 * `max_amount` constants. Explicit comparisons fix both.
 */
static int
compare_u32(const void *a, const void *b, void *_)
{
   const uint32_t va = (uintptr_t)a;
   const uint32_t vb = (uintptr_t)b;

   /* -1 when va > vb (a sorts first), +1 when va < vb: descending. */
   return (va < vb) - (va > vb);
}
|
||||
|
||||
static bi_context *
|
||||
bi_compile_variant_nir(nir_shader *nir,
|
||||
const struct pan_compile_inputs *inputs,
|
||||
|
|
@ -6236,17 +6246,43 @@ bi_compile_variant_nir(nir_shader *nir,
|
|||
va_lower_isel(ctx);
|
||||
va_optimize(ctx);
|
||||
|
||||
/* Count how often a specific constant appears. */
|
||||
struct hash_table_u64 *const_hist = _mesa_hash_table_u64_create(ctx);
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
/* Phis become single moves so shouldn't be affected */
|
||||
if (I->op == BI_OPCODE_PHI)
|
||||
continue;
|
||||
|
||||
va_lower_constants(ctx, I);
|
||||
va_count_constants(ctx, I, const_hist);
|
||||
}
|
||||
|
||||
uint32_t const_amount = _mesa_hash_table_u64_num_entries(const_hist);
|
||||
uint32_t *sorted = rzalloc_array(ctx, uint32_t, const_amount);
|
||||
|
||||
uint32_t idx = 0;
|
||||
hash_table_u64_foreach(const_hist, entry)
|
||||
{
|
||||
sorted[idx++] = (uintptr_t)entry.data;
|
||||
}
|
||||
|
||||
util_qsort_r(sorted, const_amount, sizeof(uint32_t), compare_u32, NULL);
|
||||
uint32_t max_amount = MIN2(const_amount, ctx->inputs->fau_consts.max_amount);
|
||||
uint32_t min_count_for_fau = max_amount > 0 ? sorted[max_amount - 1] : 0;
|
||||
ralloc_free(sorted);
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
/* Phis become single moves so shouldn't be affected */
|
||||
if (I->op == BI_OPCODE_PHI)
|
||||
continue;
|
||||
|
||||
va_lower_constants(ctx, I, const_hist, min_count_for_fau);
|
||||
|
||||
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
|
||||
va_repair_fau(&b, I);
|
||||
}
|
||||
|
||||
_mesa_hash_table_u64_destroy(const_hist);
|
||||
|
||||
/* We need to clean up after constant lowering */
|
||||
if (likely(optimize)) {
|
||||
bi_opt_cse(ctx);
|
||||
|
|
|
|||
|
|
@ -30,9 +30,11 @@
|
|||
static inline void
|
||||
add_imm(bi_context *ctx)
|
||||
{
|
||||
struct hash_table_u64 *stats = _mesa_hash_table_u64_create(ctx);
|
||||
bi_foreach_instr_global(ctx, I) {
|
||||
va_lower_constants(ctx, I);
|
||||
va_lower_constants(ctx, I, stats, UINT32_MAX);
|
||||
}
|
||||
_mesa_hash_table_u64_destroy(stats);
|
||||
}
|
||||
|
||||
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ bool va_validate_fau(bi_instr *I);
|
|||
void va_validate(FILE *fp, bi_context *ctx);
|
||||
void va_repair_fau(bi_builder *b, bi_instr *I);
|
||||
void va_fuse_add_imm(bi_instr *I);
|
||||
void va_lower_constants(bi_context *ctx, bi_instr *I);
|
||||
void va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count);
|
||||
void va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts);
|
||||
void va_lower_isel(bi_context *ctx);
|
||||
void va_assign_slots(bi_context *ctx);
|
||||
void va_insert_flow_control_nops(bi_context *ctx);
|
||||
|
|
|
|||
|
|
@ -136,11 +136,10 @@ va_move_const_to_fau(bi_builder *b, uint32_t value)
|
|||
}
|
||||
|
||||
static bi_index
|
||||
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
||||
bool is_signed, bool staging)
|
||||
va_lookup_constant(uint32_t value, struct va_src_info info, bool is_signed)
|
||||
{
|
||||
/* Try the constant as-is */
|
||||
if (!staging) {
|
||||
{
|
||||
bi_index lut = va_lut_index_32(value);
|
||||
if (!bi_is_null(lut))
|
||||
return lut;
|
||||
|
|
@ -162,8 +161,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
|
||||
/* Try using a single half of a FP16 constant */
|
||||
bool replicated_halves = (value & 0xFFFF) == (value >> 16);
|
||||
if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
|
||||
replicated_halves) {
|
||||
if (info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
|
||||
bi_index lut = va_lut_index_16(value & 0xFFFF);
|
||||
if (!bi_is_null(lut))
|
||||
return lut;
|
||||
|
|
@ -177,7 +175,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
}
|
||||
|
||||
/* Try extending a byte */
|
||||
if (!staging && (info.widen || info.lanes || info.lane) &&
|
||||
if ((info.widen || info.lanes || info.lane) &&
|
||||
is_extension_of_8(value, is_signed)) {
|
||||
|
||||
bi_index lut = va_lut_index_8(value & 0xFF);
|
||||
|
|
@ -186,7 +184,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
}
|
||||
|
||||
/* Try extending a halfword */
|
||||
if (!staging && info.widen && is_extension_of_16(value, is_signed)) {
|
||||
if (info.widen && is_extension_of_16(value, is_signed)) {
|
||||
|
||||
bi_index lut = va_lut_index_16(value & 0xFFFF);
|
||||
if (!bi_is_null(lut))
|
||||
|
|
@ -194,7 +192,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
}
|
||||
|
||||
/* Try demoting the constant to FP16 */
|
||||
if (!staging && info.swizzle && info.size == VA_SIZE_32) {
|
||||
if (info.swizzle && info.size == VA_SIZE_32) {
|
||||
bi_index lut = va_demote_constant_fp16(value);
|
||||
if (!bi_is_null(lut))
|
||||
return lut;
|
||||
|
|
@ -206,7 +204,20 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
}
|
||||
}
|
||||
|
||||
return bi_null();
|
||||
}
|
||||
|
||||
static bi_index
|
||||
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
||||
bool is_signed, bool staging, bool try_move_to_fau)
|
||||
{
|
||||
if (!staging) {
|
||||
bi_index lut = va_lookup_constant(value, info, is_signed);
|
||||
if (!bi_is_null(lut))
|
||||
return lut;
|
||||
}
|
||||
|
||||
if (!staging && try_move_to_fau) {
|
||||
bi_index c = va_move_const_to_fau(b, value);
|
||||
if (!bi_is_null(c))
|
||||
return c;
|
||||
|
|
@ -255,7 +266,7 @@ va_resolve_swizzles(bi_context *ctx, bi_instr *I, unsigned s)
|
|||
}
|
||||
|
||||
void
|
||||
va_lower_constants(bi_context *ctx, bi_instr *I)
|
||||
va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count)
|
||||
{
|
||||
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
|
||||
|
||||
|
|
@ -269,8 +280,11 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
|
|||
struct va_src_info info = va_src_info(I->op, s);
|
||||
const uint32_t value = va_resolve_swizzles(ctx, I, s);
|
||||
|
||||
const uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value);
|
||||
const bool move_to_fau = count >= min_fau_count;
|
||||
|
||||
bi_index cons =
|
||||
va_resolve_constant(&b, value, info, is_signed, staging);
|
||||
va_resolve_constant(&b, value, info, is_signed, staging, move_to_fau);
|
||||
cons.neg ^= I->src[s].neg;
|
||||
I->src[s] = cons;
|
||||
|
||||
|
|
@ -290,3 +304,31 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts)
|
||||
{
|
||||
bi_foreach_src(I, s) {
|
||||
if (I->src[s].type != BI_INDEX_CONSTANT)
|
||||
continue;
|
||||
|
||||
const bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
|
||||
if (staging)
|
||||
continue;
|
||||
|
||||
bool is_signed = valhall_opcodes[I->op].is_signed;
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
uint32_t value = va_resolve_swizzles(ctx, I, s);
|
||||
|
||||
bi_index cons = va_lookup_constant(value, info, is_signed);
|
||||
|
||||
const bool can_lut = !bi_is_null(cons);
|
||||
|
||||
/* We want to move constants that can't be created from built-in
|
||||
* constants into the FAU if they are not staging register sources. */
|
||||
if (!can_lut) {
|
||||
uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value);
|
||||
_mesa_hash_table_u64_insert(counts, value, (void*)(uintptr_t)(count + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue