agx: compact 32-bit constants

AGX can convert f16->f32 for free on source read, so 32-bit float constants
that are exactly representable as f16 can be compacted to 16-bit to reduce
register pressure. This makes constant promotion more effective.

this saves a few instructions in "wall and chimney".

total instructions in shared programs: 2039709 -> 2039862 (<.01%)
instructions in affected programs: 12585 -> 12738 (1.22%)
helped: 0
HURT: 3

total bytes in shared programs: 14111800 -> 14112726 (<.01%)
bytes in affected programs: 102778 -> 103704 (0.90%)
helped: 7
HURT: 4
Inconclusive result (value mean confidence interval includes 0).

total uniforms in shared programs: 1533232 -> 1532271 (-0.06%)
uniforms in affected programs: 60255 -> 59294 (-1.59%)
helped: 481
HURT: 0
Uniforms are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
Alyssa Rosenzweig 2024-03-01 18:16:45 -04:00 committed by Marge Bot
parent 61b74894a9
commit fb785a5503
4 changed files with 65 additions and 0 deletions

View file

@ -2822,6 +2822,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
/* After DCE, use counts are right so we can run the optimizer. */
agx_optimizer(ctx);
agx_opt_compact_constants(ctx);
/* After inlining constants, promote what's left */
if (key->promote_constants && !(agx_compiler_debug & AGX_DBG_NOPROMOTE)) {

View file

@ -939,6 +939,7 @@ void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_opt_compact_constants(agx_context *ctx);
void agx_opt_promote_constants(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);

View file

@ -0,0 +1,62 @@
/*
* Copyright 2024 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include "util/bitset.h"
#include "util/half_float.h"
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_opcodes.h"
/*
 * AGX can convert 16-bit sources to 32-bit for free, so it's beneficial to
 * compact 32-bit constants down to 16-bit when doing so is lossless. This
 * reduces register pressure (GPR or uniform, depending on whether the constant
 * is promoted).
 *
 * The pass makes two walks over every instruction in the shader:
 *
 *  1. Record, per SSA value, whether it is ever read as a float source and
 *     whether it is ever read as anything else.
 *  2. For each 32-bit mov_imm whose result is read only as a float, round-trip
 *     the immediate through f16. If the 32-bit pattern survives bit-for-bit,
 *     shrink the destination and the immediate to 16-bit, and retag the
 *     sources of subsequently visited instructions that read the value.
 *
 * This is purely an optimization: if scratch memory cannot be allocated, the
 * IR is left untouched.
 */
void
agx_opt_compact_constants(agx_context *ctx)
{
   /* One bit per SSA value is enough, so size the sets in bits rather than in
    * whole words.
    * NOTE(review): assumes ctx->alloc bounds every SSA index used below, as
    * the previous sizing also did -- confirm.
    */
   const size_t words = BITSET_WORDS(ctx->alloc);

   /* TODO: Handle ints too */
   BITSET_WORD *src_float = calloc(words, sizeof(BITSET_WORD));
   BITSET_WORD *src_other = calloc(words, sizeof(BITSET_WORD));
   BITSET_WORD *replaced = calloc(words, sizeof(BITSET_WORD));

   /* Skipping the pass is always safe. This also covers the case where there
    * is nothing to track and calloc() legitimately returns NULL for size 0.
    */
   if (src_float == NULL || src_other == NULL || replaced == NULL)
      goto out;

   /* Analyze the types that we read constants as */
   agx_foreach_instr_global(ctx, I) {
      agx_foreach_ssa_src(I, s) {
         if (agx_is_float_src(I, s))
            BITSET_SET(src_float, I->src[s].value);
         else
            BITSET_SET(src_other, I->src[s].value);
      }
   }

   /* NOTE(review): sources are retagged in the same walk that shrinks the
    * constants, so this relies on each mov_imm being visited before the
    * instructions reading it -- confirm against the iteration order.
    */
   agx_foreach_instr_global(ctx, I) {
      if (I->op == AGX_OPCODE_MOV_IMM && I->dest[0].size == AGX_SIZE_32) {
         unsigned v = I->dest[0].value;

         /* Only values read exclusively as floats may change size */
         if (BITSET_TEST(src_float, v) && !BITSET_TEST(src_other, v)) {
            /* Try to compact to f16 */
            uint16_t compact = _mesa_float_to_half(uif(I->imm));

            /* Compare raw bits so only an exact round trip is accepted */
            if (I->imm == fui(_mesa_half_to_float(compact))) {
               I->dest[0].size = AGX_SIZE_16;
               I->imm = compact;
               BITSET_SET(replaced, v);
            }
         }
      } else {
         /* Keep the readers' source sizes in sync with the shrunk constant */
         agx_foreach_ssa_src(I, s) {
            if (BITSET_TEST(replaced, I->src[s].value)) {
               I->src[s].size = AGX_SIZE_16;
            }
         }
      }
   }

out:
   /* free(NULL) is a no-op, so this is safe after a partial allocation */
   free(replaced);
   free(src_float);
   free(src_other);
}

View file

@ -33,6 +33,7 @@ libasahi_agx_files = files(
'agx_opt_break_if.c',
'agx_opt_empty_else.c',
'agx_opt_jmp_none.c',
'agx_opt_compact_constants.c',
'agx_opt_promote_constants.c',
'agx_optimizer.c',
'agx_repair_ssa.c',