mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
pan/bi: Push UBOs on Bifrost
Based on the Midgard pass. Results look better since Midgard already had a basic UBO pushing pass to begin with. Particularly nice to see the dramatic reduction in spilling.

total instructions in shared programs: 169141 -> 161215 (-4.69%)
instructions in affected programs: 164102 -> 156176 (-4.83%)
helped: 1269
HURT: 90
helped stats (abs) min: 1 max: 61 x̄: 6.50 x̃: 4
helped stats (rel) min: 0.15% max: 17.58% x̄: 6.31% x̃: 5.88%
HURT stats (abs) min: 1 max: 170 x̄: 3.58 x̃: 1
HURT stats (rel) min: 0.08% max: 133.33% x̄: 16.65% x̃: 5.26%
95% mean confidence interval for instructions value: -6.28 -5.38
95% mean confidence interval for instructions %-change: -5.39% -4.18%
Instructions are helped.

total nops in shared programs: 121049 -> 120997 (-0.04%)
nops in affected programs: 110024 -> 109972 (-0.05%)
helped: 501
HURT: 758
helped stats (abs) min: 1 max: 45 x̄: 5.54 x̃: 2
helped stats (rel) min: 0.25% max: 47.06% x̄: 6.81% x̃: 4.55%
HURT stats (abs) min: 1 max: 102 x̄: 3.59 x̃: 3
HURT stats (rel) min: 0.32% max: 50.00% x̄: 7.13% x̃: 6.06%
95% mean confidence interval for nops value: -0.45 0.37
95% mean confidence interval for nops %-change: 1.07% 2.09%
Inconclusive result (value mean confidence interval includes 0).

total clauses in shared programs: 40388 -> 31610 (-21.73%)
clauses in affected programs: 38825 -> 30047 (-22.61%)
helped: 1367
HURT: 2
helped stats (abs) min: 1 max: 58 x̄: 6.43 x̃: 5
helped stats (rel) min: 1.34% max: 55.56% x̄: 24.97% x̃: 25.00%
HURT stats (abs) min: 2 max: 12 x̄: 7.00 x̃: 7
HURT stats (rel) min: 5.08% max: 6.67% x̄: 5.88% x̃: 5.88%
95% mean confidence interval for clauses value: -6.74 -6.08
95% mean confidence interval for clauses %-change: -25.50% -24.35%
Clauses are helped.

total quadwords in shared programs: 144937 -> 130686 (-9.83%)
quadwords in affected programs: 140419 -> 126168 (-10.15%)
helped: 1369
HURT: 13
helped stats (abs) min: 1 max: 112 x̄: 10.50 x̃: 7
helped stats (rel) min: 0.23% max: 31.82% x̄: 11.36% x̃: 10.78%
HURT stats (abs) min: 1 max: 106 x̄: 10.00 x̃: 1
HURT stats (rel) min: 5.88% max: 10.24% x̄: 9.26% x̃: 10.00%
95% mean confidence interval for quadwords value: -10.96 -9.66
95% mean confidence interval for quadwords %-change: -11.52% -10.82%
Quadwords are helped.

total spills in shared programs: 1106 -> 705 (-36.26%)
spills in affected programs: 1058 -> 657 (-37.90%)
helped: 41
HURT: 0

total fills in shared programs: 2241 -> 1645 (-26.60%)
fills in affected programs: 2219 -> 1623 (-26.86%)
helped: 43
HURT: 2

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
This commit is contained in:
parent
040a350b1e
commit
a27d76a2d9
5 changed files with 167 additions and 0 deletions
|
|
@ -11,6 +11,7 @@ bifrost_FILES := \
|
|||
bifrost/bi_ra.c \
|
||||
bifrost/bi_opt_copy_prop.c \
|
||||
bifrost/bi_opt_dce.c \
|
||||
bifrost/bi_opt_push_ubo.c \
|
||||
bifrost/bi_quirks.h \
|
||||
bifrost/bi_test_pack.c \
|
||||
bifrost/bir.c \
|
||||
|
|
|
|||
159
src/panfrost/bifrost/bi_opt_push_ubo.c
Normal file
159
src/panfrost/bifrost/bi_opt_push_ubo.c
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Collabora, Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
#include "bi_builder.h"
|
||||
|
||||
/* This optimization pass, intended to run once after code emission but before
|
||||
* copy propagation, analyzes direct word-aligned UBO reads and promotes a
|
||||
* subset to moves from FAU. It is the sole populator of the UBO push data
|
||||
* structure returned back to the command stream. */
|
||||
|
||||
static bool
|
||||
bi_is_direct_aligned_ubo(bi_instr *ins)
|
||||
{
|
||||
return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
|
||||
(ins->seg == BI_SEG_UBO) &&
|
||||
(ins->src[0].type == BI_INDEX_CONSTANT) &&
|
||||
(ins->src[1].type == BI_INDEX_CONSTANT) &&
|
||||
((ins->src[0].value & 0x3) == 0);
|
||||
}
|
||||
|
||||
/* Represents use data for a single UBO */
|
||||
|
||||
#define MAX_UBO_WORDS (65536 / 16)
|
||||
|
||||
struct bi_ubo_block {
|
||||
BITSET_DECLARE(pushed, MAX_UBO_WORDS);
|
||||
uint8_t range[MAX_UBO_WORDS];
|
||||
};
|
||||
|
||||
/* Result of scanning a shader for direct UBO loads: one bi_ubo_block per
 * UBO index. The array is heap-allocated and must be freed by whoever
 * receives the analysis. */

struct bi_ubo_analysis {
        /* Number of entries in blocks[] */
        unsigned nr_blocks;

        /* Per-UBO use data, indexed by UBO index */
        struct bi_ubo_block *blocks;
};
|
||||
|
||||
static struct bi_ubo_analysis
|
||||
bi_analyze_ranges(bi_context *ctx)
|
||||
{
|
||||
struct bi_ubo_analysis res = {
|
||||
.nr_blocks = ctx->nir->info.num_ubos + 1,
|
||||
};
|
||||
|
||||
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
|
||||
|
||||
bi_foreach_instr_global(ctx, ins) {
|
||||
if (!bi_is_direct_aligned_ubo(ins)) continue;
|
||||
|
||||
unsigned ubo = ins->src[1].value;
|
||||
unsigned word = ins->src[0].value / 4;
|
||||
unsigned channels = bi_opcode_props[ins->op].sr_count;
|
||||
|
||||
assert(ubo < res.nr_blocks);
|
||||
assert(channels > 0 && channels <= 4);
|
||||
|
||||
if (word < MAX_UBO_WORDS)
|
||||
res.blocks[ubo].range[word] = channels;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Select UBO words to push. A sophisticated implementation would consider the
|
||||
* number of uses and perhaps the control flow to estimate benefit. This is not
|
||||
* sophisticated. Select from the last UBO first to prioritize sysvals. */
|
||||
|
||||
static void
|
||||
bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
|
||||
{
|
||||
for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
|
||||
struct bi_ubo_block *block = &analysis->blocks[ubo];
|
||||
|
||||
for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
|
||||
unsigned range = block->range[r];
|
||||
|
||||
/* Don't push something we don't access */
|
||||
if (range == 0) continue;
|
||||
|
||||
/* Don't push more than possible */
|
||||
if (push->count > PAN_MAX_PUSH - range)
|
||||
return;
|
||||
|
||||
for (unsigned offs = 0; offs < range; ++offs) {
|
||||
struct panfrost_ubo_word word = {
|
||||
.ubo = ubo,
|
||||
.offset = (r + offs) * 4
|
||||
};
|
||||
|
||||
push->words[push->count++] = word;
|
||||
}
|
||||
|
||||
/* Mark it as pushed so we can rewrite */
|
||||
BITSET_SET(block->pushed, r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bi_opt_push_ubo(bi_context *ctx)
|
||||
{
|
||||
/* This pass only runs once */
|
||||
assert(ctx->push->count == 0);
|
||||
|
||||
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
|
||||
bi_pick_ubo(ctx->push, &analysis);
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, ins) {
|
||||
if (!bi_is_direct_aligned_ubo(ins)) continue;
|
||||
|
||||
unsigned ubo = ins->src[1].value;
|
||||
unsigned offset = ins->src[0].value;
|
||||
|
||||
/* Check if we decided to push this */
|
||||
assert(ubo < analysis.nr_blocks);
|
||||
if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) continue;
|
||||
|
||||
/* Replace the UBO load with moves from FAU */
|
||||
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
|
||||
|
||||
unsigned channels = bi_opcode_props[ins->op].sr_count;
|
||||
|
||||
for (unsigned w = 0; w < channels; ++w) {
|
||||
/* FAU is grouped in pairs (2 x 4-byte) */
|
||||
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo,
|
||||
(offset + 4 * w));
|
||||
|
||||
unsigned fau_idx = (base >> 1);
|
||||
unsigned fau_hi = (base & 1);
|
||||
|
||||
bi_mov_i32_to(&b,
|
||||
bi_word(ins->dest[0], w),
|
||||
bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi));
|
||||
}
|
||||
|
||||
bi_remove_instruction(ins);
|
||||
}
|
||||
|
||||
free(analysis.blocks);
|
||||
}
|
||||
|
|
@ -2446,6 +2446,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
ctx->arch = inputs->gpu_id >> 12;
|
||||
ctx->is_blend = inputs->is_blend;
|
||||
ctx->blend_desc = inputs->blend.bifrost_blend_desc;
|
||||
ctx->push = &program->push;
|
||||
memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
|
||||
list_inithead(&ctx->blocks);
|
||||
|
||||
|
|
@ -2512,6 +2513,9 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
bi_cull_dead_branch(block);
|
||||
}
|
||||
|
||||
/* Runs before copy prop */
|
||||
bi_opt_push_ubo(ctx);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
do {
|
||||
|
|
|
|||
|
|
@ -498,6 +498,7 @@ typedef struct {
|
|||
gl_shader_stage stage;
|
||||
struct list_head blocks; /* list of bi_block */
|
||||
struct panfrost_sysvals sysvals;
|
||||
struct panfrost_ubo_push *push;
|
||||
uint32_t quirks;
|
||||
unsigned arch;
|
||||
unsigned tls_size;
|
||||
|
|
@ -748,6 +749,7 @@ void bi_print_shader(bi_context *ctx, FILE *fp);
|
|||
|
||||
bool bi_opt_copy_prop(bi_context *ctx);
|
||||
bool bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block, bool soft);
|
||||
void bi_opt_push_ubo(bi_context *ctx);
|
||||
void bi_schedule(bi_context *ctx);
|
||||
void bi_register_allocate(bi_context *ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ libpanfrost_bifrost_files = files(
|
|||
'bi_print.c',
|
||||
'bi_opt_copy_prop.c',
|
||||
'bi_opt_dce.c',
|
||||
'bi_opt_push_ubo.c',
|
||||
'bi_pack.c',
|
||||
'bi_ra.c',
|
||||
'bi_schedule.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue