mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
pan/bi: Calculate dependency graph when bundling
Code is ported from Midgard, modified to be scalar, post-RA, and to put the arrays on the worklist instead of the instruction to save memory. This enables out-of-order scheduling. total tuples in shared programs: 128691 -> 125304 (-2.63%) tuples in affected programs: 114091 -> 110704 (-2.97%) helped: 844 HURT: 377 helped stats (abs) min: 1.0 max: 150.0 x̄: 4.88 x̃: 3 helped stats (rel) min: 0.30% max: 26.42% x̄: 5.56% x̃: 4.35% HURT stats (abs) min: 1.0 max: 8.0 x̄: 1.94 x̃: 1 HURT stats (rel) min: 0.20% max: 33.33% x̄: 6.84% x̃: 3.23% 95% mean confidence interval for tuples value: -3.16 -2.38 95% mean confidence interval for tuples %-change: -2.19% -1.27% Tuples are helped. total clauses in shared programs: 27579 -> 26059 (-5.51%) clauses in affected programs: 20606 -> 19086 (-7.38%) helped: 941 HURT: 39 helped stats (abs) min: 1.0 max: 21.0 x̄: 1.66 x̃: 1 helped stats (rel) min: 0.69% max: 44.44% x̄: 10.48% x̃: 9.09% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.15 x̃: 1 HURT stats (rel) min: 1.89% max: 10.00% x̄: 4.73% x̃: 4.55% 95% mean confidence interval for clauses value: -1.63 -1.47 95% mean confidence interval for clauses %-change: -10.27% -9.48% Clauses are helped. total cycles in shared programs: 12262.54 -> 12154.79 (-0.88%) cycles in affected programs: 2210.54 -> 2102.79 (-4.87%) helped: 374 HURT: 56 helped stats (abs) min: 0.041665999999999315 max: 6.25 x̄: 0.30 x̃: 0 helped stats (rel) min: 0.42% max: 26.00% x̄: 6.90% x̃: 7.14% HURT stats (abs) min: 0.041665999999999315 max: 0.5833319999999986 x̄: 0.11 x̃: 0 HURT stats (rel) min: 0.16% max: 100.00% x̄: 55.17% x̃: 50.00% 95% mean confidence interval for cycles value: -0.29 -0.21 95% mean confidence interval for cycles %-change: -1.37% 3.73% Inconclusive result (%-change mean confidence interval includes 0). total arith in shared programs: 4852.29 -> 4658.13 (-4.00%) arith in affected programs: 4525.17 -> 4331 (-4.29%) helped: 1112 HURT: 166 helped stats (abs) min: 0.041665999999999315 max: 6.25 x̄: 0.19 x̃: 0 helped stats (rel) min: 0.42% max: 33.33% x̄: 6.59% x̃: 5.36% HURT stats (abs) min: 0.041665999999999315 max: 0.5833319999999986 x̄: 0.07 x̃: 0 HURT stats (rel) min: 0.16% max: 100.00% x̄: 25.05% x̃: 2.40% 95% mean confidence interval for arith value: -0.17 -0.14 95% mean confidence interval for arith %-change: -3.44% -1.51% Arith are helped. total quadwords in shared programs: 117141 -> 111394 (-4.91%) quadwords in affected programs: 104390 -> 98643 (-5.51%) helped: 1245 HURT: 76 helped stats (abs) min: 1.0 max: 69.0 x̄: 4.74 x̃: 4 helped stats (rel) min: 0.28% max: 35.00% x̄: 7.88% x̃: 6.45% HURT stats (abs) min: 1.0 max: 8.0 x̄: 2.01 x̃: 1 HURT stats (rel) min: 0.20% max: 10.00% x̄: 3.52% x̃: 4.25% 95% mean confidence interval for quadwords value: -4.61 -4.09 95% mean confidence interval for quadwords %-change: -7.56% -6.88% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>
This commit is contained in:
parent
e46ec44a51
commit
5996622050
3 changed files with 177 additions and 8 deletions
|
|
@ -38,6 +38,13 @@ struct bi_worklist {
|
|||
|
||||
/* Bitset of instructions in the block ready for scheduling */
|
||||
BITSET_WORD *worklist;
|
||||
|
||||
/* The backwards dependency graph. nr_dependencies is the number of
|
||||
* unscheduled instructions that must still be scheduled after (before)
|
||||
* this instruction. dependents are which instructions need to be
|
||||
* scheduled before (after) this instruction. */
|
||||
unsigned *dep_counts;
|
||||
BITSET_WORD **dependents;
|
||||
};
|
||||
|
||||
/* State of a single tuple and clause under construction */
|
||||
|
|
@ -149,6 +156,138 @@ bi_supports_dtsel(bi_instr *ins)
|
|||
}
|
||||
}
|
||||
|
||||
/* Adds an edge to the dependency graph */
|
||||
|
||||
static void
|
||||
bi_push_dependency(unsigned parent, unsigned child,
|
||||
BITSET_WORD **dependents, unsigned *dep_counts)
|
||||
{
|
||||
if (!BITSET_TEST(dependents[parent], child)) {
|
||||
BITSET_SET(dependents[parent], child);
|
||||
dep_counts[child]++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
add_dependency(struct util_dynarray *table, unsigned index, unsigned child,
|
||||
BITSET_WORD **dependents, unsigned *dep_counts)
|
||||
{
|
||||
assert(index < 64);
|
||||
util_dynarray_foreach(table + index, unsigned, parent)
|
||||
bi_push_dependency(*parent, child, dependents, dep_counts);
|
||||
}
|
||||
|
||||
static void
|
||||
mark_access(struct util_dynarray *table, unsigned index, unsigned parent)
|
||||
{
|
||||
assert(index < 64);
|
||||
util_dynarray_append(&table[index], unsigned, parent);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_sched_barrier(bi_instr *I)
|
||||
{
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_BARRIER:
|
||||
case BI_OPCODE_DISCARD_F32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bi_create_dependency_graph(struct bi_worklist st, bool inorder)
|
||||
{
|
||||
struct util_dynarray last_read[64], last_write[64];
|
||||
|
||||
for (unsigned i = 0; i < 64; ++i) {
|
||||
util_dynarray_init(&last_read[i], NULL);
|
||||
util_dynarray_init(&last_write[i], NULL);
|
||||
}
|
||||
|
||||
/* Initialize dependency graph */
|
||||
for (unsigned i = 0; i < st.count; ++i) {
|
||||
st.dependents[i] =
|
||||
calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
|
||||
|
||||
st.dep_counts[i] = 0;
|
||||
}
|
||||
|
||||
unsigned prev_msg = ~0;
|
||||
|
||||
/* Populate dependency graph */
|
||||
for (signed i = st.count - 1; i >= 0; --i) {
|
||||
bi_instr *ins = st.instructions[i];
|
||||
|
||||
bi_foreach_src(ins, s) {
|
||||
if (ins->src[s].type != BI_INDEX_REGISTER) continue;
|
||||
unsigned count = bi_count_read_registers(ins, s);
|
||||
|
||||
for (unsigned c = 0; c < count; ++c)
|
||||
add_dependency(last_write, ins->src[s].value + c, i, st.dependents, st.dep_counts);
|
||||
}
|
||||
|
||||
/* Keep message-passing ops in order. (This pass only cares
|
||||
* about bundling; reordering of message-passing instructions
|
||||
* happens during earlier scheduling.) */
|
||||
|
||||
if (bi_message_type_for_instr(ins)) {
|
||||
if (prev_msg != ~0)
|
||||
bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts);
|
||||
|
||||
prev_msg = i;
|
||||
}
|
||||
|
||||
/* Handle schedule barriers, adding All the deps */
|
||||
if (inorder || bi_is_sched_barrier(ins)) {
|
||||
for (unsigned j = 0; j < st.count; ++j) {
|
||||
if (i == j) continue;
|
||||
|
||||
bi_push_dependency(MAX2(i, j), MIN2(i, j),
|
||||
st.dependents, st.dep_counts);
|
||||
}
|
||||
}
|
||||
|
||||
bi_foreach_dest(ins, d) {
|
||||
if (ins->dest[d].type != BI_INDEX_REGISTER) continue;
|
||||
unsigned dest = ins->dest[d].value;
|
||||
|
||||
unsigned count = bi_count_write_registers(ins, d);
|
||||
|
||||
for (unsigned c = 0; c < count; ++c) {
|
||||
add_dependency(last_read, dest + c, i, st.dependents, st.dep_counts);
|
||||
add_dependency(last_write, dest + c, i, st.dependents, st.dep_counts);
|
||||
mark_access(last_write, dest + c, i);
|
||||
}
|
||||
}
|
||||
|
||||
bi_foreach_src(ins, s) {
|
||||
if (ins->src[s].type != BI_INDEX_REGISTER) continue;
|
||||
|
||||
unsigned count = bi_count_read_registers(ins, s);
|
||||
|
||||
for (unsigned c = 0; c < count; ++c)
|
||||
mark_access(last_read, ins->src[s].value + c, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* If there is a branch, all instructions depend on it, as interblock
|
||||
* execution must be purely in-order */
|
||||
|
||||
bi_instr *last = st.instructions[st.count - 1];
|
||||
if (last->branch_target || last->op == BI_OPCODE_JUMP) {
|
||||
for (signed i = st.count - 2; i >= 0; --i)
|
||||
bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts);
|
||||
}
|
||||
|
||||
/* Free the intermediate structures */
|
||||
for (unsigned i = 0; i < 64; ++i) {
|
||||
util_dynarray_fini(&last_read[i]);
|
||||
util_dynarray_fini(&last_write[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Scheduler pseudoinstruction lowerings to enable instruction pairings.
|
||||
* Currently only support CUBEFACE -> *CUBEFACE1/+CUBEFACE2
|
||||
*/
|
||||
|
|
@ -275,14 +414,23 @@ bi_flatten_block(bi_block *block, unsigned *len)
|
|||
*/
|
||||
|
||||
static struct bi_worklist
|
||||
bi_initialize_worklist(bi_block *block)
|
||||
bi_initialize_worklist(bi_block *block, bool inorder)
|
||||
{
|
||||
struct bi_worklist st = { };
|
||||
st.instructions = bi_flatten_block(block, &st.count);
|
||||
|
||||
if (st.count) {
|
||||
st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
|
||||
BITSET_SET(st.worklist, st.count - 1);
|
||||
if (!st.count)
|
||||
return st;
|
||||
|
||||
st.dependents = calloc(st.count, sizeof(st.dependents[0]));
|
||||
st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0]));
|
||||
|
||||
bi_create_dependency_graph(st, inorder);
|
||||
st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
|
||||
|
||||
for (unsigned i = 0; i < st.count; ++i) {
|
||||
if (st.dep_counts[i] == 0)
|
||||
BITSET_SET(st.worklist, i);
|
||||
}
|
||||
|
||||
return st;
|
||||
|
|
@ -291,6 +439,8 @@ bi_initialize_worklist(bi_block *block)
|
|||
static void
|
||||
bi_free_worklist(struct bi_worklist st)
|
||||
{
|
||||
free(st.dep_counts);
|
||||
free(st.dependents);
|
||||
free(st.instructions);
|
||||
free(st.worklist);
|
||||
}
|
||||
|
|
@ -298,8 +448,24 @@ bi_free_worklist(struct bi_worklist st)
|
|||
static void
|
||||
bi_update_worklist(struct bi_worklist st, unsigned idx)
|
||||
{
|
||||
if (idx >= 1)
|
||||
BITSET_SET(st.worklist, idx - 1);
|
||||
assert(st.dep_counts[idx] == 0);
|
||||
|
||||
if (!st.dependents[idx])
|
||||
return;
|
||||
|
||||
/* Iterate each dependent to remove one dependency (`done`),
|
||||
* adding dependents to the worklist where possible. */
|
||||
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, st.dependents[idx], st.count) {
|
||||
assert(st.dep_counts[i] != 0);
|
||||
unsigned new_deps = --st.dep_counts[i];
|
||||
|
||||
if (new_deps == 0)
|
||||
BITSET_SET(st.worklist, i);
|
||||
}
|
||||
|
||||
free(st.dependents[idx]);
|
||||
}
|
||||
|
||||
/* To work out the back-to-back flag, we need to detect branches and
|
||||
|
|
@ -1568,7 +1734,8 @@ bi_schedule_block(bi_context *ctx, bi_block *block)
|
|||
list_inithead(&block->clauses);
|
||||
|
||||
/* Copy list to dynamic array */
|
||||
struct bi_worklist st = bi_initialize_worklist(block);
|
||||
struct bi_worklist st = bi_initialize_worklist(block,
|
||||
bifrost_debug & BIFROST_DBG_INORDER);
|
||||
|
||||
if (!st.count) {
|
||||
bi_free_worklist(st);
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@
|
|||
#define BIFROST_DBG_VERBOSE 0x0008
|
||||
#define BIFROST_DBG_INTERNAL 0x0010
|
||||
#define BIFROST_DBG_NOSCHED 0x0020
|
||||
#define BIFROST_DBG_INORDER 0x0040
|
||||
|
||||
extern int bifrost_debug;
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,8 @@ static const struct debug_named_value bifrost_debug_options[] = {
|
|||
{"shaderdb", BIFROST_DBG_SHADERDB, "Print statistics"},
|
||||
{"verbose", BIFROST_DBG_VERBOSE, "Disassemble verbosely"},
|
||||
{"internal", BIFROST_DBG_INTERNAL, "Dump even internal shaders"},
|
||||
{"nosched", BIFROST_DBG_NOSCHED, "Force trivial scheduling"},
|
||||
{"nosched", BIFROST_DBG_NOSCHED, "Force trivial bundling"},
|
||||
{"inorder", BIFROST_DBG_INORDER, "Force in-order bundling"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue