mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
i965/fs: Schedule instructions both before and after register allocation.
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
1315f3b4b3
commit
7a9f940cab
3 changed files with 78 additions and 18 deletions
|
|
@ -2498,7 +2498,7 @@ fs_visitor::run()
|
|||
|
||||
remove_dead_constants();
|
||||
|
||||
schedule_instructions();
|
||||
schedule_instructions(false);
|
||||
|
||||
assign_curb_setup();
|
||||
assign_urb_setup();
|
||||
|
|
@ -2525,6 +2525,8 @@ fs_visitor::run()
|
|||
if (failed)
|
||||
return false;
|
||||
|
||||
schedule_instructions(true);
|
||||
|
||||
if (dispatch_width == 8) {
|
||||
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -328,7 +328,7 @@ public:
|
|||
bool remove_dead_constants();
|
||||
bool remove_duplicate_mrf_writes();
|
||||
bool virtual_grf_interferes(int a, int b);
|
||||
void schedule_instructions();
|
||||
void schedule_instructions(bool post_reg_alloc);
|
||||
void fail(const char *msg, ...);
|
||||
|
||||
void push_force_uncompressed();
|
||||
|
|
|
|||
|
|
@ -112,13 +112,15 @@ public:
|
|||
|
||||
class instruction_scheduler {
|
||||
public:
|
||||
instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
|
||||
instruction_scheduler(fs_visitor *v, void *mem_ctx, int grf_count,
|
||||
bool post_reg_alloc)
|
||||
{
|
||||
this->v = v;
|
||||
this->mem_ctx = ralloc_context(mem_ctx);
|
||||
this->virtual_grf_count = virtual_grf_count;
|
||||
this->grf_count = grf_count;
|
||||
this->instructions.make_empty();
|
||||
this->instructions_to_schedule = 0;
|
||||
this->post_reg_alloc = post_reg_alloc;
|
||||
}
|
||||
|
||||
~instruction_scheduler()
|
||||
|
|
@ -137,8 +139,9 @@ public:
|
|||
|
||||
void *mem_ctx;
|
||||
|
||||
bool post_reg_alloc;
|
||||
int instructions_to_schedule;
|
||||
int virtual_grf_count;
|
||||
int grf_count;
|
||||
exec_list instructions;
|
||||
fs_visitor *v;
|
||||
};
|
||||
|
|
@ -247,7 +250,12 @@ instruction_scheduler::is_compressed(fs_inst *inst)
|
|||
void
|
||||
instruction_scheduler::calculate_deps()
|
||||
{
|
||||
schedule_node *last_grf_write[virtual_grf_count];
|
||||
/* Pre-register-allocation, this tracks the last write per VGRF (so
|
||||
* different reg_offsets within it can interfere when they shouldn't).
|
||||
* After register allocation, reg_offsets are gone and we track individual
|
||||
* GRF registers.
|
||||
*/
|
||||
schedule_node *last_grf_write[grf_count];
|
||||
schedule_node *last_mrf_write[BRW_MAX_MRF];
|
||||
schedule_node *last_conditional_mod[2] = { NULL, NULL };
|
||||
/* Fixed HW registers are assumed to be separate from the virtual
|
||||
|
|
@ -256,6 +264,7 @@ instruction_scheduler::calculate_deps()
|
|||
* granular level.
|
||||
*/
|
||||
schedule_node *last_fixed_grf_write = NULL;
|
||||
int reg_width = v->dispatch_width / 8;
|
||||
|
||||
/* The last instruction always needs to still be the last
|
||||
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
|
||||
|
|
@ -277,11 +286,21 @@ instruction_scheduler::calculate_deps()
|
|||
/* read-after-write deps. */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file == GRF) {
|
||||
add_dep(last_grf_write[inst->src[i].reg], n);
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
add_dep(last_grf_write[inst->src[i].reg + r], n);
|
||||
} else {
|
||||
add_dep(last_grf_write[inst->src[i].reg], n);
|
||||
}
|
||||
} else if (inst->src[i].file == FIXED_HW_REG &&
|
||||
(inst->src[i].fixed_hw_reg.file ==
|
||||
BRW_GENERAL_REGISTER_FILE)) {
|
||||
add_dep(last_fixed_grf_write, n);
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
|
||||
} else {
|
||||
add_dep(last_fixed_grf_write, n);
|
||||
}
|
||||
} else if (inst->src[i].file != BAD_FILE &&
|
||||
inst->src[i].file != IMM &&
|
||||
inst->src[i].file != UNIFORM) {
|
||||
|
|
@ -304,8 +323,15 @@ instruction_scheduler::calculate_deps()
|
|||
|
||||
/* write-after-write deps. */
|
||||
if (inst->dst.file == GRF) {
|
||||
add_dep(last_grf_write[inst->dst.reg], n);
|
||||
last_grf_write[inst->dst.reg] = n;
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < inst->regs_written() * reg_width; r++) {
|
||||
add_dep(last_grf_write[inst->dst.reg + r], n);
|
||||
last_grf_write[inst->dst.reg + r] = n;
|
||||
}
|
||||
} else {
|
||||
add_dep(last_grf_write[inst->dst.reg], n);
|
||||
last_grf_write[inst->dst.reg] = n;
|
||||
}
|
||||
} else if (inst->dst.file == MRF) {
|
||||
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
|
||||
|
||||
|
|
@ -321,7 +347,12 @@ instruction_scheduler::calculate_deps()
|
|||
}
|
||||
} else if (inst->dst.file == FIXED_HW_REG &&
|
||||
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
|
||||
last_fixed_grf_write = n;
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
|
||||
} else {
|
||||
last_fixed_grf_write = n;
|
||||
}
|
||||
} else if (inst->dst.file != BAD_FILE) {
|
||||
add_barrier_deps(n);
|
||||
}
|
||||
|
|
@ -360,12 +391,22 @@ instruction_scheduler::calculate_deps()
|
|||
/* write-after-read deps. */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file == GRF) {
|
||||
add_dep(n, last_grf_write[inst->src[i].reg]);
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
add_dep(n, last_grf_write[inst->src[i].reg + r]);
|
||||
} else {
|
||||
add_dep(n, last_grf_write[inst->src[i].reg]);
|
||||
}
|
||||
} else if (inst->src[i].file == FIXED_HW_REG &&
|
||||
(inst->src[i].fixed_hw_reg.file ==
|
||||
BRW_GENERAL_REGISTER_FILE)) {
|
||||
add_dep(n, last_fixed_grf_write);
|
||||
} else if (inst->src[i].file != BAD_FILE &&
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
|
||||
} else {
|
||||
add_dep(n, last_fixed_grf_write);
|
||||
}
|
||||
} else if (inst->src[i].file != BAD_FILE &&
|
||||
inst->src[i].file != IMM &&
|
||||
inst->src[i].file != UNIFORM) {
|
||||
assert(inst->src[i].file != MRF);
|
||||
|
|
@ -389,7 +430,12 @@ instruction_scheduler::calculate_deps()
|
|||
* can mark this as WAR dependency.
|
||||
*/
|
||||
if (inst->dst.file == GRF) {
|
||||
last_grf_write[inst->dst.reg] = n;
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < inst->regs_written() * reg_width; r++)
|
||||
last_grf_write[inst->dst.reg + r] = n;
|
||||
} else {
|
||||
last_grf_write[inst->dst.reg] = n;
|
||||
}
|
||||
} else if (inst->dst.file == MRF) {
|
||||
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
|
||||
|
||||
|
|
@ -405,7 +451,12 @@ instruction_scheduler::calculate_deps()
|
|||
}
|
||||
} else if (inst->dst.file == FIXED_HW_REG &&
|
||||
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
|
||||
last_fixed_grf_write = n;
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < reg_width; r++)
|
||||
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
|
||||
} else {
|
||||
last_fixed_grf_write = n;
|
||||
}
|
||||
} else if (inst->dst.file != BAD_FILE) {
|
||||
add_barrier_deps(n);
|
||||
}
|
||||
|
|
@ -499,10 +550,17 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::schedule_instructions()
|
||||
fs_visitor::schedule_instructions(bool post_reg_alloc)
|
||||
{
|
||||
fs_inst *next_block_header = (fs_inst *)instructions.head;
|
||||
instruction_scheduler sched(this, mem_ctx, this->virtual_grf_count);
|
||||
|
||||
int grf_count;
|
||||
if (post_reg_alloc)
|
||||
grf_count = grf_used;
|
||||
else
|
||||
grf_count = virtual_grf_count;
|
||||
|
||||
instruction_scheduler sched(this, mem_ctx, grf_count, post_reg_alloc);
|
||||
|
||||
while (!next_block_header->is_tail_sentinel()) {
|
||||
/* Add things to be scheduled until we get to a new BB. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue