mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 12:40:09 +01:00
freedreno/ir3: use standard list implementation
Use the standard list_head doubly-linked list and its related iterators, helpers, etc., rather than a weird combination of an instruction array and next pointers, depending on the stage. Each block now has an instr_list. In certain stages where we want to remove instructions and re-add them to the block's list, we just use list_replace() to copy the list to a new list_head. Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
67d994c676
commit
adf1659ff5
8 changed files with 160 additions and 208 deletions
|
|
@ -81,7 +81,7 @@ void ir3_destroy(struct ir3 *shader)
|
|||
shader->chunk = chunk->next;
|
||||
free(chunk);
|
||||
}
|
||||
free(shader->instrs);
|
||||
free(shader->indirects);
|
||||
free(shader->baryfs);
|
||||
free(shader);
|
||||
}
|
||||
|
|
@ -534,28 +534,32 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr,
|
|||
void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
|
||||
uint32_t gpu_id)
|
||||
{
|
||||
struct ir3_block *block = shader->block;
|
||||
uint32_t *ptr, *dwords;
|
||||
uint32_t i;
|
||||
|
||||
info->max_reg = -1;
|
||||
info->max_half_reg = -1;
|
||||
info->max_const = -1;
|
||||
info->instrs_count = 0;
|
||||
info->sizedwords = 0;
|
||||
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
info->sizedwords += 2;
|
||||
}
|
||||
|
||||
/* need a integer number of instruction "groups" (sets of 16
|
||||
* instructions on a4xx or sets of 4 instructions on a3xx),
|
||||
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
|
||||
*/
|
||||
if (gpu_id >= 400) {
|
||||
info->sizedwords = 2 * align(shader->instrs_count, 16);
|
||||
info->sizedwords = align(info->sizedwords, 16 * 2);
|
||||
} else {
|
||||
info->sizedwords = 2 * align(shader->instrs_count, 4);
|
||||
info->sizedwords = align(info->sizedwords, 4 * 2);
|
||||
}
|
||||
|
||||
ptr = dwords = calloc(4, info->sizedwords);
|
||||
|
||||
for (i = 0; i < shader->instrs_count; i++) {
|
||||
struct ir3_instruction *instr = shader->instrs[i];
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
int ret = emit[instr->category](instr, dwords, info);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
|
@ -581,14 +585,15 @@ static struct ir3_register * reg_create(struct ir3 *shader,
|
|||
return reg;
|
||||
}
|
||||
|
||||
static void insert_instr(struct ir3 *shader,
|
||||
static void insert_instr(struct ir3_block *block,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3 *shader = block->shader;
|
||||
#ifdef DEBUG
|
||||
static uint32_t serialno = 0;
|
||||
instr->serialno = ++serialno;
|
||||
#endif
|
||||
array_insert(shader->instrs, instr);
|
||||
list_addtail(&instr->node, &block->instr_list);
|
||||
|
||||
if (is_input(instr))
|
||||
array_insert(shader->baryfs, instr);
|
||||
|
|
@ -625,6 +630,8 @@ struct ir3_block * ir3_block_create(struct ir3 *shader,
|
|||
|
||||
block->shader = shader;
|
||||
|
||||
list_inithead(&block->instr_list);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
|
|
@ -652,7 +659,7 @@ struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
|
|||
instr->block = block;
|
||||
instr->category = category;
|
||||
instr->opc = opc;
|
||||
insert_instr(block->shader, instr);
|
||||
insert_instr(block, instr);
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
|
@ -677,7 +684,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
|
|||
*new_instr = *instr;
|
||||
new_instr->regs = regs;
|
||||
|
||||
insert_instr(instr->block->shader, new_instr);
|
||||
insert_instr(instr->block, new_instr);
|
||||
|
||||
/* clone registers: */
|
||||
new_instr->regs_count = 0;
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include <stdbool.h>
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "util/list.h"
|
||||
|
||||
#include "instr-a3xx.h"
|
||||
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
|
||||
|
|
@ -290,7 +291,9 @@ struct ir3_instruction {
|
|||
*/
|
||||
struct ir3_instruction *fanin;
|
||||
|
||||
struct ir3_instruction *next;
|
||||
/* Entry in ir3_block's instruction list: */
|
||||
struct list_head node;
|
||||
|
||||
#ifdef DEBUG
|
||||
uint32_t serialno;
|
||||
#endif
|
||||
|
|
@ -321,8 +324,6 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr)
|
|||
struct ir3_heap_chunk;
|
||||
|
||||
struct ir3 {
|
||||
unsigned instrs_count, instrs_sz;
|
||||
struct ir3_instruction **instrs;
|
||||
|
||||
/* Track bary.f (and ldlv) instructions.. this is needed in
|
||||
* scheduling to ensure that all varying fetches happen before
|
||||
|
|
@ -361,7 +362,7 @@ struct ir3_block {
|
|||
/* only a single address register: */
|
||||
struct ir3_instruction *address;
|
||||
struct ir3_block *parent;
|
||||
struct ir3_instruction *head;
|
||||
struct list_head instr_list;
|
||||
};
|
||||
|
||||
struct ir3 * ir3_create(void);
|
||||
|
|
@ -402,11 +403,8 @@ static inline void ir3_clear_mark(struct ir3 *shader)
|
|||
* a block, so tracking the list of instrs globally is
|
||||
* unlikely to be what we want.
|
||||
*/
|
||||
unsigned i;
|
||||
for (i = 0; i < shader->instrs_count; i++) {
|
||||
struct ir3_instruction *instr = shader->instrs[i];
|
||||
list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node)
|
||||
instr->flags &= ~IR3_INSTR_MARK;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int ir3_instr_regno(struct ir3_instruction *instr,
|
||||
|
|
@ -756,6 +754,7 @@ int ir3_block_flatten(struct ir3_block *block);
|
|||
/* depth calculation: */
|
||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned n);
|
||||
void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
|
||||
void ir3_block_depth(struct ir3_block *block);
|
||||
|
||||
/* copy-propagate: */
|
||||
|
|
|
|||
|
|
@ -354,13 +354,6 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
|
|||
{
|
||||
struct ir3_register *reg;
|
||||
|
||||
/* stay within the block.. don't try to operate across
|
||||
* basic block boundaries or we'll have problems when
|
||||
* dealing with multiple basic blocks:
|
||||
*/
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
|
||||
return instr;
|
||||
|
||||
if (is_eligible_mov(instr, !!flags)) {
|
||||
struct ir3_register *reg = instr->regs[1];
|
||||
struct ir3_instruction *src_instr = ssa(reg);
|
||||
|
|
@ -394,11 +387,11 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
|
|||
return instr;
|
||||
}
|
||||
|
||||
static void block_cp(struct ir3_block *block)
|
||||
void ir3_block_cp(struct ir3_block *block)
|
||||
{
|
||||
unsigned i;
|
||||
ir3_clear_mark(block->shader);
|
||||
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
for (unsigned i = 0; i < block->noutputs; i++) {
|
||||
if (block->outputs[i]) {
|
||||
struct ir3_instruction *out =
|
||||
instr_cp(block->outputs[i], NULL);
|
||||
|
|
@ -407,9 +400,3 @@ static void block_cp(struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ir3_block_cp(struct ir3_block *block)
|
||||
{
|
||||
ir3_clear_mark(block->shader);
|
||||
block_cp(block);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,25 +84,25 @@ int ir3_delayslots(struct ir3_instruction *assigner,
|
|||
}
|
||||
}
|
||||
|
||||
static void insert_by_depth(struct ir3_instruction *instr)
|
||||
void
|
||||
ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
|
||||
{
|
||||
struct ir3_block *block = instr->block;
|
||||
struct ir3_instruction *n = block->head;
|
||||
struct ir3_instruction *p = NULL;
|
||||
/* remove from existing spot in list: */
|
||||
list_delinit(&instr->node);
|
||||
|
||||
while (n && (n != instr) && (n->depth > instr->depth)) {
|
||||
p = n;
|
||||
n = n->next;
|
||||
/* find where to re-insert instruction: */
|
||||
list_for_each_entry (struct ir3_instruction, pos, list, node) {
|
||||
if (pos->depth > instr->depth) {
|
||||
list_add(&instr->node, &pos->node);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
instr->next = n;
|
||||
if (p)
|
||||
p->next = instr;
|
||||
else
|
||||
block->head = instr;
|
||||
/* if we get here, we didn't find an insertion spot: */
|
||||
list_addtail(&instr->node, list);
|
||||
}
|
||||
|
||||
static void ir3_instr_depth(struct ir3_instruction *instr)
|
||||
static void
|
||||
ir3_instr_depth(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *src;
|
||||
|
||||
|
|
@ -123,42 +123,38 @@ static void ir3_instr_depth(struct ir3_instruction *instr)
|
|||
instr->depth = MAX2(instr->depth, sd);
|
||||
}
|
||||
|
||||
/* meta-instructions don't add cycles, other than PHI.. which
|
||||
* might translate to a real instruction..
|
||||
*
|
||||
* well, not entirely true, fan-in/out, etc might need to need
|
||||
* to generate some extra mov's in edge cases, etc.. probably
|
||||
* we might want to do depth calculation considering the worst
|
||||
* case for these??
|
||||
*/
|
||||
if (!is_meta(instr))
|
||||
instr->depth++;
|
||||
|
||||
insert_by_depth(instr);
|
||||
ir3_insert_by_depth(instr, &instr->block->instr_list);
|
||||
}
|
||||
|
||||
/* Walk block->instr_list and, for every instruction for which
 * ir3_instr_check_mark() returns false, set its depth to DEPTH_UNUSED
 * and unlink it from the list with list_delinit().
 *
 * The _safe iterator variant is used because entries are removed from
 * the list while it is being walked.
 */
static void
|
||||
remove_unused_by_block(struct ir3_block *block)
|
||||
{
|
||||
list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
if (!ir3_instr_check_mark(instr)) {
|
||||
/* mark it, in case it is input, so we can
|
||||
* remove unused inputs:
|
||||
*/
|
||||
instr->depth = DEPTH_UNUSED;
|
||||
/* and remove from instruction list: */
|
||||
list_delinit(&instr->node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ir3_block_depth(struct ir3_block *block)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
block->head = NULL;
|
||||
|
||||
ir3_clear_mark(block->shader);
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i])
|
||||
ir3_instr_depth(block->outputs[i]);
|
||||
|
||||
/* mark un-used instructions: */
|
||||
for (i = 0; i < block->shader->instrs_count; i++) {
|
||||
struct ir3_instruction *instr = block->shader->instrs[i];
|
||||
|
||||
/* just consider instructions within this block: */
|
||||
if (instr->block != block)
|
||||
continue;
|
||||
|
||||
if (!ir3_instr_check_mark(instr))
|
||||
instr->depth = DEPTH_UNUSED;
|
||||
}
|
||||
remove_unused_by_block(block);
|
||||
|
||||
/* cleanup unused inputs: */
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
|
|
|
|||
|
|
@ -51,12 +51,9 @@ struct ir3_legalize_ctx {
|
|||
static void legalize(struct ir3_legalize_ctx *ctx)
|
||||
{
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_instruction *n;
|
||||
struct ir3 *shader = block->shader;
|
||||
struct ir3_instruction *end =
|
||||
ir3_instr_create(block, 0, OPC_END);
|
||||
struct ir3_instruction *last_input = NULL;
|
||||
struct ir3_instruction *last_rel = NULL;
|
||||
struct list_head instr_list;
|
||||
regmask_t needs_ss_war; /* write after read */
|
||||
regmask_t needs_ss;
|
||||
regmask_t needs_sy;
|
||||
|
|
@ -65,9 +62,13 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
regmask_init(&needs_ss);
|
||||
regmask_init(&needs_sy);
|
||||
|
||||
shader->instrs_count = 0;
|
||||
/* remove all the instructions from the list, we'll be adding
|
||||
* them back in as we go
|
||||
*/
|
||||
list_replace(&block->instr_list, &instr_list);
|
||||
list_inithead(&block->instr_list);
|
||||
|
||||
for (n = block->head; n; n = n->next) {
|
||||
list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) {
|
||||
struct ir3_register *reg;
|
||||
unsigned i;
|
||||
|
||||
|
|
@ -140,12 +141,12 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
}
|
||||
|
||||
/* need to be able to set (ss) on first instruction: */
|
||||
if ((shader->instrs_count == 0) && (n->category >= 5))
|
||||
if (list_empty(&block->instr_list) && (n->category >= 5))
|
||||
ir3_NOP(block);
|
||||
|
||||
if (is_nop(n) && shader->instrs_count) {
|
||||
struct ir3_instruction *last =
|
||||
shader->instrs[shader->instrs_count-1];
|
||||
if (is_nop(n) && !list_empty(&block->instr_list)) {
|
||||
struct ir3_instruction *last = list_last_entry(&block->instr_list,
|
||||
struct ir3_instruction, node);
|
||||
if (is_nop(last) && (last->repeat < 5)) {
|
||||
last->repeat++;
|
||||
last->flags |= n->flags;
|
||||
|
|
@ -153,7 +154,7 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
shader->instrs[shader->instrs_count++] = n;
|
||||
list_addtail(&n->node, &block->instr_list);
|
||||
|
||||
if (is_sfu(n))
|
||||
regmask_set(&needs_ss, n->regs[0]);
|
||||
|
|
@ -192,35 +193,19 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
* the (ei) flag:
|
||||
*/
|
||||
if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
|
||||
int i, cnt;
|
||||
struct ir3_instruction *baryf;
|
||||
|
||||
/* note that ir3_instr_create() inserts into
|
||||
* shader->instrs[] and increments the count..
|
||||
* so we need to bump up the cnt initially (to
|
||||
* avoid it clobbering the last real instr) and
|
||||
* restore it after.
|
||||
*/
|
||||
cnt = ++shader->instrs_count;
|
||||
/* (ss)bary.f (ei)r63.x, 0, r0.x */
|
||||
baryf = ir3_instr_create(block, 2, OPC_BARY_F);
|
||||
baryf->flags |= IR3_INSTR_SS;
|
||||
ir3_reg_create(baryf, regid(63, 0), 0);
|
||||
ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
|
||||
ir3_reg_create(baryf, regid(0, 0), 0);
|
||||
|
||||
/* inserting instructions would be a bit nicer if list.. */
|
||||
for (i = cnt - 2; i >= 0; i--) {
|
||||
if (shader->instrs[i] == last_input) {
|
||||
/* insert the dummy bary.f after last_input: */
|
||||
list_add(&baryf->node, &last_input->node);
|
||||
|
||||
/* (ss)bary.f (ei)r63.x, 0, r0.x */
|
||||
last_input = ir3_instr_create(block, 2, OPC_BARY_F);
|
||||
last_input->flags |= IR3_INSTR_SS;
|
||||
ir3_reg_create(last_input, regid(63, 0), 0);
|
||||
ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
|
||||
ir3_reg_create(last_input, regid(0, 0), 0);
|
||||
|
||||
shader->instrs[i + 1] = last_input;
|
||||
|
||||
break;
|
||||
}
|
||||
shader->instrs[i + 1] = shader->instrs[i];
|
||||
}
|
||||
|
||||
shader->instrs_count = cnt;
|
||||
last_input = baryf;
|
||||
}
|
||||
last_input->regs[0]->flags |= IR3_REG_EI;
|
||||
}
|
||||
|
|
@ -228,9 +213,11 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
if (last_rel)
|
||||
last_rel->flags |= IR3_INSTR_UL;
|
||||
|
||||
shader->instrs[shader->instrs_count++] = end;
|
||||
/* create/add 'end' instruction: */
|
||||
ir3_instr_create(block, 0, OPC_END);
|
||||
|
||||
shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
|
||||
list_first_entry(&block->instr_list, struct ir3_instruction, node)
|
||||
->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
|
||||
}
|
||||
|
||||
void ir3_block_legalize(struct ir3_block *block,
|
||||
|
|
|
|||
|
|
@ -186,9 +186,8 @@ void ir3_print_instr(struct ir3_instruction *instr)
|
|||
static void
|
||||
print_block(struct ir3_block *block, int lvl)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
tab(lvl); printf("block {\n");
|
||||
for (instr = block->head; instr; instr = instr->next) {
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
print_instr(instr, lvl+1);
|
||||
}
|
||||
tab(lvl); printf("}\n");
|
||||
|
|
|
|||
|
|
@ -75,10 +75,10 @@ struct ir3_ra_ctx {
|
|||
# define ra_debug 0
|
||||
#endif
|
||||
|
||||
#define ra_dump_list(msg, n) do { \
|
||||
#define ra_dump_list(msg, ir) do { \
|
||||
if (ra_debug) { \
|
||||
debug_printf("-- " msg); \
|
||||
ir3_print(n->block->shader); \
|
||||
ir3_print(ir); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
|
@ -175,14 +175,13 @@ static void mark_sources(struct ir3_instruction *instr,
|
|||
static void compute_liveregs(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr, regmask_t *liveregs)
|
||||
{
|
||||
struct ir3_block *block = instr->block;
|
||||
struct ir3_instruction *n;
|
||||
struct ir3_block *block = ctx->block;
|
||||
regmask_t written;
|
||||
unsigned i;
|
||||
|
||||
regmask_init(&written);
|
||||
|
||||
for (n = instr->next; n; n = n->next) {
|
||||
list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
|
||||
struct ir3_register *r;
|
||||
|
||||
if (is_meta(n))
|
||||
|
|
@ -411,9 +410,8 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
|
|||
static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr, unsigned name)
|
||||
{
|
||||
struct ir3_instruction *n, *src;
|
||||
|
||||
for (n = instr->next; n && !ctx->error; n = n->next) {
|
||||
list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
|
||||
struct ir3_instruction *src;
|
||||
foreach_ssa_src_n(src, i, n) {
|
||||
unsigned r = i + 1;
|
||||
|
||||
|
|
@ -424,6 +422,8 @@ static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
|
|||
if (src == instr)
|
||||
instr_assign_src(ctx, n, r, name);
|
||||
}
|
||||
if (ctx->error)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -589,14 +589,45 @@ static void instr_assign_array(struct ir3_ra_ctx *ctx,
|
|||
|
||||
}
|
||||
|
||||
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
static bool
|
||||
block_ra(struct ir3_block *block, void *state)
|
||||
{
|
||||
struct ir3_instruction *n;
|
||||
struct ir3_ra_ctx *ctx = state;
|
||||
|
||||
ra_dump_list("-------\n", block->shader);
|
||||
|
||||
/* first pass, assign arrays: */
|
||||
list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
|
||||
if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
|
||||
debug_assert(!n->cp.left); /* don't think this should happen */
|
||||
ra_dump_instr("ASSIGN ARRAY: ", n);
|
||||
instr_assign_array(ctx, n);
|
||||
ra_dump_list("-------\n", block->shader);
|
||||
}
|
||||
|
||||
if (ctx->error)
|
||||
return false;
|
||||
}
|
||||
|
||||
list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
|
||||
ra_dump_instr("ASSIGN: ", n);
|
||||
instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
|
||||
ra_dump_list("-------\n", block->shader);
|
||||
|
||||
if (ctx->error)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
/* frag shader inputs get pre-assigned, since we have some
|
||||
* constraints/unknowns about setup for some of these regs:
|
||||
*/
|
||||
if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
|
||||
if (ctx->type == SHADER_FRAGMENT) {
|
||||
unsigned i = 0, j;
|
||||
if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
|
||||
/* if we have frag_face, it gets hr0.x */
|
||||
|
|
@ -608,31 +639,23 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
instr_assign(ctx, block->inputs[i], j);
|
||||
}
|
||||
|
||||
ra_dump_list("-------\n", block->head);
|
||||
|
||||
/* first pass, assign arrays: */
|
||||
for (n = block->head; n && !ctx->error; n = n->next) {
|
||||
if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
|
||||
debug_assert(!n->cp.left); /* don't think this should happen */
|
||||
ra_dump_instr("ASSIGN ARRAY: ", n);
|
||||
instr_assign_array(ctx, n);
|
||||
ra_dump_list("-------\n", block->head);
|
||||
}
|
||||
}
|
||||
|
||||
for (n = block->head; n && !ctx->error; n = n->next) {
|
||||
ra_dump_instr("ASSIGN: ", n);
|
||||
instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
|
||||
ra_dump_list("-------\n", block->head);
|
||||
}
|
||||
block_ra(block, ctx);
|
||||
|
||||
return ctx->error ? -1 : 0;
|
||||
}
|
||||
|
||||
/* Set IR3_REG_SSA on regs[0] of every instruction in block->instr_list
 * that has at least one register (regs_count > 0).
 *
 * The 'state' argument is not used by this function.  Always returns
 * true.
 */
static bool
|
||||
block_mark_dst(struct ir3_block *block, void *state)
|
||||
{
|
||||
list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node)
|
||||
if (n->regs_count > 0)
|
||||
n->regs[0]->flags |= IR3_REG_SSA;
|
||||
return true;
|
||||
}
|
||||
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
bool frag_coord, bool frag_face)
|
||||
{
|
||||
struct ir3_instruction *n;
|
||||
struct ir3_ra_ctx ctx = {
|
||||
.block = block,
|
||||
.type = type,
|
||||
|
|
@ -648,12 +671,10 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
|||
* NOTE: we really should set SSA flag consistently on
|
||||
* every dst register in the frontend.
|
||||
*/
|
||||
for (n = block->head; n; n = n->next)
|
||||
if (n->regs_count > 0)
|
||||
n->regs[0]->flags |= IR3_REG_SSA;
|
||||
block_mark_dst(block, &ctx);
|
||||
|
||||
ir3_clear_mark(block->shader);
|
||||
ret = block_ra(&ctx, block);
|
||||
ret = shader_ra(&ctx, block);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,26 +88,21 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs)
|
|||
return d;
|
||||
}
|
||||
|
||||
static unsigned distance(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr, unsigned maxd)
|
||||
static unsigned
|
||||
distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
|
||||
unsigned maxd)
|
||||
{
|
||||
struct ir3_instruction *n = ctx->scheduled;
|
||||
struct list_head *instr_list = &instr->block->instr_list;
|
||||
unsigned d = 0;
|
||||
while (n && (n != instr) && (d < maxd)) {
|
||||
|
||||
list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
|
||||
if ((n == instr) || (d >= maxd))
|
||||
break;
|
||||
if (is_alu(n) || is_flow(n))
|
||||
d++;
|
||||
n = n->next;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/* TODO maybe we want double linked list? */
|
||||
static struct ir3_instruction * prev(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *p = instr->block->head;
|
||||
while (p && (p->next != instr))
|
||||
p = p->next;
|
||||
return p;
|
||||
return d;
|
||||
}
|
||||
|
||||
static bool is_sfu_or_mem(struct ir3_instruction *instr)
|
||||
|
|
@ -125,25 +120,11 @@ static void schedule(struct ir3_sched_ctx *ctx,
|
|||
* scheduling and depth calculation..
|
||||
*/
|
||||
if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
|
||||
schedule(ctx, ir3_NOP(block), false);
|
||||
ir3_NOP(block);
|
||||
|
||||
/* remove from depth list:
|
||||
*/
|
||||
if (remove) {
|
||||
struct ir3_instruction *p = prev(instr);
|
||||
|
||||
/* NOTE: this can happen for inputs which are not
|
||||
* read.. in that case there is no need to schedule
|
||||
* the input, so just bail:
|
||||
*/
|
||||
if (instr != (p ? p->next : block->head))
|
||||
return;
|
||||
|
||||
if (p)
|
||||
p->next = instr->next;
|
||||
else
|
||||
block->head = instr->next;
|
||||
}
|
||||
list_delinit(&instr->node);
|
||||
|
||||
if (writes_addr(instr)) {
|
||||
assert(ctx->addr == NULL);
|
||||
|
|
@ -157,7 +138,7 @@ static void schedule(struct ir3_sched_ctx *ctx,
|
|||
|
||||
instr->flags |= IR3_INSTR_MARK;
|
||||
|
||||
instr->next = ctx->scheduled;
|
||||
list_addtail(&instr->node, &instr->block->instr_list);
|
||||
ctx->scheduled = instr;
|
||||
|
||||
ctx->cnt++;
|
||||
|
|
@ -284,18 +265,6 @@ static int trysched(struct ir3_sched_ctx *ctx,
|
|||
return SCHEDULED;
|
||||
}
|
||||
|
||||
static struct ir3_instruction * reverse(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *reversed = NULL;
|
||||
while (instr) {
|
||||
struct ir3_instruction *next = instr->next;
|
||||
instr->next = reversed;
|
||||
reversed = instr;
|
||||
instr = next;
|
||||
}
|
||||
return reversed;
|
||||
}
|
||||
|
||||
static bool uses_current_addr(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
|
|
@ -317,16 +286,14 @@ static bool uses_current_pred(struct ir3_sched_ctx *ctx,
|
|||
* other instructions using the current address register:
|
||||
*/
|
||||
static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_block *block)
|
||||
struct list_head *unscheduled_list)
|
||||
{
|
||||
struct ir3_instruction *instr = block->head;
|
||||
bool addr_in_use = false;
|
||||
bool pred_in_use = false;
|
||||
bool all_delayed = true;
|
||||
unsigned cnt = ~0, attempted = 0;
|
||||
|
||||
while (instr) {
|
||||
struct ir3_instruction *next = instr->next;
|
||||
list_for_each_entry_safe(struct ir3_instruction, instr, unscheduled_list, node) {
|
||||
bool addr = uses_current_addr(ctx, instr);
|
||||
bool pred = uses_current_pred(ctx, instr);
|
||||
|
||||
|
|
@ -347,8 +314,6 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
|||
|
||||
attempted++;
|
||||
}
|
||||
|
||||
instr = next;
|
||||
}
|
||||
|
||||
if (!addr_in_use)
|
||||
|
|
@ -408,7 +373,10 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
|||
|
||||
static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct list_head unscheduled_list;
|
||||
|
||||
list_replace(&block->instr_list, &unscheduled_list);
|
||||
list_inithead(&block->instr_list);
|
||||
|
||||
/* schedule all the shader input's (meta-instr) first so that
|
||||
* the RA step sees that the input registers contain a value
|
||||
|
|
@ -423,31 +391,22 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
while ((instr = block->head) && !ctx->error) {
|
||||
/* NOTE: always grab next *before* trysched(), in case the
|
||||
* instruction is actually scheduled (and therefore moved
|
||||
* from depth list into scheduled list)
|
||||
*/
|
||||
struct ir3_instruction *next = instr->next;
|
||||
list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
|
||||
int cnt = trysched(ctx, instr);
|
||||
|
||||
if (cnt == DELAYED)
|
||||
cnt = block_sched_undelayed(ctx, block);
|
||||
cnt = block_sched_undelayed(ctx, &unscheduled_list);
|
||||
|
||||
/* -1 is signal to return up stack, but to us means same as 0: */
|
||||
cnt = MAX2(0, cnt);
|
||||
cnt += ctx->cnt;
|
||||
instr = next;
|
||||
|
||||
/* if deepest remaining instruction cannot be scheduled, try
|
||||
* the increasingly more shallow instructions until needed
|
||||
* number of delay slots is filled:
|
||||
*/
|
||||
while (instr && (cnt > ctx->cnt)) {
|
||||
next = instr->next;
|
||||
list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node)
|
||||
trysched(ctx, instr);
|
||||
instr = next;
|
||||
}
|
||||
|
||||
/* and if we run out of instructions that can be scheduled,
|
||||
* then it is time for nop's:
|
||||
|
|
@ -455,9 +414,6 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
|||
while (cnt > ctx->cnt)
|
||||
schedule(ctx, ir3_NOP(block), false);
|
||||
}
|
||||
|
||||
/* at this point, scheduled list is in reverse order, so fix that: */
|
||||
block->head = reverse(ctx->scheduled);
|
||||
}
|
||||
|
||||
int ir3_block_sched(struct ir3_block *block)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue