mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 04:30:10 +01:00
freedreno/ir3: array rework
Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
cc7ed34df9
commit
fad158a0e0
9 changed files with 365 additions and 363 deletions
|
|
@ -400,9 +400,16 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return 1;
|
||||
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
|
||||
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
|
||||
/* Technically this should be the same as for TEMP/CONST, since
|
||||
* everything is just normal registers. This is just a temporary
|
||||
* hack until load_input/store_output handle arrays in a similar
|
||||
* way as load_var/store_var..
|
||||
*/
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
|
||||
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
|
||||
return 1;
|
||||
/* a2xx compiler doesn't handle indirect: */
|
||||
return is_ir3(screen) ? 1 : 0;
|
||||
case PIPE_SHADER_CAP_SUBROUTINES:
|
||||
case PIPE_SHADER_CAP_DOUBLES:
|
||||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
|
|||
shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
|
||||
|
||||
list_inithead(&shader->block_list);
|
||||
list_inithead(&shader->array_list);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
|
@ -121,18 +122,19 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
|
|||
val.iim_val = reg->iim_val;
|
||||
} else {
|
||||
unsigned components;
|
||||
int16_t max;
|
||||
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
components = reg->size;
|
||||
val.dummy10 = reg->offset;
|
||||
val.dummy10 = reg->array.offset;
|
||||
max = (reg->array.offset + repeat + components - 1) >> 2;
|
||||
} else {
|
||||
components = util_last_bit(reg->wrmask);
|
||||
val.comp = reg->num & 0x3;
|
||||
val.num = reg->num >> 2;
|
||||
max = (reg->num + repeat + components - 1) >> 2;
|
||||
}
|
||||
|
||||
int16_t max = (reg->num + repeat + components - 1) >> 2;
|
||||
|
||||
if (reg->flags & IR3_REG_CONST) {
|
||||
info->max_const = MAX2(info->max_const, max);
|
||||
} else if (val.num == 63) {
|
||||
|
|
@ -233,7 +235,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
|
|||
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
|
||||
|
||||
if (src1->flags & IR3_REG_RELATIV) {
|
||||
iassert(src1->num < (1 << 10));
|
||||
iassert(src1->array.offset < (1 << 10));
|
||||
cat2->rel1.src1 = reg(src1, info, instr->repeat,
|
||||
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
|
||||
IR3_REG_HALF | absneg);
|
||||
|
|
@ -260,7 +262,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
|
|||
!((src1->flags ^ src2->flags) & IR3_REG_HALF));
|
||||
|
||||
if (src2->flags & IR3_REG_RELATIV) {
|
||||
iassert(src2->num < (1 << 10));
|
||||
iassert(src2->array.offset < (1 << 10));
|
||||
cat2->rel2.src2 = reg(src2, info, instr->repeat,
|
||||
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
|
||||
IR3_REG_HALF | absneg);
|
||||
|
|
@ -333,7 +335,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
|
|||
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
|
||||
|
||||
if (src1->flags & IR3_REG_RELATIV) {
|
||||
iassert(src1->num < (1 << 10));
|
||||
iassert(src1->array.offset < (1 << 10));
|
||||
cat3->rel1.src1 = reg(src1, info, instr->repeat,
|
||||
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
|
||||
IR3_REG_HALF | absneg);
|
||||
|
|
@ -361,7 +363,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
|
|||
|
||||
|
||||
if (src3->flags & IR3_REG_RELATIV) {
|
||||
iassert(src3->num < (1 << 10));
|
||||
iassert(src3->array.offset < (1 << 10));
|
||||
cat3->rel2.src3 = reg(src3, info, instr->repeat,
|
||||
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
|
||||
IR3_REG_HALF | absneg);
|
||||
|
|
@ -404,7 +406,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
|
|||
iassert(instr->regs_count == 2);
|
||||
|
||||
if (src->flags & IR3_REG_RELATIV) {
|
||||
iassert(src->num < (1 << 10));
|
||||
iassert(src->array.offset < (1 << 10));
|
||||
cat4->rel.src = reg(src, info, instr->repeat,
|
||||
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
|
||||
IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
|
||||
|
|
@ -737,6 +739,14 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
|
|||
return reg;
|
||||
}
|
||||
|
||||
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
|
||||
struct ir3_register *reg)
|
||||
{
|
||||
struct ir3_register *new_reg = reg_create(shader, 0, 0);
|
||||
*new_reg = *reg;
|
||||
return new_reg;
|
||||
}
|
||||
|
||||
void
|
||||
ir3_instr_set_address(struct ir3_instruction *instr,
|
||||
struct ir3_instruction *addr)
|
||||
|
|
@ -777,3 +787,12 @@ ir3_count_instructions(struct ir3 *ir)
|
|||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
struct ir3_array *
|
||||
ir3_lookup_array(struct ir3 *ir, unsigned id)
|
||||
{
|
||||
list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
|
||||
if (arr->id == id)
|
||||
return arr;
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,8 @@ struct ir3_register {
|
|||
* before register assignment is done:
|
||||
*/
|
||||
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
|
||||
IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */
|
||||
IR3_REG_ARRAY = 0x4000,
|
||||
IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */
|
||||
|
||||
} flags;
|
||||
union {
|
||||
|
|
@ -97,11 +98,18 @@ struct ir3_register {
|
|||
uint32_t uim_val;
|
||||
float fim_val;
|
||||
/* relative: */
|
||||
int offset;
|
||||
struct {
|
||||
uint16_t id;
|
||||
uint16_t offset;
|
||||
} array;
|
||||
};
|
||||
|
||||
/* for IR3_REG_SSA, src registers contain ptr back to
|
||||
* assigning instruction.
|
||||
/* For IR3_REG_SSA, src registers contain ptr back to assigning
|
||||
* instruction.
|
||||
*
|
||||
* For IR3_REG_ARRAY, the pointer is back to the last dependent
|
||||
* array access (although the net effect is the same, it points
|
||||
* back to a previous instruction that we depend on).
|
||||
*/
|
||||
struct ir3_instruction *instr;
|
||||
|
||||
|
|
@ -221,9 +229,6 @@ struct ir3_instruction {
|
|||
struct {
|
||||
int off; /* component/offset */
|
||||
} fo;
|
||||
struct {
|
||||
int aid;
|
||||
} fi;
|
||||
struct {
|
||||
/* used to temporarily hold reference to nir_phi_instr
|
||||
* until we resolve the phi srcs
|
||||
|
|
@ -293,19 +298,6 @@ struct ir3_instruction {
|
|||
*/
|
||||
struct ir3_instruction *address;
|
||||
|
||||
/* in case of a instruction with relative dst instruction, we need to
|
||||
* capture the dependency on the fanin for the previous values of
|
||||
* the array elements. Since we don't know at compile time actually
|
||||
* which array elements are written, this serves to preserve the
|
||||
* unconditional write to array elements prior to the conditional
|
||||
* write.
|
||||
*
|
||||
* TODO only cat1 can do indirect write.. we could maybe move this
|
||||
* into instr->cat1.fanin (but would require the frontend to insert
|
||||
* the extra mov)
|
||||
*/
|
||||
struct ir3_instruction *fanin;
|
||||
|
||||
/* Entry in ir3_block's instruction list: */
|
||||
struct list_head node;
|
||||
|
||||
|
|
@ -379,10 +371,39 @@ struct ir3 {
|
|||
/* List of blocks: */
|
||||
struct list_head block_list;
|
||||
|
||||
/* List of ir3_array's: */
|
||||
struct list_head array_list;
|
||||
|
||||
unsigned heap_idx;
|
||||
struct ir3_heap_chunk *chunk;
|
||||
};
|
||||
|
||||
typedef struct nir_variable nir_variable;
|
||||
|
||||
struct ir3_array {
|
||||
struct list_head node;
|
||||
unsigned length;
|
||||
unsigned id;
|
||||
|
||||
nir_variable *var;
|
||||
|
||||
/* We track the last write and last access (read or write) to
|
||||
* setup dependencies on instructions that read or write the
|
||||
* array. Reads can be re-ordered wrt. other reads, but should
|
||||
* not be re-ordered wrt. to writes. Writes cannot be reordered
|
||||
* wrt. any other access to the array.
|
||||
*
|
||||
* So array reads depend on last write, and array writes depend
|
||||
* on the last access.
|
||||
*/
|
||||
struct ir3_instruction *last_write, *last_access;
|
||||
|
||||
/* extra stuff used in RA pass: */
|
||||
unsigned base;
|
||||
};
|
||||
|
||||
struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
|
||||
|
||||
typedef struct nir_block nir_block;
|
||||
|
||||
struct ir3_block {
|
||||
|
|
@ -430,6 +451,8 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
|
|||
|
||||
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
|
||||
int num, int flags);
|
||||
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
|
||||
struct ir3_register *reg);
|
||||
|
||||
void ir3_instr_set_address(struct ir3_instruction *instr,
|
||||
struct ir3_instruction *addr);
|
||||
|
|
@ -510,6 +533,9 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
|
|||
if (dst->num == regid(REG_A0, 0))
|
||||
return false;
|
||||
|
||||
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
|
||||
return false;
|
||||
|
||||
if ((instr->category == 1) &&
|
||||
(instr->cat1.src_type == instr->cat1.dst_type))
|
||||
return true;
|
||||
|
|
@ -623,8 +649,10 @@ static inline bool writes_pred(struct ir3_instruction *instr)
|
|||
/* TODO better name */
|
||||
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
|
||||
{
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
|
||||
debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
|
||||
return reg->instr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -813,8 +841,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
|
|||
|
||||
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->fanin)
|
||||
return instr->regs_count + 2;
|
||||
if (instr->address)
|
||||
return instr->regs_count + 1;
|
||||
return instr->regs_count;
|
||||
|
|
@ -822,8 +848,6 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
|
|||
|
||||
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
|
||||
{
|
||||
if (n == (instr->regs_count + 1))
|
||||
return instr->fanin;
|
||||
if (n == (instr->regs_count + 0))
|
||||
return instr->address;
|
||||
return ssa(instr->regs[n]);
|
||||
|
|
@ -834,8 +858,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
|
|||
/* iterator for an instruction's SSA sources (instr), also returns src #: */
|
||||
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
|
||||
if ((__instr)->regs_count) \
|
||||
for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
|
||||
if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
|
||||
for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
|
||||
if ((__srcinst = __ssa_src_n(__instr, __n)))
|
||||
|
||||
/* iterator for an instruction's SSA sources (instr): */
|
||||
#define foreach_ssa_src(__srcinst, __instr) \
|
||||
|
|
@ -878,7 +902,15 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
|
|||
struct ir3_instruction *instr =
|
||||
ir3_instr_create(block, 1, 0);
|
||||
ir3_reg_create(instr, 0, 0); /* dst */
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
if (src->regs[0]->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_register *src_reg =
|
||||
ir3_reg_create(instr, 0, IR3_REG_ARRAY);
|
||||
src_reg->array = src->regs[0]->array;
|
||||
src_reg->instr = src;
|
||||
} else {
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
}
|
||||
debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
|
||||
instr->cat1.src_type = type;
|
||||
instr->cat1.dst_type = type;
|
||||
return instr;
|
||||
|
|
@ -894,6 +926,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
|
|||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
instr->cat1.src_type = src_type;
|
||||
instr->cat1.dst_type = dst_type;
|
||||
debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
|
@ -1083,7 +1116,7 @@ typedef uint8_t regmask_t[2 * MAX_REG / 8];
|
|||
|
||||
static inline unsigned regmask_idx(struct ir3_register *reg)
|
||||
{
|
||||
unsigned num = reg->num;
|
||||
unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
|
||||
debug_assert(num < MAX_REG);
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
num += MAX_REG;
|
||||
|
|
|
|||
|
|
@ -74,8 +74,6 @@ struct ir3_compile {
|
|||
/* mapping from nir_register to defining instruction: */
|
||||
struct hash_table *def_ht;
|
||||
|
||||
/* mapping from nir_variable to ir3_array: */
|
||||
struct hash_table *var_ht;
|
||||
unsigned num_arrays;
|
||||
|
||||
/* a common pattern for indirect addressing is to request the
|
||||
|
|
@ -142,8 +140,6 @@ compile_init(struct ir3_compiler *compiler,
|
|||
ctx->so = so;
|
||||
ctx->def_ht = _mesa_hash_table_create(ctx,
|
||||
_mesa_hash_pointer, _mesa_key_pointer_equal);
|
||||
ctx->var_ht = _mesa_hash_table_create(ctx,
|
||||
_mesa_hash_pointer, _mesa_key_pointer_equal);
|
||||
ctx->block_ht = _mesa_hash_table_create(ctx,
|
||||
_mesa_hash_pointer, _mesa_key_pointer_equal);
|
||||
|
||||
|
|
@ -220,206 +216,26 @@ compile_free(struct ir3_compile *ctx)
|
|||
ralloc_free(ctx);
|
||||
}
|
||||
|
||||
/* global per-array information: */
|
||||
struct ir3_array {
|
||||
unsigned length, aid;
|
||||
};
|
||||
|
||||
/* per-block array state: */
|
||||
struct ir3_array_value {
|
||||
/* TODO drop length/aid, and just have ptr back to ir3_array */
|
||||
unsigned length, aid;
|
||||
/* initial array element values are phi's, other than for the
|
||||
* entry block. The phi src's get added later in a resolve step
|
||||
* after we have visited all the blocks, to account for back
|
||||
* edges in the cfg.
|
||||
*/
|
||||
struct ir3_instruction **phis;
|
||||
/* current array element values (as block is processed). When
|
||||
* the array phi's are resolved, it will contain the array state
|
||||
* at exit of block, so successor blocks can use it to add their
|
||||
* phi srcs.
|
||||
*/
|
||||
struct ir3_instruction *arr[];
|
||||
};
|
||||
|
||||
/* track array assignments per basic block. When an array is read
|
||||
* outside of the same basic block, we can use NIR's dominance-frontier
|
||||
* information to figure out where phi nodes are needed.
|
||||
*/
|
||||
struct ir3_nir_block_data {
|
||||
unsigned foo;
|
||||
/* indexed by array-id (aid): */
|
||||
struct ir3_array_value *arrs[];
|
||||
};
|
||||
|
||||
static struct ir3_nir_block_data *
|
||||
get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
|
||||
{
|
||||
if (!block->data) {
|
||||
struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
|
||||
((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
|
||||
block->data = bd;
|
||||
}
|
||||
return block->data;
|
||||
}
|
||||
|
||||
static void
|
||||
declare_var(struct ir3_compile *ctx, nir_variable *var)
|
||||
{
|
||||
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
|
||||
struct ir3_array *arr = ralloc(ctx, struct ir3_array);
|
||||
arr->id = ++ctx->num_arrays;
|
||||
arr->length = length;
|
||||
arr->aid = ++ctx->num_arrays;
|
||||
_mesa_hash_table_insert(ctx->var_ht, var, arr);
|
||||
arr->var = var;
|
||||
list_addtail(&arr->node, &ctx->ir->array_list);
|
||||
}
|
||||
|
||||
static nir_block *
|
||||
nir_block_pred(nir_block *block)
|
||||
{
|
||||
assert(block->predecessors->entries < 2);
|
||||
if (block->predecessors->entries == 0)
|
||||
return NULL;
|
||||
return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
|
||||
}
|
||||
|
||||
static struct ir3_array_value *
|
||||
static struct ir3_array *
|
||||
get_var(struct ir3_compile *ctx, nir_variable *var)
|
||||
{
|
||||
struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_nir_block_data *bd = get_block_data(ctx, block);
|
||||
struct ir3_array *arr = entry->data;
|
||||
|
||||
if (!bd->arrs[arr->aid]) {
|
||||
struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
|
||||
(arr->length * sizeof(av->arr[0])));
|
||||
struct ir3_array_value *defn = NULL;
|
||||
nir_block *pred_block;
|
||||
|
||||
av->length = arr->length;
|
||||
av->aid = arr->aid;
|
||||
|
||||
/* For loops, we have to consider that we have not visited some
|
||||
* of the blocks who should feed into the phi (ie. back-edges in
|
||||
* the cfg).. for example:
|
||||
*
|
||||
* loop {
|
||||
* block { load_var; ... }
|
||||
* if then block {} else block {}
|
||||
* block { store_var; ... }
|
||||
* if then block {} else block {}
|
||||
* block {...}
|
||||
* }
|
||||
*
|
||||
* We can skip the phi if we can chase the block predecessors
|
||||
* until finding the block previously defining the array without
|
||||
* crossing a block that has more than one predecessor.
|
||||
*
|
||||
* Otherwise create phi's and resolve them as a post-pass after
|
||||
* all the blocks have been visited (to handle back-edges).
|
||||
*/
|
||||
|
||||
for (pred_block = block->nblock;
|
||||
pred_block && (pred_block->predecessors->entries < 2) && !defn;
|
||||
pred_block = nir_block_pred(pred_block)) {
|
||||
struct ir3_block *pblock = get_block(ctx, pred_block);
|
||||
struct ir3_nir_block_data *pbd = pblock->data;
|
||||
if (!pbd)
|
||||
continue;
|
||||
defn = pbd->arrs[arr->aid];
|
||||
}
|
||||
|
||||
if (defn) {
|
||||
/* only one possible definer: */
|
||||
for (unsigned i = 0; i < arr->length; i++)
|
||||
av->arr[i] = defn->arr[i];
|
||||
} else if (pred_block) {
|
||||
/* not the first block, and multiple potential definers: */
|
||||
av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
|
||||
|
||||
for (unsigned i = 0; i < arr->length; i++) {
|
||||
struct ir3_instruction *phi;
|
||||
|
||||
phi = ir3_instr_create2(block, -1, OPC_META_PHI,
|
||||
1 + ctx->impl->num_blocks);
|
||||
ir3_reg_create(phi, 0, 0); /* dst */
|
||||
|
||||
/* phi's should go at head of block: */
|
||||
list_delinit(&phi->node);
|
||||
list_add(&phi->node, &block->instr_list);
|
||||
|
||||
av->phis[i] = av->arr[i] = phi;
|
||||
}
|
||||
} else {
|
||||
/* Some shaders end up reading array elements without
|
||||
* first writing.. so initialize things to prevent null
|
||||
* instr ptrs later:
|
||||
*/
|
||||
for (unsigned i = 0; i < arr->length; i++)
|
||||
av->arr[i] = create_immed(block, 0);
|
||||
}
|
||||
|
||||
bd->arrs[arr->aid] = av;
|
||||
}
|
||||
|
||||
return bd->arrs[arr->aid];
|
||||
}
|
||||
|
||||
static void
|
||||
add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
|
||||
struct ir3_array_value *av, BITSET_WORD *visited)
|
||||
{
|
||||
struct ir3_block *block;
|
||||
struct ir3_nir_block_data *bd;
|
||||
|
||||
if (BITSET_TEST(visited, nblock->index))
|
||||
return;
|
||||
|
||||
BITSET_SET(visited, nblock->index);
|
||||
|
||||
block = get_block(ctx, nblock);
|
||||
bd = block->data;
|
||||
|
||||
if (bd && bd->arrs[av->aid]) {
|
||||
struct ir3_array_value *dav = bd->arrs[av->aid];
|
||||
for (unsigned i = 0; i < av->length; i++) {
|
||||
ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
|
||||
dav->arr[i];
|
||||
}
|
||||
} else {
|
||||
/* didn't find defn, recurse predecessors: */
|
||||
struct set_entry *entry;
|
||||
set_foreach(nblock->predecessors, entry) {
|
||||
add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
|
||||
{
|
||||
struct ir3_nir_block_data *bd = block->data;
|
||||
unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
|
||||
|
||||
if (!bd)
|
||||
return;
|
||||
|
||||
/* TODO use nir dom_frontier to help us with this? */
|
||||
|
||||
for (unsigned i = 1; i <= ctx->num_arrays; i++) {
|
||||
struct ir3_array_value *av = bd->arrs[i];
|
||||
BITSET_WORD visited[bitset_words];
|
||||
struct set_entry *entry;
|
||||
|
||||
if (!(av && av->phis))
|
||||
continue;
|
||||
|
||||
memset(visited, 0, sizeof(visited));
|
||||
set_foreach(block->nblock->predecessors, entry) {
|
||||
add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
|
||||
}
|
||||
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
|
||||
if (arr->var == var)
|
||||
return arr;
|
||||
}
|
||||
compile_error(ctx, "bogus var: %s\n", var->name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* allocate a n element value array (to be populated by caller) and
|
||||
|
|
@ -437,6 +253,7 @@ __get_dst(struct ir3_compile *ctx, void *key, unsigned n)
|
|||
static struct ir3_instruction **
|
||||
get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
|
||||
{
|
||||
compile_assert(ctx, dst->is_ssa);
|
||||
if (dst->is_ssa) {
|
||||
return __get_dst(ctx, &dst->ssa, n);
|
||||
} else {
|
||||
|
|
@ -454,6 +271,7 @@ static struct ir3_instruction **
|
|||
get_src(struct ir3_compile *ctx, nir_src *src)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
compile_assert(ctx, src->is_ssa);
|
||||
if (src->is_ssa) {
|
||||
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
|
||||
} else {
|
||||
|
|
@ -568,7 +386,7 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
|
|||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
|
||||
ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
|
||||
|
||||
ir3_instr_set_address(mov, address);
|
||||
|
||||
|
|
@ -607,17 +425,45 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
|
|||
src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
|
||||
src->instr = collect;
|
||||
src->size = arrsz;
|
||||
src->offset = n;
|
||||
src->array.offset = n;
|
||||
|
||||
ir3_instr_set_address(mov, address);
|
||||
|
||||
return mov;
|
||||
}
|
||||
|
||||
/* relative (indirect) if address!=NULL */
|
||||
static struct ir3_instruction *
|
||||
create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
|
||||
struct ir3_instruction *src, struct ir3_instruction *address,
|
||||
struct ir3_instruction *collect)
|
||||
create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
|
||||
struct ir3_instruction *address)
|
||||
{
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_instruction *mov;
|
||||
struct ir3_register *src;
|
||||
|
||||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
|
||||
COND(address, IR3_REG_RELATIV));
|
||||
src->instr = arr->last_write;
|
||||
src->size = arr->length;
|
||||
src->array.id = arr->id;
|
||||
src->array.offset = n;
|
||||
|
||||
if (address)
|
||||
ir3_instr_set_address(mov, address);
|
||||
|
||||
arr->last_access = mov;
|
||||
|
||||
return mov;
|
||||
}
|
||||
|
||||
/* relative (indirect) if address!=NULL */
|
||||
static struct ir3_instruction *
|
||||
create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
|
||||
struct ir3_instruction *src, struct ir3_instruction *address)
|
||||
{
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_instruction *mov;
|
||||
|
|
@ -626,14 +472,18 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
|
|||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
|
||||
dst->size = arrsz;
|
||||
dst->offset = n;
|
||||
dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
|
||||
COND(address, IR3_REG_RELATIV));
|
||||
dst->instr = arr->last_access;
|
||||
dst->size = arr->length;
|
||||
dst->array.id = arr->id;
|
||||
dst->array.offset = n;
|
||||
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
|
||||
mov->fanin = collect;
|
||||
|
||||
ir3_instr_set_address(mov, address);
|
||||
|
||||
arr->last_write = arr->last_access = mov;
|
||||
|
||||
return mov;
|
||||
}
|
||||
|
||||
|
|
@ -1198,7 +1048,7 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
{
|
||||
nir_deref_var *dvar = intr->variables[0];
|
||||
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
|
||||
struct ir3_array_value *arr = get_var(ctx, dvar->var);
|
||||
struct ir3_array *arr = get_var(ctx, dvar->var);
|
||||
|
||||
compile_assert(ctx, dvar->deref.child &&
|
||||
(dvar->deref.child->deref_type == nir_deref_type_array));
|
||||
|
|
@ -1209,19 +1059,17 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
dst[i] = arr->arr[n];
|
||||
dst[i] = create_var_load(ctx, arr, n, NULL);
|
||||
}
|
||||
break;
|
||||
case nir_deref_array_type_indirect: {
|
||||
/* for indirect, we need to collect all the array elements: */
|
||||
struct ir3_instruction *collect =
|
||||
create_collect(ctx->block, arr->arr, arr->length);
|
||||
struct ir3_instruction *addr =
|
||||
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
|
||||
dst[i] = create_var_load(ctx, arr, n, addr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -1238,8 +1086,9 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
{
|
||||
nir_deref_var *dvar = intr->variables[0];
|
||||
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
|
||||
struct ir3_array_value *arr = get_var(ctx, dvar->var);
|
||||
struct ir3_instruction **src;
|
||||
struct ir3_array *arr = get_var(ctx, dvar->var);
|
||||
struct ir3_instruction *addr, **src;
|
||||
unsigned wrmask = intr->const_index[0];
|
||||
|
||||
compile_assert(ctx, dvar->deref.child &&
|
||||
(dvar->deref.child->deref_type == nir_deref_type_array));
|
||||
|
|
@ -1248,66 +1097,24 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
switch (darr->deref_array_type) {
|
||||
case nir_deref_array_type_direct:
|
||||
/* direct access does not require anything special: */
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
/* ttn doesn't generate partial writemasks */
|
||||
assert(intr->const_index[0] ==
|
||||
(1 << intr->num_components) - 1);
|
||||
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
arr->arr[n] = src[i];
|
||||
}
|
||||
addr = NULL;
|
||||
break;
|
||||
case nir_deref_array_type_indirect: {
|
||||
/* for indirect, create indirect-store and fan that out: */
|
||||
struct ir3_instruction *collect =
|
||||
create_collect(ctx->block, arr->arr, arr->length);
|
||||
struct ir3_instruction *addr =
|
||||
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
/* ttn doesn't generate partial writemasks */
|
||||
assert(intr->const_index[0] ==
|
||||
(1 << intr->num_components) - 1);
|
||||
|
||||
struct ir3_instruction *store;
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
|
||||
store = create_indirect_store(ctx, arr->length,
|
||||
n, src[i], addr, collect);
|
||||
|
||||
store->fanin->fi.aid = arr->aid;
|
||||
|
||||
/* TODO: probably split this out to be used for
|
||||
* store_output_indirect? or move this into
|
||||
* create_indirect_store()?
|
||||
*/
|
||||
for (int j = i; j < arr->length; j += intr->num_components) {
|
||||
struct ir3_instruction *split;
|
||||
|
||||
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
|
||||
split->fo.off = j;
|
||||
ir3_reg_create(split, 0, 0);
|
||||
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
|
||||
|
||||
arr->arr[j] = split;
|
||||
}
|
||||
}
|
||||
/* fixup fanout/split neighbors: */
|
||||
for (int i = 0; i < arr->length; i++) {
|
||||
arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
|
||||
arr->arr[i+1] : NULL;
|
||||
arr->arr[i]->cp.left = (i > 0) ?
|
||||
arr->arr[i-1] : NULL;
|
||||
}
|
||||
case nir_deref_array_type_indirect:
|
||||
addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
compile_error(ctx, "Unhandled store deref type: %u\n",
|
||||
darr->deref_array_type);
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
if (!(wrmask & (1 << i)))
|
||||
continue;
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
create_var_store(ctx, arr, n, src[i], addr);
|
||||
}
|
||||
}
|
||||
|
||||
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
||||
|
|
@ -1835,8 +1642,6 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
|
|||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
}
|
||||
}
|
||||
|
||||
resolve_array_phis(ctx, block);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -202,6 +202,7 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
|
|||
*dstflags |= srcflags & IR3_REG_CONST;
|
||||
*dstflags |= srcflags & IR3_REG_IMMED;
|
||||
*dstflags |= srcflags & IR3_REG_RELATIV;
|
||||
*dstflags |= srcflags & IR3_REG_ARRAY;
|
||||
}
|
||||
|
||||
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
|
||||
|
|
@ -233,6 +234,10 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
combine_flags(&new_flags, src_reg->flags);
|
||||
|
||||
if (valid_flags(instr, n, new_flags)) {
|
||||
if (new_flags & IR3_REG_ARRAY) {
|
||||
debug_assert(!(reg->flags & IR3_REG_ARRAY));
|
||||
reg->array = src_reg->array;
|
||||
}
|
||||
reg->flags = new_flags;
|
||||
reg->instr = ssa(src_reg);
|
||||
}
|
||||
|
|
@ -283,6 +288,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
conflicts(instr->address, reg->instr->address))
|
||||
return;
|
||||
|
||||
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
|
||||
src_reg->flags = new_flags;
|
||||
instr->regs[n+1] = src_reg;
|
||||
|
||||
|
|
@ -294,6 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
|
||||
if ((src_reg->flags & IR3_REG_RELATIV) &&
|
||||
!conflicts(instr->address, reg->instr->address)) {
|
||||
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
|
||||
src_reg->flags = new_flags;
|
||||
instr->regs[n+1] = src_reg;
|
||||
ir3_instr_set_address(instr, reg->instr->address);
|
||||
|
|
@ -329,6 +336,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
/* other than category 1 (mov) we can only encode up to 10 bits: */
|
||||
if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
|
||||
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
|
||||
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
|
||||
src_reg->flags = new_flags;
|
||||
src_reg->iim_val = iim_val;
|
||||
instr->regs[n+1] = src_reg;
|
||||
|
|
@ -349,9 +357,11 @@ eliminate_output_mov(struct ir3_instruction *instr)
|
|||
{
|
||||
if (is_eligible_mov(instr, false)) {
|
||||
struct ir3_register *reg = instr->regs[1];
|
||||
struct ir3_instruction *src_instr = ssa(reg);
|
||||
debug_assert(src_instr);
|
||||
return src_instr;
|
||||
if (!(reg->flags & IR3_REG_ARRAY)) {
|
||||
struct ir3_instruction *src_instr = ssa(reg);
|
||||
debug_assert(src_instr);
|
||||
return src_instr;
|
||||
}
|
||||
}
|
||||
return instr;
|
||||
}
|
||||
|
|
@ -379,9 +389,22 @@ instr_cp(struct ir3_instruction *instr)
|
|||
continue;
|
||||
|
||||
instr_cp(src);
|
||||
|
||||
/* TODO non-indirect access we could figure out which register
|
||||
* we actually want and allow cp..
|
||||
*/
|
||||
if (reg->flags & IR3_REG_ARRAY)
|
||||
continue;
|
||||
|
||||
reg_cp(instr, reg, n);
|
||||
}
|
||||
|
||||
if (instr->regs[0]->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_instruction *src = ssa(instr->regs[0]);
|
||||
if (src)
|
||||
instr_cp(src);
|
||||
}
|
||||
|
||||
if (instr->address) {
|
||||
instr_cp(instr->address);
|
||||
ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
|
||||
|
|
|
|||
|
|
@ -118,6 +118,10 @@ ir3_instr_depth(struct ir3_instruction *instr)
|
|||
/* visit child to compute its depth: */
|
||||
ir3_instr_depth(src);
|
||||
|
||||
/* for array writes, no need to delay on previous write: */
|
||||
if (i == 0)
|
||||
continue;
|
||||
|
||||
sd = ir3_delayslots(src, instr, i) + src->depth;
|
||||
|
||||
instr->depth = MAX2(instr->depth, sd);
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ static void print_instr_name(struct ir3_instruction *instr)
|
|||
}
|
||||
}
|
||||
|
||||
static void print_reg_name(struct ir3_register *reg, bool followssa)
|
||||
static void print_reg_name(struct ir3_register *reg)
|
||||
{
|
||||
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
|
||||
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
|
||||
|
|
@ -106,20 +106,29 @@ static void print_reg_name(struct ir3_register *reg, bool followssa)
|
|||
|
||||
if (reg->flags & IR3_REG_IMMED) {
|
||||
printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
|
||||
} else if (reg->flags & IR3_REG_SSA) {
|
||||
printf("_");
|
||||
if (followssa) {
|
||||
printf("[");
|
||||
} else if (reg->flags & IR3_REG_ARRAY) {
|
||||
printf("arr[id=%u, offset=%u, size=%u", reg->array.id,
|
||||
reg->array.offset, reg->size);
|
||||
/* for ARRAY we could have null src, for example first write
|
||||
* instruction..
|
||||
*/
|
||||
if (reg->instr) {
|
||||
printf(", _[");
|
||||
print_instr_name(reg->instr);
|
||||
printf("]");
|
||||
}
|
||||
printf("]");
|
||||
} else if (reg->flags & IR3_REG_SSA) {
|
||||
printf("_[");
|
||||
print_instr_name(reg->instr);
|
||||
printf("]");
|
||||
} else if (reg->flags & IR3_REG_RELATIV) {
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
printf("h");
|
||||
if (reg->flags & IR3_REG_CONST)
|
||||
printf("c<a0.x + %u>", reg->num);
|
||||
printf("c<a0.x + %u>", reg->array.offset);
|
||||
else
|
||||
printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
|
||||
printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->array.offset, reg->size);
|
||||
} else {
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
printf("h");
|
||||
|
|
@ -158,7 +167,7 @@ print_instr(struct ir3_instruction *instr, int lvl)
|
|||
for (i = 0; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
printf(i ? ", " : " ");
|
||||
print_reg_name(reg, !!i);
|
||||
print_reg_name(reg);
|
||||
}
|
||||
|
||||
if (instr->address) {
|
||||
|
|
@ -168,13 +177,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
|
|||
printf("]");
|
||||
}
|
||||
|
||||
if (instr->fanin) {
|
||||
printf(", fanin=_");
|
||||
printf("[");
|
||||
print_instr_name(instr->fanin);
|
||||
printf("]");
|
||||
}
|
||||
|
||||
if (instr->cp.left) {
|
||||
printf(", left=_");
|
||||
printf("[");
|
||||
|
|
@ -192,8 +194,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
|
|||
if (is_meta(instr)) {
|
||||
if (instr->opc == OPC_META_FO) {
|
||||
printf(", off=%d", instr->fo.off);
|
||||
} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
|
||||
printf(", aid=%d", instr->fi.aid);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,25 +68,24 @@
|
|||
* LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
|
||||
* register assignment. But for us that is horrible from a scheduling
|
||||
* standpoint. Instead what we do is use idea of 'definer' instruction.
|
||||
* Ie. the first instruction (lowest ip) to write to the array is the
|
||||
* Ie. the first instruction (lowest ip) to write to the variable is the
|
||||
* one we consider from use/def perspective when building interference
|
||||
* graph. (Other instructions which write other array elements just
|
||||
* define the variable some more.)
|
||||
* graph. (Other instructions which write other variable components
|
||||
* just define the variable some more.)
|
||||
*
|
||||
* Arrays of arbitrary size are handled via pre-coloring a consecutive
|
||||
* sequence of registers. Additional scalar (single component) reg
|
||||
* names are allocated starting at ctx->class_base[total_class_count]
|
||||
* (see arr->base), which are pre-colored. In the use/def graph direct
|
||||
* access is treated as a single element use/def, and indirect access
|
||||
* is treated as use or def of all array elements. (Only the first
|
||||
* def is tracked, in case of multiple indirect writes, etc.)
|
||||
*/
|
||||
|
||||
static const unsigned class_sizes[] = {
|
||||
1, 2, 3, 4,
|
||||
4 + 4, /* txd + 1d/2d */
|
||||
4 + 6, /* txd + 3d */
|
||||
/* temporary: until we can assign arrays, create classes so we
|
||||
* can round up array to fit. NOTE with tgsi arrays should
|
||||
* really all be multiples of four:
|
||||
*/
|
||||
4 * 4,
|
||||
4 * 8,
|
||||
4 * 16,
|
||||
4 * 32,
|
||||
|
||||
};
|
||||
#define class_count ARRAY_SIZE(class_sizes)
|
||||
|
||||
|
|
@ -265,8 +264,9 @@ struct ir3_ra_ctx {
|
|||
struct ir3_ra_reg_set *set;
|
||||
struct ra_graph *g;
|
||||
unsigned alloc_count;
|
||||
unsigned class_alloc_count[total_class_count];
|
||||
unsigned class_base[total_class_count];
|
||||
/* one per class, plus one slot for arrays: */
|
||||
unsigned class_alloc_count[total_class_count + 1];
|
||||
unsigned class_base[total_class_count + 1];
|
||||
unsigned instr_cnt;
|
||||
unsigned *def, *use; /* def/use table */
|
||||
struct ir3_ra_instr_data *instrd;
|
||||
|
|
@ -329,9 +329,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
|||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
struct ir3_instruction *d = NULL;
|
||||
|
||||
if (instr->fanin)
|
||||
return get_definer(ctx, instr->fanin, sz, off);
|
||||
|
||||
if (id->defn) {
|
||||
*sz = id->sz;
|
||||
*off = id->off;
|
||||
|
|
@ -485,10 +482,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
/* couple special cases: */
|
||||
if (writes_addr(instr) || writes_pred(instr)) {
|
||||
id->cls = -1;
|
||||
continue;
|
||||
} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
|
||||
id->cls = total_class_count;
|
||||
id->defn = instr;
|
||||
} else {
|
||||
id->defn = get_definer(ctx, instr, &id->sz, &id->off);
|
||||
id->cls = size_to_class(id->sz, is_half(id->defn));
|
||||
}
|
||||
id->defn = get_definer(ctx, instr, &id->sz, &id->off);
|
||||
id->cls = size_to_class(id->sz, is_half(id->defn));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -518,8 +518,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
|
||||
/* arrays which don't fit in one of the pre-defined class
|
||||
* sizes are pre-colored:
|
||||
*
|
||||
* TODO but we still need to allocate names for them, don't we??
|
||||
*/
|
||||
if (id->cls >= 0) {
|
||||
instr->name = ctx->class_alloc_count[id->cls]++;
|
||||
|
|
@ -531,7 +529,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
static void
|
||||
ra_init(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
unsigned n;
|
||||
unsigned n, base;
|
||||
|
||||
ir3_clear_mark(ctx->ir);
|
||||
n = ir3_count_instructions(ctx->ir);
|
||||
|
|
@ -550,11 +548,20 @@ ra_init(struct ir3_ra_ctx *ctx)
|
|||
* actual ra name is class_base[cls] + instr->name;
|
||||
*/
|
||||
ctx->class_base[0] = 0;
|
||||
for (unsigned i = 1; i < total_class_count; i++) {
|
||||
for (unsigned i = 1; i <= total_class_count; i++) {
|
||||
ctx->class_base[i] = ctx->class_base[i-1] +
|
||||
ctx->class_alloc_count[i-1];
|
||||
}
|
||||
|
||||
/* and vreg names for array elements: */
|
||||
base = ctx->class_base[total_class_count];
|
||||
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
|
||||
arr->base = base;
|
||||
ctx->class_alloc_count[total_class_count] += arr->length;
|
||||
base += arr->length;
|
||||
}
|
||||
ctx->alloc_count += ctx->class_alloc_count[total_class_count];
|
||||
|
||||
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
|
||||
ralloc_steal(ctx->g, ctx->instrd);
|
||||
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
|
||||
|
|
@ -566,6 +573,7 @@ __ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
|
|||
{
|
||||
unsigned name;
|
||||
debug_assert(cls >= 0);
|
||||
debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. */
|
||||
name = ctx->class_base[cls] + defn->name;
|
||||
debug_assert(name < ctx->alloc_count);
|
||||
return name;
|
||||
|
|
@ -590,6 +598,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
struct ir3_ra_block_data *bd;
|
||||
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
|
||||
|
||||
void def(unsigned name, struct ir3_instruction *instr)
|
||||
{
|
||||
/* defined on first write: */
|
||||
if (!ctx->def[name])
|
||||
ctx->def[name] = instr->ip;
|
||||
ctx->use[name] = instr->ip;
|
||||
BITSET_SET(bd->def, name);
|
||||
}
|
||||
|
||||
void use(unsigned name, struct ir3_instruction *instr)
|
||||
{
|
||||
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
|
||||
if (!BITSET_TEST(bd->def, name))
|
||||
BITSET_SET(bd->use, name);
|
||||
}
|
||||
|
||||
bd = rzalloc(ctx->g, struct ir3_ra_block_data);
|
||||
|
||||
bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words);
|
||||
|
|
@ -601,6 +625,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
struct ir3_instruction *src;
|
||||
struct ir3_register *reg;
|
||||
|
||||
if (instr->regs_count == 0)
|
||||
continue;
|
||||
|
|
@ -632,17 +657,45 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
|
||||
if (writes_gpr(instr)) {
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
|
||||
if (id->defn == instr) {
|
||||
if (dst->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, dst->array.id);
|
||||
unsigned i;
|
||||
|
||||
debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
|
||||
|
||||
/* set the node class now.. in case we don't encounter
|
||||
* this array dst again. From register_alloc algo's
|
||||
* perspective, these are all single/scalar regs:
|
||||
*/
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
|
||||
}
|
||||
|
||||
/* indirect write is treated like a write to all array
|
||||
* elements, since we don't know which one is actually
|
||||
* written:
|
||||
*/
|
||||
if (dst->flags & IR3_REG_RELATIV) {
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
def(name, instr);
|
||||
}
|
||||
} else {
|
||||
unsigned name = arr->base + dst->array.offset;
|
||||
def(name, instr);
|
||||
}
|
||||
|
||||
} else if (id->defn == instr) {
|
||||
unsigned name = ra_name(ctx, id);
|
||||
|
||||
ctx->def[name] = id->defn->ip;
|
||||
ctx->use[name] = id->defn->ip;
|
||||
|
||||
/* since we are in SSA at this point: */
|
||||
debug_assert(!BITSET_TEST(bd->use, name));
|
||||
|
||||
BITSET_SET(bd->def, name);
|
||||
def(name, id->defn);
|
||||
|
||||
if (is_half(id->defn)) {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
|
|
@ -672,12 +725,28 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
foreach_ssa_src(src, instr) {
|
||||
if (writes_gpr(src)) {
|
||||
foreach_src(reg, instr) {
|
||||
if (reg->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, reg->array.id);
|
||||
/* indirect read is treated like a read from all array
|
||||
* elements, since we don't know which one is actually
|
||||
* read:
|
||||
*/
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
unsigned i;
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
use(name, instr);
|
||||
}
|
||||
} else {
|
||||
unsigned name = arr->base + reg->array.offset;
|
||||
use(name, instr);
|
||||
debug_assert(reg->array.offset < arr->length);
|
||||
}
|
||||
} else if ((src = ssa(reg)) && writes_gpr(src)) {
|
||||
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
|
||||
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
|
||||
if (!BITSET_TEST(bd->def, name))
|
||||
BITSET_SET(bd->use, name);
|
||||
use(name, instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -830,18 +899,36 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
|
||||
* array access(es) which do not have any previous access to depend
|
||||
* on from scheduling point of view
|
||||
*/
|
||||
static void
|
||||
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
if (id->defn) {
|
||||
struct ir3_ra_instr_data *id;
|
||||
|
||||
if (reg->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, reg->array.id);
|
||||
unsigned name = arr->base + reg->array.offset;
|
||||
unsigned r = ra_get_node_reg(ctx->g, name);
|
||||
unsigned num = ctx->set->ra_reg_to_gpr[r];
|
||||
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
reg->array.offset = num;
|
||||
} else {
|
||||
reg->num = num;
|
||||
}
|
||||
|
||||
reg->flags &= ~IR3_REG_ARRAY;
|
||||
} else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
|
||||
unsigned name = ra_name(ctx, id);
|
||||
unsigned r = ra_get_node_reg(ctx->g, name);
|
||||
unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
|
||||
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
num += reg->offset;
|
||||
debug_assert(!(reg->flags & IR3_REG_RELATIV));
|
||||
|
||||
reg->num = num;
|
||||
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
|
||||
|
|
@ -868,9 +955,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
|
||||
foreach_src_n(reg, n, instr) {
|
||||
struct ir3_instruction *src = reg->instr;
|
||||
if (!src)
|
||||
/* Note: reg->instr could be null for IR3_REG_ARRAY */
|
||||
if (!(src || (reg->flags & IR3_REG_ARRAY)))
|
||||
continue;
|
||||
|
||||
reg_assign(ctx, instr->regs[n+1], src);
|
||||
if (instr->regs[n+1]->flags & IR3_REG_HALF)
|
||||
fixup_half_instr_src(instr);
|
||||
|
|
@ -881,6 +968,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
static int
|
||||
ra_alloc(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
unsigned n = 0;
|
||||
|
||||
/* frag shader inputs get pre-assigned, since we have some
|
||||
* constraints/unknowns about setup for some of these regs:
|
||||
*/
|
||||
|
|
@ -898,7 +987,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
|
|||
i += 4;
|
||||
}
|
||||
|
||||
for (j = 0; i < ir->ninputs; i++) {
|
||||
j = 0;
|
||||
for (; i < ir->ninputs; i++) {
|
||||
struct ir3_instruction *instr = ir->inputs[i];
|
||||
if (instr) {
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
|
|
@ -914,6 +1004,24 @@ ra_alloc(struct ir3_ra_ctx *ctx)
|
|||
}
|
||||
}
|
||||
}
|
||||
n = j;
|
||||
}
|
||||
|
||||
/* pre-assign array elements:
|
||||
* TODO we could be a bit more clever if we knew which arrays didn't
|
||||
* fully (partially?) conflict with each other..
|
||||
*/
|
||||
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
|
||||
unsigned i;
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name, reg;
|
||||
|
||||
name = arr->base + i;
|
||||
reg = ctx->set->gpr_to_ra_reg[0][n++];
|
||||
|
||||
ra_set_node_reg(ctx->g, name, reg);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (!ra_allocate(ctx->g))
|
||||
|
|
|
|||
|
|
@ -187,6 +187,9 @@ delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
|
|||
|
||||
foreach_ssa_src_n(src, i, instr) {
|
||||
unsigned d;
|
||||
/* for array writes, no need to delay on previous write: */
|
||||
if (i == 0)
|
||||
continue;
|
||||
if (src->block != instr->block)
|
||||
continue;
|
||||
d = delay_calc_srcn(ctx, src, instr, i);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue