freedreno/ir3: array rework

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Author: Rob Clark, 2016-01-10 14:10:08 -05:00
parent cc7ed34df9
commit fad158a0e0
9 changed files with 365 additions and 363 deletions

src/gallium/drivers/freedreno/freedreno_screen.c

@ -400,9 +400,16 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+/* Technically this should be the same as for TEMP/CONST, since
+ * everything is just normal registers.  This is just a temporary
+ * hack until load_input/store_output handle arrays in a similar
+ * way to load_var/store_var..
+ */
+return 0;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-return 1;
+/* a2xx compiler doesn't handle indirect: */
+return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:

src/gallium/drivers/freedreno/ir3/ir3.c

@ -81,6 +81,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
list_inithead(&shader->block_list);
list_inithead(&shader->array_list);
return shader;
}
@ -121,18 +122,19 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
val.iim_val = reg->iim_val;
} else {
unsigned components;
+int16_t max;
if (reg->flags & IR3_REG_RELATIV) {
components = reg->size;
-val.dummy10 = reg->offset;
+val.dummy10 = reg->array.offset;
+max = (reg->array.offset + repeat + components - 1) >> 2;
} else {
components = util_last_bit(reg->wrmask);
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
+max = (reg->num + repeat + components - 1) >> 2;
}
-int16_t max = (reg->num + repeat + components - 1) >> 2;
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
} else if (val.num == 63) {
@ -233,7 +235,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
if (src1->flags & IR3_REG_RELATIV) {
-iassert(src1->num < (1 << 10));
+iassert(src1->array.offset < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -260,7 +262,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
!((src1->flags ^ src2->flags) & IR3_REG_HALF));
if (src2->flags & IR3_REG_RELATIV) {
-iassert(src2->num < (1 << 10));
+iassert(src2->array.offset < (1 << 10));
cat2->rel2.src2 = reg(src2, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -333,7 +335,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
if (src1->flags & IR3_REG_RELATIV) {
-iassert(src1->num < (1 << 10));
+iassert(src1->array.offset < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -361,7 +363,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
if (src3->flags & IR3_REG_RELATIV) {
-iassert(src3->num < (1 << 10));
+iassert(src3->array.offset < (1 << 10));
cat3->rel2.src3 = reg(src3, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -404,7 +406,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
iassert(instr->regs_count == 2);
if (src->flags & IR3_REG_RELATIV) {
-iassert(src->num < (1 << 10));
+iassert(src->array.offset < (1 << 10));
cat4->rel.src = reg(src, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
@ -737,6 +739,14 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
return reg;
}
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
struct ir3_register *reg)
{
struct ir3_register *new_reg = reg_create(shader, 0, 0);
*new_reg = *reg;
return new_reg;
}
void
ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr)
@ -777,3 +787,12 @@ ir3_count_instructions(struct ir3 *ir)
}
return cnt;
}
struct ir3_array *
ir3_lookup_array(struct ir3 *ir, unsigned id)
{
list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
if (arr->id == id)
return arr;
return NULL;
}
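Later passes use this helper to map an access back to its array; a minimal usage sketch (the ctx and reg names are assumptions, mirroring the ir3_ra.c hunks below):

	struct ir3_array *arr = ir3_lookup_array(ctx->ir, reg->array.id);
	debug_assert(reg->array.offset < arr->length);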

src/gallium/drivers/freedreno/ir3/ir3.h

@ -83,7 +83,8 @@ struct ir3_register {
* before register assignment is done:
*/
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
-IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */
+IR3_REG_ARRAY = 0x4000,
+IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */
} flags;
union {
@ -97,11 +98,18 @@ struct ir3_register {
uint32_t uim_val;
float fim_val;
/* relative: */
-int offset;
+struct {
+uint16_t id;
+uint16_t offset;
+} array;
};
-/* for IR3_REG_SSA, src registers contain ptr back to
- * assigning instruction.
+/* For IR3_REG_SSA, src registers contain ptr back to assigning
+ * instruction.
+ *
+ * For IR3_REG_ARRAY, the pointer is back to the last dependent
+ * array access (although the net effect is the same, it points
+ * back to a previous instruction that we depend on).
 */
struct ir3_instruction *instr;
@ -221,9 +229,6 @@ struct ir3_instruction {
struct {
int off; /* component/offset */
} fo;
-struct {
-int aid;
-} fi;
struct {
/* used to temporarily hold reference to nir_phi_instr
* until we resolve the phi srcs
@ -293,19 +298,6 @@ struct ir3_instruction {
*/
struct ir3_instruction *address;
-/* in case of a instruction with relative dst instruction, we need to
- * capture the dependency on the fanin for the previous values of
- * the array elements. Since we don't know at compile time actually
- * which array elements are written, this serves to preserve the
- * unconditional write to array elements prior to the conditional
- * write.
- *
- * TODO only cat1 can do indirect write.. we could maybe move this
- * into instr->cat1.fanin (but would require the frontend to insert
- * the extra mov)
- */
-struct ir3_instruction *fanin;
/* Entry in ir3_block's instruction list: */
struct list_head node;
@ -379,10 +371,39 @@ struct ir3 {
/* List of blocks: */
struct list_head block_list;
/* List of ir3_array's: */
struct list_head array_list;
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
typedef struct nir_variable nir_variable;
struct ir3_array {
struct list_head node;
unsigned length;
unsigned id;
nir_variable *var;
/* We track the last write and last access (read or write) to
* setup dependencies on instructions that read or write the
* array. Reads can be re-ordered wrt. other reads, but should
* not be re-ordered wrt. to writes. Writes cannot be reordered
* wrt. any other access to the array.
*
* So array reads depend on last write, and array writes depend
* on the last access.
*/
struct ir3_instruction *last_write, *last_access;
/* extra stuff used in RA pass: */
unsigned base;
};
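The rules in the comment above reduce to two updates on each emitted access; a minimal sketch with hypothetical helper names (the real updates are inline in create_var_load()/create_var_store() in ir3_compiler_nir.c below):

	/* a read depends on the last write: */
	static void array_read_dep(struct ir3_array *arr, struct ir3_instruction *mov)
	{
		mov->regs[1]->instr = arr->last_write;   /* src back-pointer */
		arr->last_access = mov;
	}

	/* a write depends on the last access (read or write): */
	static void array_write_dep(struct ir3_array *arr, struct ir3_instruction *mov)
	{
		mov->regs[0]->instr = arr->last_access;  /* dst back-pointer */
		arr->last_write = arr->last_access = mov;
	}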
struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
typedef struct nir_block nir_block;
struct ir3_block {
@ -430,6 +451,8 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
struct ir3_register *reg);
void ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr);
@ -510,6 +533,9 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
if (dst->num == regid(REG_A0, 0))
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type))
return true;
@ -623,8 +649,10 @@ static inline bool writes_pred(struct ir3_instruction *instr)
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
-if (reg->flags & IR3_REG_SSA)
+if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
+debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
return reg->instr;
+}
return NULL;
}
@ -813,8 +841,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
-if (instr->fanin)
-return instr->regs_count + 2;
if (instr->address)
return instr->regs_count + 1;
return instr->regs_count;
@ -822,8 +848,6 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
-if (n == (instr->regs_count + 1))
-return instr->fanin;
if (n == (instr->regs_count + 0))
return instr->address;
return ssa(instr->regs[n]);
@ -834,8 +858,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
if ((__instr)->regs_count) \
-for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
-if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
+for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
+if ((__srcinst = __ssa_src_n(__instr, __n)))
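The iterator now starts at src #0, i.e. ssa(instr->regs[0]): for an array write that is the back-pointer to the previous array access, so it is visited as an extra dependency. Passes that should not stall on that false dependency skip it explicitly, as the ir3_depth.c and ir3_sched.c hunks below do; a sketch assuming the definitions above:

	struct ir3_instruction *src;
	foreach_ssa_src_n(src, i, instr) {
		/* for array writes, no need to delay on the previous write: */
		if (i == 0)
			continue;
		/* ... handle the real srcs ... */
	}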
/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
@ -878,7 +902,15 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
struct ir3_instruction *instr =
ir3_instr_create(block, 1, 0);
ir3_reg_create(instr, 0, 0); /* dst */
-ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+if (src->regs[0]->flags & IR3_REG_ARRAY) {
+struct ir3_register *src_reg =
+ir3_reg_create(instr, 0, IR3_REG_ARRAY);
+src_reg->array = src->regs[0]->array;
+src_reg->instr = src;
+} else {
+ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+}
+debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;
instr->cat1.dst_type = type;
return instr;
@ -894,6 +926,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = src_type;
instr->cat1.dst_type = dst_type;
debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
return instr;
}
@ -1083,7 +1116,7 @@ typedef uint8_t regmask_t[2 * MAX_REG / 8];
static inline unsigned regmask_idx(struct ir3_register *reg)
{
-unsigned num = reg->num;
+unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
debug_assert(num < MAX_REG);
if (reg->flags & IR3_REG_HALF)
num += MAX_REG;

src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c

@ -74,8 +74,6 @@ struct ir3_compile {
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
-/* mapping from nir_variable to ir3_array: */
-struct hash_table *var_ht;
unsigned num_arrays;
/* a common pattern for indirect addressing is to request the
@ -142,8 +140,6 @@ compile_init(struct ir3_compiler *compiler,
ctx->so = so;
ctx->def_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
-ctx->var_ht = _mesa_hash_table_create(ctx,
-_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
@ -220,206 +216,26 @@ compile_free(struct ir3_compile *ctx)
ralloc_free(ctx);
}
-/* global per-array information: */
-struct ir3_array {
-unsigned length, aid;
-};
-/* per-block array state: */
-struct ir3_array_value {
-/* TODO drop length/aid, and just have ptr back to ir3_array */
-unsigned length, aid;
-/* initial array element values are phi's, other than for the
- * entry block. The phi src's get added later in a resolve step
- * after we have visited all the blocks, to account for back
- * edges in the cfg.
- */
-struct ir3_instruction **phis;
-/* current array element values (as block is processed). When
- * the array phi's are resolved, it will contain the array state
- * at exit of block, so successor blocks can use it to add their
- * phi srcs.
- */
-struct ir3_instruction *arr[];
-};
-/* track array assignments per basic block. When an array is read
- * outside of the same basic block, we can use NIR's dominance-frontier
- * information to figure out where phi nodes are needed.
- */
-struct ir3_nir_block_data {
-unsigned foo;
-/* indexed by array-id (aid): */
-struct ir3_array_value *arrs[];
-};
-static struct ir3_nir_block_data *
-get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
-{
-if (!block->data) {
-struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
-((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
-block->data = bd;
-}
-return block->data;
-}
static void
declare_var(struct ir3_compile *ctx, nir_variable *var)
{
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
struct ir3_array *arr = ralloc(ctx, struct ir3_array);
+arr->id = ++ctx->num_arrays;
arr->length = length;
-arr->aid = ++ctx->num_arrays;
-_mesa_hash_table_insert(ctx->var_ht, var, arr);
+arr->var = var;
+list_addtail(&arr->node, &ctx->ir->array_list);
}
-static nir_block *
-nir_block_pred(nir_block *block)
-{
-assert(block->predecessors->entries < 2);
-if (block->predecessors->entries == 0)
-return NULL;
-return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
-}
-static struct ir3_array_value *
+static struct ir3_array *
get_var(struct ir3_compile *ctx, nir_variable *var)
{
-struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
-struct ir3_block *block = ctx->block;
-struct ir3_nir_block_data *bd = get_block_data(ctx, block);
-struct ir3_array *arr = entry->data;
-if (!bd->arrs[arr->aid]) {
-struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
-(arr->length * sizeof(av->arr[0])));
-struct ir3_array_value *defn = NULL;
-nir_block *pred_block;
-av->length = arr->length;
-av->aid = arr->aid;
-/* For loops, we have to consider that we have not visited some
- * of the blocks who should feed into the phi (ie. back-edges in
- * the cfg).. for example:
- *
- * loop {
- * block { load_var; ... }
- * if then block {} else block {}
- * block { store_var; ... }
- * if then block {} else block {}
- * block {...}
- * }
- *
- * We can skip the phi if we can chase the block predecessors
- * until finding the block previously defining the array without
- * crossing a block that has more than one predecessor.
- *
- * Otherwise create phi's and resolve them as a post-pass after
- * all the blocks have been visited (to handle back-edges).
- */
-for (pred_block = block->nblock;
-pred_block && (pred_block->predecessors->entries < 2) && !defn;
-pred_block = nir_block_pred(pred_block)) {
-struct ir3_block *pblock = get_block(ctx, pred_block);
-struct ir3_nir_block_data *pbd = pblock->data;
-if (!pbd)
-continue;
-defn = pbd->arrs[arr->aid];
-}
-if (defn) {
-/* only one possible definer: */
-for (unsigned i = 0; i < arr->length; i++)
-av->arr[i] = defn->arr[i];
-} else if (pred_block) {
-/* not the first block, and multiple potential definers: */
-av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
-for (unsigned i = 0; i < arr->length; i++) {
-struct ir3_instruction *phi;
-phi = ir3_instr_create2(block, -1, OPC_META_PHI,
-1 + ctx->impl->num_blocks);
-ir3_reg_create(phi, 0, 0); /* dst */
-/* phi's should go at head of block: */
-list_delinit(&phi->node);
-list_add(&phi->node, &block->instr_list);
-av->phis[i] = av->arr[i] = phi;
-}
-} else {
-/* Some shaders end up reading array elements without
- * first writing.. so initialize things to prevent null
- * instr ptrs later:
- */
-for (unsigned i = 0; i < arr->length; i++)
-av->arr[i] = create_immed(block, 0);
-}
-bd->arrs[arr->aid] = av;
-}
-return bd->arrs[arr->aid];
-}
-static void
-add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
-struct ir3_array_value *av, BITSET_WORD *visited)
-{
-struct ir3_block *block;
-struct ir3_nir_block_data *bd;
-if (BITSET_TEST(visited, nblock->index))
-return;
-BITSET_SET(visited, nblock->index);
-block = get_block(ctx, nblock);
-bd = block->data;
-if (bd && bd->arrs[av->aid]) {
-struct ir3_array_value *dav = bd->arrs[av->aid];
-for (unsigned i = 0; i < av->length; i++) {
-ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
-dav->arr[i];
-}
-} else {
-/* didn't find defn, recurse predecessors: */
-struct set_entry *entry;
-set_foreach(nblock->predecessors, entry) {
-add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
-}
-}
-}
-static void
-resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
-{
-struct ir3_nir_block_data *bd = block->data;
-unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
-if (!bd)
-return;
-/* TODO use nir dom_frontier to help us with this? */
-for (unsigned i = 1; i <= ctx->num_arrays; i++) {
-struct ir3_array_value *av = bd->arrs[i];
-BITSET_WORD visited[bitset_words];
-struct set_entry *entry;
-if (!(av && av->phis))
-continue;
-memset(visited, 0, sizeof(visited));
-set_foreach(block->nblock->predecessors, entry) {
-add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
-}
-}
-}
+list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+if (arr->var == var)
+return arr;
+}
+compile_error(ctx, "bogus var: %s\n", var->name);
+return NULL;
}
/* allocate an n element value array (to be populated by caller) and
@ -437,6 +253,7 @@ __get_dst(struct ir3_compile *ctx, void *key, unsigned n)
static struct ir3_instruction **
get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
{
compile_assert(ctx, dst->is_ssa);
if (dst->is_ssa) {
return __get_dst(ctx, &dst->ssa, n);
} else {
@ -454,6 +271,7 @@ static struct ir3_instruction **
get_src(struct ir3_compile *ctx, nir_src *src)
{
struct hash_entry *entry;
compile_assert(ctx, src->is_ssa);
if (src->is_ssa) {
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
} else {
@ -568,7 +386,7 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
-ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
+ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
ir3_instr_set_address(mov, address);
@ -607,17 +425,45 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
src->instr = collect;
src->size = arrsz;
-src->offset = n;
+src->array.offset = n;
ir3_instr_set_address(mov, address);
return mov;
}
/* relative (indirect) if address!=NULL */
static struct ir3_instruction *
-create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
-struct ir3_instruction *src, struct ir3_instruction *address,
-struct ir3_instruction *collect)
+create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+struct ir3_instruction *address)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
+struct ir3_register *src;
+mov = ir3_instr_create(block, 1, 0);
+mov->cat1.src_type = TYPE_U32;
+mov->cat1.dst_type = TYPE_U32;
+ir3_reg_create(mov, 0, 0);
+src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+COND(address, IR3_REG_RELATIV));
+src->instr = arr->last_write;
+src->size = arr->length;
+src->array.id = arr->id;
+src->array.offset = n;
+if (address)
+ir3_instr_set_address(mov, address);
+arr->last_access = mov;
+return mov;
+}
+/* relative (indirect) if address!=NULL */
+static struct ir3_instruction *
+create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+struct ir3_instruction *src, struct ir3_instruction *address)
+{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
@ -626,14 +472,18 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
-dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
-dst->size = arrsz;
-dst->offset = n;
+dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+COND(address, IR3_REG_RELATIV));
+dst->instr = arr->last_access;
+dst->size = arr->length;
+dst->array.id = arr->id;
+dst->array.offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
-mov->fanin = collect;
-ir3_instr_set_address(mov, address);
+if (address)
+ir3_instr_set_address(mov, address);
+arr->last_write = arr->last_access = mov;
return mov;
}
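For orientation, the rewritten load_var/store_var lowering below reduces to one helper call per vector component, with address == NULL for direct access; a sketch of the calling pattern (names as in emit_intrinsic_store_var() below):

	unsigned n = darr->base_offset * 4 + i;       /* scalar offset of component i */
	dst[i] = create_var_load(ctx, arr, n, addr);  /* read of arr[n] */
	create_var_store(ctx, arr, n, src[i], addr);  /* write of arr[n] */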
@ -1198,7 +1048,7 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-struct ir3_array_value *arr = get_var(ctx, dvar->var);
+struct ir3_array *arr = get_var(ctx, dvar->var);
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@ -1209,19 +1059,17 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
-dst[i] = arr->arr[n];
+dst[i] = create_var_load(ctx, arr, n, NULL);
}
break;
case nir_deref_array_type_indirect: {
-/* for indirect, we need to collect all the array elements: */
-struct ir3_instruction *collect =
-create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
-dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
+dst[i] = create_var_load(ctx, arr, n, addr);
}
break;
}
@ -1238,8 +1086,9 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-struct ir3_array_value *arr = get_var(ctx, dvar->var);
-struct ir3_instruction **src;
+struct ir3_array *arr = get_var(ctx, dvar->var);
+struct ir3_instruction *addr, **src;
+unsigned wrmask = intr->const_index[0];
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@ -1248,66 +1097,24 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
switch (darr->deref_array_type) {
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
-for (int i = 0; i < intr->num_components; i++) {
-/* ttn doesn't generate partial writemasks */
-assert(intr->const_index[0] ==
-(1 << intr->num_components) - 1);
-unsigned n = darr->base_offset * 4 + i;
-compile_assert(ctx, n < arr->length);
-arr->arr[n] = src[i];
-}
+addr = NULL;
break;
-case nir_deref_array_type_indirect: {
-/* for indirect, create indirect-store and fan that out: */
-struct ir3_instruction *collect =
-create_collect(ctx->block, arr->arr, arr->length);
-struct ir3_instruction *addr =
-get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
-for (int i = 0; i < intr->num_components; i++) {
-/* ttn doesn't generate partial writemasks */
-assert(intr->const_index[0] ==
-(1 << intr->num_components) - 1);
-struct ir3_instruction *store;
-unsigned n = darr->base_offset * 4 + i;
-compile_assert(ctx, n < arr->length);
-store = create_indirect_store(ctx, arr->length,
-n, src[i], addr, collect);
-store->fanin->fi.aid = arr->aid;
-/* TODO: probably split this out to be used for
- * store_output_indirect? or move this into
- * create_indirect_store()?
- */
-for (int j = i; j < arr->length; j += intr->num_components) {
-struct ir3_instruction *split;
-split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
-split->fo.off = j;
-ir3_reg_create(split, 0, 0);
-ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
-arr->arr[j] = split;
-}
-}
-/* fixup fanout/split neighbors: */
-for (int i = 0; i < arr->length; i++) {
-arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
-arr->arr[i+1] : NULL;
-arr->arr[i]->cp.left = (i > 0) ?
-arr->arr[i-1] : NULL;
-}
+case nir_deref_array_type_indirect:
+addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
break;
-}
default:
compile_error(ctx, "Unhandled store deref type: %u\n",
darr->deref_array_type);
break;
}
+for (int i = 0; i < intr->num_components; i++) {
+if (!(wrmask & (1 << i)))
+continue;
+unsigned n = darr->base_offset * 4 + i;
+compile_assert(ctx, n < arr->length);
+create_var_store(ctx, arr, n, src[i], addr);
+}
}
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
@ -1835,8 +1642,6 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
}
}
-resolve_array_phis(ctx, block);
}
static void

src/gallium/drivers/freedreno/ir3/ir3_cp.c

@ -202,6 +202,7 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
*dstflags |= srcflags & IR3_REG_CONST;
*dstflags |= srcflags & IR3_REG_IMMED;
*dstflags |= srcflags & IR3_REG_RELATIV;
*dstflags |= srcflags & IR3_REG_ARRAY;
}
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
@ -233,6 +234,10 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
combine_flags(&new_flags, src_reg->flags);
if (valid_flags(instr, n, new_flags)) {
if (new_flags & IR3_REG_ARRAY) {
debug_assert(!(reg->flags & IR3_REG_ARRAY));
reg->array = src_reg->array;
}
reg->flags = new_flags;
reg->instr = ssa(src_reg);
}
@ -283,6 +288,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
conflicts(instr->address, reg->instr->address))
return;
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
@ -294,6 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
if ((src_reg->flags & IR3_REG_RELATIV) &&
!conflicts(instr->address, reg->instr->address)) {
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
ir3_instr_set_address(instr, reg->instr->address);
@ -329,6 +336,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
/* other than category 1 (mov) we can only encode up to 10 bits: */
if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
@ -349,9 +357,11 @@ eliminate_output_mov(struct ir3_instruction *instr)
{
if (is_eligible_mov(instr, false)) {
struct ir3_register *reg = instr->regs[1];
-struct ir3_instruction *src_instr = ssa(reg);
-debug_assert(src_instr);
-return src_instr;
+if (!(reg->flags & IR3_REG_ARRAY)) {
+struct ir3_instruction *src_instr = ssa(reg);
+debug_assert(src_instr);
+return src_instr;
+}
}
return instr;
}
@ -379,9 +389,22 @@ instr_cp(struct ir3_instruction *instr)
continue;
instr_cp(src);
/* TODO non-indirect access we could figure out which register
* we actually want and allow cp..
*/
if (reg->flags & IR3_REG_ARRAY)
continue;
reg_cp(instr, reg, n);
}
if (instr->regs[0]->flags & IR3_REG_ARRAY) {
struct ir3_instruction *src = ssa(instr->regs[0]);
if (src)
instr_cp(src);
}
if (instr->address) {
instr_cp(instr->address);
ir3_instr_set_address(instr, eliminate_output_mov(instr->address));

src/gallium/drivers/freedreno/ir3/ir3_depth.c

@ -118,6 +118,10 @@ ir3_instr_depth(struct ir3_instruction *instr)
/* visit child to compute its depth: */
ir3_instr_depth(src);
/* for array writes, no need to delay on previous write: */
if (i == 0)
continue;
sd = ir3_delayslots(src, instr, i) + src->depth;
instr->depth = MAX2(instr->depth, sd);

src/gallium/drivers/freedreno/ir3/ir3_print.c

@ -94,7 +94,7 @@ static void print_instr_name(struct ir3_instruction *instr)
}
}
-static void print_reg_name(struct ir3_register *reg, bool followssa)
+static void print_reg_name(struct ir3_register *reg)
{
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
@ -106,20 +106,29 @@ static void print_reg_name(struct ir3_register *reg, bool followssa)
if (reg->flags & IR3_REG_IMMED) {
printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
-} else if (reg->flags & IR3_REG_SSA) {
-printf("_");
-if (followssa) {
-printf("[");
+} else if (reg->flags & IR3_REG_ARRAY) {
+printf("arr[id=%u, offset=%u, size=%u", reg->array.id,
+reg->array.offset, reg->size);
+/* for ARRAY we could have null src, for example first write
+ * instruction..
+ */
+if (reg->instr) {
+printf(", _[");
print_instr_name(reg->instr);
printf("]");
}
+printf("]");
+} else if (reg->flags & IR3_REG_SSA) {
+printf("_[");
+print_instr_name(reg->instr);
+printf("]");
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_HALF)
printf("h");
if (reg->flags & IR3_REG_CONST)
printf("c<a0.x + %u>", reg->num);
printf("c<a0.x + %u>", reg->array.offset);
else
printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_HALF)
printf("h");
@ -158,7 +167,7 @@ print_instr(struct ir3_instruction *instr, int lvl)
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
printf(i ? ", " : " ");
-print_reg_name(reg, !!i);
+print_reg_name(reg);
}
if (instr->address) {
@ -168,13 +177,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
printf("]");
}
-if (instr->fanin) {
-printf(", fanin=_");
-printf("[");
-print_instr_name(instr->fanin);
-printf("]");
-}
if (instr->cp.left) {
printf(", left=_");
printf("[");
@ -192,8 +194,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
-} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
-printf(", aid=%d", instr->fi.aid);
}
}

src/gallium/drivers/freedreno/ir3/ir3_ra.c

@ -68,25 +68,24 @@
* LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
* register assignment. But for us that is horrible from a scheduling
* standpoint. Instead what we do is use idea of 'definer' instruction.
-* Ie. the first instruction (lowest ip) to write to the array is the
+* Ie. the first instruction (lowest ip) to write to the variable is the
 * one we consider from use/def perspective when building interference
-* graph. (Other instructions which write other array elements just
-* define the variable some more.)
+* graph. (Other instructions which write other variable components
+* just define the variable some more.)
*
* Arrays of arbitrary size are handled via pre-coloring a consecutive
* sequence of registers. Additional scalar (single component) reg
* names are allocated starting at ctx->class_base[total_class_count]
* (see arr->base), which are pre-colored. In the use/def graph direct
* access is treated as a single element use/def, and indirect access
* is treated as use or def of all array elements. (Only the first
* def is tracked, in case of multiple indirect writes, etc.)
*/
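A sketch of the resulting vreg-name layout described above (the expressions mirror the hunks below; nothing here is new API):

	/* scalar SSA value, grouped by size class (see __ra_name()): */
	unsigned name = ctx->class_base[id->cls] + defn->name;

	/* array element: one pre-colored scalar name per element, appended
	 * after all the classes (arr->base is assigned in ra_init(), starting
	 * at ctx->class_base[total_class_count]): */
	unsigned aname = arr->base + reg->array.offset;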
static const unsigned class_sizes[] = {
1, 2, 3, 4,
4 + 4, /* txd + 1d/2d */
4 + 6, /* txd + 3d */
-/* temporary: until we can assign arrays, create classes so we
- * can round up array to fit. NOTE with tgsi arrays should
- * really all be multiples of four:
- */
-4 * 4,
-4 * 8,
-4 * 16,
-4 * 32,
};
#define class_count ARRAY_SIZE(class_sizes)
@ -265,8 +264,9 @@ struct ir3_ra_ctx {
struct ir3_ra_reg_set *set;
struct ra_graph *g;
unsigned alloc_count;
-unsigned class_alloc_count[total_class_count];
-unsigned class_base[total_class_count];
+/* one per class, plus one slot for arrays: */
+unsigned class_alloc_count[total_class_count + 1];
+unsigned class_base[total_class_count + 1];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
struct ir3_ra_instr_data *instrd;
@ -329,9 +329,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
-if (instr->fanin)
-return get_definer(ctx, instr->fanin, sz, off);
if (id->defn) {
*sz = id->sz;
*off = id->off;
@ -485,10 +482,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* couple special cases: */
if (writes_addr(instr) || writes_pred(instr)) {
id->cls = -1;
-continue;
+} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+id->cls = total_class_count;
+id->defn = instr;
+} else {
+id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+id->cls = size_to_class(id->sz, is_half(id->defn));
}
-id->defn = get_definer(ctx, instr, &id->sz, &id->off);
-id->cls = size_to_class(id->sz, is_half(id->defn));
}
}
@ -518,8 +518,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
-*
-* TODO but we still need to allocate names for them, don't we??
*/
if (id->cls >= 0) {
instr->name = ctx->class_alloc_count[id->cls]++;
@ -531,7 +529,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
-unsigned n;
+unsigned n, base;
ir3_clear_mark(ctx->ir);
n = ir3_count_instructions(ctx->ir);
@ -550,11 +548,20 @@ ra_init(struct ir3_ra_ctx *ctx)
* actual ra name is class_base[cls] + instr->name;
*/
ctx->class_base[0] = 0;
-for (unsigned i = 1; i < total_class_count; i++) {
+for (unsigned i = 1; i <= total_class_count; i++) {
ctx->class_base[i] = ctx->class_base[i-1] +
ctx->class_alloc_count[i-1];
}
/* and vreg names for array elements: */
base = ctx->class_base[total_class_count];
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
arr->base = base;
ctx->class_alloc_count[total_class_count] += arr->length;
base += arr->length;
}
ctx->alloc_count += ctx->class_alloc_count[total_class_count];
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@ -566,6 +573,7 @@ __ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
{
unsigned name;
debug_assert(cls >= 0);
debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. */
name = ctx->class_base[cls] + defn->name;
debug_assert(name < ctx->alloc_count);
return name;
@ -590,6 +598,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_ra_block_data *bd;
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
void def(unsigned name, struct ir3_instruction *instr)
{
/* defined on first write: */
if (!ctx->def[name])
ctx->def[name] = instr->ip;
ctx->use[name] = instr->ip;
BITSET_SET(bd->def, name);
}
void use(unsigned name, struct ir3_instruction *instr)
{
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
}
bd = rzalloc(ctx->g, struct ir3_ra_block_data);
bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words);
@ -601,6 +625,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_instruction *src;
struct ir3_register *reg;
if (instr->regs_count == 0)
continue;
@ -632,17 +657,45 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (writes_gpr(instr)) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_register *dst = instr->regs[0];
-if (id->defn == instr) {
+if (dst->flags & IR3_REG_ARRAY) {
+struct ir3_array *arr =
+ir3_lookup_array(ctx->ir, dst->array.id);
+unsigned i;
+debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
+/* set the node class now.. in case we don't encounter
+ * this array dst again. From register_alloc algo's
+ * perspective, these are all single/scalar regs:
+ */
+for (i = 0; i < arr->length; i++) {
+unsigned name = arr->base + i;
+ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
+}
+/* indirect write is treated like a write to all array
+ * elements, since we don't know which one is actually
+ * written:
+ */
+if (dst->flags & IR3_REG_RELATIV) {
+for (i = 0; i < arr->length; i++) {
+unsigned name = arr->base + i;
+def(name, instr);
+}
+} else {
+unsigned name = arr->base + dst->array.offset;
+def(name, instr);
+}
+} else if (id->defn == instr) {
unsigned name = ra_name(ctx, id);
-ctx->def[name] = id->defn->ip;
-ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
-BITSET_SET(bd->def, name);
+def(name, id->defn);
if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
@ -672,12 +725,28 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
}
-foreach_ssa_src(src, instr) {
-if (writes_gpr(src)) {
+foreach_src(reg, instr) {
+if (reg->flags & IR3_REG_ARRAY) {
+struct ir3_array *arr =
+ir3_lookup_array(ctx->ir, reg->array.id);
+/* indirect read is treated like a read from all array
+ * elements, since we don't know which one is actually
+ * read:
+ */
+if (reg->flags & IR3_REG_RELATIV) {
+unsigned i;
+for (i = 0; i < arr->length; i++) {
+unsigned name = arr->base + i;
+use(name, instr);
+}
+} else {
+unsigned name = arr->base + reg->array.offset;
+use(name, instr);
+debug_assert(reg->array.offset < arr->length);
+}
+} else if ((src = ssa(reg)) && writes_gpr(src)) {
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
-ctx->use[name] = MAX2(ctx->use[name], instr->ip);
-if (!BITSET_TEST(bd->def, name))
-BITSET_SET(bd->use, name);
+use(name, instr);
}
}
}
@ -830,18 +899,36 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
}
}
/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
* array access(es) which do not have any previous access to depend
* on, from a scheduling point of view
*/
static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
-struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
-if (id->defn) {
+struct ir3_ra_instr_data *id;
+if (reg->flags & IR3_REG_ARRAY) {
+struct ir3_array *arr =
+ir3_lookup_array(ctx->ir, reg->array.id);
+unsigned name = arr->base + reg->array.offset;
+unsigned r = ra_get_node_reg(ctx->g, name);
+unsigned num = ctx->set->ra_reg_to_gpr[r];
+if (reg->flags & IR3_REG_RELATIV) {
+reg->array.offset = num;
+} else {
+reg->num = num;
+}
+reg->flags &= ~IR3_REG_ARRAY;
+} else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
unsigned name = ra_name(ctx, id);
unsigned r = ra_get_node_reg(ctx->g, name);
unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
-if (reg->flags & IR3_REG_RELATIV)
-num += reg->offset;
+debug_assert(!(reg->flags & IR3_REG_RELATIV));
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
@ -868,9 +955,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_src_n(reg, n, instr) {
struct ir3_instruction *src = reg->instr;
-if (!src)
+/* Note: reg->instr could be null for IR3_REG_ARRAY */
+if (!(src || (reg->flags & IR3_REG_ARRAY)))
continue;
reg_assign(ctx, instr->regs[n+1], src);
if (instr->regs[n+1]->flags & IR3_REG_HALF)
fixup_half_instr_src(instr);
@ -881,6 +968,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static int
ra_alloc(struct ir3_ra_ctx *ctx)
{
unsigned n = 0;
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
@ -898,7 +987,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
i += 4;
}
-for (j = 0; i < ir->ninputs; i++) {
+j = 0;
+for (; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
@ -914,6 +1004,24 @@ ra_alloc(struct ir3_ra_ctx *ctx)
}
}
}
n = j;
}
/* pre-assign array elements:
* TODO we could be a bit more clever if we knew which arrays didn't
* fully (partially?) conflict with each other..
*/
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
unsigned i;
for (i = 0; i < arr->length; i++) {
unsigned name, reg;
name = arr->base + i;
reg = ctx->set->gpr_to_ra_reg[0][n++];
ra_set_node_reg(ctx->g, name, reg);
}
}
if (!ra_allocate(ctx->g))

src/gallium/drivers/freedreno/ir3/ir3_sched.c

@ -187,6 +187,9 @@ delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
foreach_ssa_src_n(src, i, instr) {
unsigned d;
/* for array writes, no need to delay on previous write: */
if (i == 0)
continue;
if (src->block != instr->block)
continue;
d = delay_calc_srcn(ctx, src, instr, i);