freedreno/a3xx/compiler: new compiler

The new compiler generates a dependency graph of instructions, including
a few meta-instructions to handle PHI and preserve some extra
information needed for register assignment, etc.
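
To give a feel for the representation: before register assignment a
source operand with the IR3_REG_SSA flag carries a pointer back to the
instruction that produces its value, so the shader is effectively a DAG
hanging off the block outputs.  A minimal sketch, using only the ir3.h
helpers added below (the function itself is hypothetical, purely for
illustration):

  /* count the real (non-meta) instructions feeding 'instr', following
   * the SSA src pointers and skipping the PHI/fan-in/fan-out/input/
   * output placeholders (call ir3_shader_clear_mark() first, as the
   * real passes do):
   */
  static unsigned count_real(struct ir3_instruction *instr)
  {
          unsigned i, n;
          if (ir3_instr_check_mark(instr))
                  return 0;               /* already visited */
          n = is_meta(instr) ? 0 : 1;
          for (i = 1; i < instr->regs_count; i++) {
                  struct ir3_register *src = instr->regs[i];
                  if (src->flags & IR3_REG_SSA)
                          n += count_real(src->instr);
          }
          return n;
  }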

The depth pass assigns a weight/depth to each node (based on the sum of
instruction cycles of a given node and all its dependent nodes), which
is used to schedule instructions.  The scheduling takes into account the
minimum number of cycles/slots between dependent instructions, etc.,
which is something that could not be handled properly with the original
compiler (which was more of a naive TGSI translator than an actual
compiler).
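
As a rough illustration of the weighting (this just restates the
pseudocode comment from ir3_depth.c below over a made-up, self-contained
toy node type, assuming an acyclic graph):

  struct node {
          unsigned nsrcs;
          struct node *srcs[4];
          unsigned delay[4];  /* required slots between src and this node */
          unsigned depth;
  };

  static unsigned calc_depth(struct node *n)
  {
          unsigned i, d = 0;
          for (i = 0; i < n->nsrcs; i++) {
                  unsigned sd = n->delay[i] + calc_depth(n->srcs[i]);
                  if (sd > d)
                          d = sd;
          }
          return n->depth = d + 1;
  }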

The register assignment is currently split out as a standalone pass.  I
expect that it will be replaced at some point, once I figure out what to
do about relative addressing (which is currently the only thing that
should cause a fallback to the old compiler).
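
For reference, the per-block passes are exposed via ir3.h roughly as
below.  The actual call sequence lives in fd3_compiler.c (whose diff is
suppressed below as too large), so treat this as an assumed sketch of
the ordering, with 'block' and 'type' standing in for the current
ir3_block and shader stage:

  ir3_block_flatten(block);           /* flatten if/else legs into sel */
  ir3_block_cp(block);                /* copy propagation */
  ir3_block_depth(block);             /* depth/weight calculation */
  ir3_block_sched(block);             /* instruction scheduling */
  if (ir3_block_ra(block, type) < 0) {
          /* fall back to the old compiler */
  }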

There are a couple of new debug options for the FD_MESA_DEBUG env var:

  optmsgs - enable debug prints in optimizer
  optdump - dump instruction graph in .dot format, for example:

http://people.freedesktop.org/~robclark/a3xx/frag-0000.dot.png
http://people.freedesktop.org/~robclark/a3xx/frag-0000.dot
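
A minimal sketch of how the optdump output might be produced (the debug
flag, fd_mesa_debug, and ir3_shader_dump() are all in the diff below,
but the call site and file naming here are just illustrative, with 'n'
standing in for a per-shader counter):

  if (fd_mesa_debug & FD_DBG_OPTDUMP) {
          char fname[32];
          snprintf(fname, sizeof(fname), "frag-%04u.dot", n);
          FILE *f = fopen(fname, "w");
          if (f) {
                  ir3_shader_dump(shader, "frag", block, f);
                  fclose(f);
          }
  }

The resulting .dot files can then be rendered with graphviz, which is
presumably how the frag-0000.dot.png linked above was produced.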

At this point, thanks to proper handling of instruction scheduling, the
new compiler fixes a lot of things that were broken before, and does not
appear to break anything that was working before[1].  So even though it
is not finished, it seems useful to merge it in its current state.

[1] Not merged in this commit, because I'm not sure if it really belongs
in the mesa tree, but the following commit implements a simple shader
emulator, which I've used to compare the output of the new compiler to
the original compiler (ie. run it on all the TGSI shaders dumped out via
ST_DEBUG=tgsi with various games/apps):

163b6306b1

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Rob Clark 2014-01-29 17:18:49 -05:00
parent f0e2d7ab46
commit 554f1ac00c
17 changed files with 2778 additions and 210 deletions


@@ -43,4 +43,10 @@ a3xx_SOURCES := \
a3xx/fd3_util.c \
a3xx/fd3_zsa.c \
a3xx/disasm-a3xx.c \
a3xx/ir3_cp.c \
a3xx/ir3_depth.c \
a3xx/ir3_dump.c \
a3xx/ir3_flatten.c \
a3xx/ir3_ra.c \
a3xx/ir3_sched.c \
a3xx/ir3.c


@@ -735,6 +735,14 @@ struct opc_info {
#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
// XXX hack.. probably should move this table somewhere common:
#include "ir3.h"
const char *ir3_instr_name(struct ir3_instruction *instr)
{
if (instr->category == -1) return "??meta??";
return opcs[(instr->category << NOPC_BITS) | instr->opc].name;
}
static void print_instr(uint32_t *dwords, int level, int n)
{
instr_t *instr = (instr_t *)dwords;

(File diff suppressed because it is too large.)


@@ -1161,23 +1161,6 @@ instr_cat2(const struct instr_translater *t,
put_dst(ctx, inst, dst);
}
static bool is_mad(opc_t opc)
{
switch (opc) {
case OPC_MAD_U16:
case OPC_MADSH_U16:
case OPC_MAD_S16:
case OPC_MADSH_M16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_MAD_F16:
case OPC_MAD_F32:
return true;
default:
return false;
}
}
static void
instr_cat3(const struct instr_translater *t,
struct fd3_compile_context *ctx,


@@ -80,9 +80,9 @@ fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
{
unsigned i;
for (i = 0; i < so->inputs_count; i++)
so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
so->info.max_reg = MAX2(so->info.max_reg, (so->inputs[i].regid + 3) >> 2);
for (i = 0; i < so->outputs_count; i++)
so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2);
so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2);
}
static struct fd3_shader_stateobj *


@@ -190,6 +190,22 @@ typedef enum {
OPC_LDC_4 = 30,
OPC_LDLV = 31,
/* meta instructions (category -1): */
/* placeholder instr to mark inputs/outputs: */
OPC_META_INPUT = 0,
OPC_META_OUTPUT = 1,
/* The "fan-in" and "fan-out" instructions are used for keeping
* track of instructions that write to multiple dst registers
* (fan-out) like texture sample instructions, or read multiple
* consecutive scalar registers (fan-in) (bary.f, texture samp)
*/
OPC_META_FO = 2,
OPC_META_FI = 3,
/* branches/flow control */
OPC_META_FLOW = 4,
OPC_META_PHI = 5,
} opc_t;
typedef enum {
@@ -643,4 +659,21 @@ static inline uint32_t instr_opc(instr_t *instr)
}
}
static inline bool is_mad(opc_t opc)
{
switch (opc) {
case OPC_MAD_U16:
case OPC_MADSH_U16:
case OPC_MAD_S16:
case OPC_MADSH_M16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_MAD_F16:
case OPC_MAD_F32:
return true;
default:
return false;
}
}
#endif /* INSTR_A3XX_H_ */


@@ -36,7 +36,7 @@
/* simple allocator to carve allocations out of an up-front allocated heap,
* so that we can free everything easily in one shot.
*/
static void * ir3_alloc(struct ir3_shader *shader, int sz)
void * ir3_alloc(struct ir3_shader *shader, int sz)
{
void *ptr = &shader->heap[shader->heap_idx];
shader->heap_idx += align(sz, 4);


@@ -65,6 +65,11 @@ struct ir3_register {
* that the shader needs no more input:
*/
IR3_REG_EI = 0x200,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */
IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */
} flags;
union {
/* normal registers:
@@ -77,6 +82,10 @@ struct ir3_register {
float fim_val;
/* relative: */
int offset;
/* for IR3_REG_SSA, src registers contain ptr back to
* assigning instruction.
*/
struct ir3_instruction *instr;
};
/* used for cat5 instructions, but also for internal/IR level
@@ -139,6 +148,10 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_INSTR_MARK = 0x1000,
} flags;
int repeat;
unsigned regs_count;
@@ -171,7 +184,33 @@ struct ir3_instruction {
int offset;
int iim_val;
} cat6;
/* for meta-instructions, just used to hold extra data
* before instruction scheduling, etc
*/
struct {
int off; /* component/offset */
} fo;
struct {
struct ir3_block *if_block, *else_block;
} flow;
struct {
struct ir3_block *block;
} inout;
};
/* transient values used during various algorithms: */
union {
/* The instruction depth is the max dependency distance to output.
*
* You can also think of it as the "cost", if we did any sort of
* optimization for register footprint. Ie. a value that is just
* result of moving a const to a reg would have a low cost, so to
* it could make sense to duplicate the instruction at various
* points where the result is needed to reduce register footprint.
*/
unsigned depth;
};
struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
#endif
@@ -201,6 +240,7 @@ struct ir3_shader * ir3_shader_create(void);
void ir3_shader_destroy(struct ir3_shader *shader);
void * ir3_shader_assemble(struct ir3_shader *shader,
struct ir3_shader_info *info);
void * ir3_alloc(struct ir3_shader *shader, int sz);
struct ir3_block * ir3_block_create(struct ir3_shader *shader,
unsigned ntmp, unsigned nin, unsigned nout);
@@ -208,11 +248,44 @@ struct ir3_block * ir3_block_create(struct ir3_shader *shader,
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc);
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
if (instr->flags & IR3_INSTR_MARK)
return true; /* already visited */
instr->flags ^= IR3_INSTR_MARK;
return false;
}
static inline void ir3_shader_clear_mark(struct ir3_shader *shader)
{
/* TODO would be nice to drop the instruction array.. for
* new compiler, _clear_mark() is all we use it for, and
* we could probably manage a linked list instead..
*/
unsigned i;
for (i = 0; i < shader->instrs_count; i++) {
struct ir3_instruction *instr = shader->instrs[i];
instr->flags &= ~IR3_INSTR_MARK;
}
}
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
{
unsigned i;
for (i = 0; i < instr->regs_count; i++)
if (reg == instr->regs[i])
return i;
return -1;
}
/* comp:
* 0 - x
* 1 - y
@@ -254,6 +327,15 @@ static inline bool is_input(struct ir3_instruction *instr)
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
}
static inline bool is_meta(struct ir3_instruction *instr)
{
/* TODO how should we count PHI (and maybe fan-in/out) which
* might actually contribute some instructions to the final
* result?
*/
return (instr->category == -1);
}
static inline bool is_gpr(struct ir3_register *reg)
{
return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED));
@@ -262,13 +344,39 @@ static inline bool is_gpr(struct ir3_register *reg)
/* TODO combine is_gpr()/reg_gpr().. */
static inline bool reg_gpr(struct ir3_register *r)
{
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_SSA))
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
return true;
}
/* dump: */
#include <stdio.h>
void ir3_shader_dump(struct ir3_shader *shader, const char *name,
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3_shader? */,
FILE *f);
void ir3_dump_instr_single(struct ir3_instruction *instr);
void ir3_dump_instr_list(struct ir3_instruction *instr);
/* flatten if/else: */
int ir3_block_flatten(struct ir3_block *block);
/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
void ir3_block_depth(struct ir3_block *block);
/* copy-propagate: */
void ir3_block_cp(struct ir3_block *block);
/* scheduling: */
void ir3_block_sched(struct ir3_block *block);
/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type);
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif


@@ -0,0 +1,155 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "ir3.h"
/*
* Copy Propagate:
*
* TODO probably want some sort of visitor sort of interface to
* avoid duplicating the same graph traversal logic everywhere..
*
*/
static void block_cp(struct ir3_block *block);
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep);
static bool is_eligible_mov(struct ir3_instruction *instr)
{
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type)) {
struct ir3_register *src = instr->regs[1];
if ((src->flags & IR3_REG_SSA) &&
/* TODO: propagate abs/neg modifiers if possible */
!(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE)))
return true;
}
return false;
}
static void walk_children(struct ir3_instruction *instr, bool keep)
{
unsigned i;
/* walk down the graph from each src: */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *src = instr->regs[i];
if (src->flags & IR3_REG_SSA)
src->instr = instr_cp(src->instr, keep);
}
}
static struct ir3_instruction *
instr_cp_fanin(struct ir3_instruction *instr)
{
unsigned i;
/* we need to handle fanin specially, to detect cases
* when we need to keep a mov
*/
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *src = instr->regs[i];
if (src->flags & IR3_REG_SSA) {
struct ir3_instruction *cand =
instr_cp(src->instr, false);
/* if the candidate is a fanout, then keep
* the move.
*
* This is a bit, um, fragile, but it should
* catch the extra mov's that the front-end
* puts in for us already in these cases.
*/
if (is_meta(cand) && (cand->opc == OPC_META_FO))
cand = instr_cp(src->instr, true);
src->instr = cand;
}
}
walk_children(instr, false);
return instr;
}
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, bool keep)
{
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return instr;
if (is_meta(instr) && (instr->opc == OPC_META_FI))
return instr_cp_fanin(instr);
if (is_eligible_mov(instr) && !keep) {
struct ir3_register *src = instr->regs[1];
return instr_cp(src->instr, false);
}
walk_children(instr, false);
return instr;
}
static void block_cp(struct ir3_block *block)
{
unsigned i, j;
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
instr_cp(block->outputs[i], false);
/* To deal with things like this:
*
* 43: MOV OUT[2], TEMP[5]
* 44: MOV OUT[0], TEMP[5]
*
* we need to ensure that no two outputs point to
* the same instruction
*/
for (j = 0; j < i; j++) {
if (block->outputs[j] == out) {
out = instr_cp(block->outputs[i], true);
break;
}
}
block->outputs[i] = out;
}
}
}
void ir3_block_cp(struct ir3_block *block)
{
ir3_shader_clear_mark(block->shader);
block_cp(block);
}


@@ -0,0 +1,156 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "util/u_math.h"
#include "ir3.h"
/*
* Instruction Depth:
*
* Calculates weighted instruction depth, ie. the sum of # of needed
* instructions plus delay slots back to original input (ie INPUT or
* CONST).  That is to say, an instruction's depth is:
*
* depth(instr) {
* d = 0;
* // for each src register:
* foreach (src in instr->regs[1..n])
* d = max(d, delayslots(src->instr, n) + depth(src->instr));
* return d + 1;
* }
*
* After an instruction's depth is calculated, it is inserted into the
* blocks depth sorted list, which is used by the scheduling pass.
*/
/* calculate required # of delay slots between the instruction that
* assigns a value and the one that consumes
*/
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n)
{
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
* handled with sync bits
*/
if (is_meta(assigner))
return 0;
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner))
return 0;
/* assigner must be alu: */
if (is_sfu(consumer) || is_tex(consumer)) {
return 8;
} else if ((consumer->category == 3) &&
is_mad(consumer->opc) && (n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 2;
} else {
return 5;
}
}
static void insert_by_depth(struct ir3_instruction *instr)
{
struct ir3_block *block = instr->block;
struct ir3_instruction *n = block->head;
struct ir3_instruction *p = NULL;
while (n && (n != instr) && (n->depth > instr->depth)) {
p = n;
n = n->next;
}
instr->next = n;
if (p)
p->next = instr;
else
block->head = instr;
}
static void ir3_instr_depth(struct ir3_instruction *instr)
{
unsigned i;
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
instr->depth = 0;
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *src = instr->regs[i];
if (src->flags & IR3_REG_SSA) {
unsigned sd;
/* visit child to compute its depth: */
ir3_instr_depth(src->instr);
sd = ir3_delayslots(src->instr, instr, i-1) +
src->instr->depth;
instr->depth = MAX2(instr->depth, sd);
}
}
/* meta-instructions don't add cycles, other than PHI.. which
* might translate to a real instruction..
*
* well, not entirely true, fan-in/out, etc might need
* to generate some extra mov's in edge cases, etc.. probably
* we might want to do depth calculation considering the worst
* case for these??
*/
if (!is_meta(instr))
instr->depth++;
insert_by_depth(instr);
}
void ir3_block_depth(struct ir3_block *block)
{
unsigned i;
block->head = NULL;
ir3_shader_clear_mark(block->shader);
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ir3_instr_depth(block->outputs[i]);
/* at this point, any unvisited input is unused: */
for (i = 0; i < block->ninputs; i++) {
struct ir3_instruction *in = block->inputs[i];
if (in && !ir3_instr_check_mark(in))
block->inputs[i] = NULL;
}
}


@@ -0,0 +1,416 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include <stdarg.h>
#include "ir3.h"
#define PTRID(x) ((unsigned long)(x))
struct ir3_dump_ctx {
FILE *f;
bool verbose;
};
static void dump_instr_name(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
/* for debugging: */
if (ctx->verbose) {
#ifdef DEBUG
fprintf(ctx->f, "%04u:", instr->serialno);
#endif
fprintf(ctx->f, "%03u: ", instr->depth);
}
if (is_meta(instr)) {
switch(instr->opc) {
case OPC_META_PHI:
fprintf(ctx->f, "&#934;");
break;
default:
/* shouldn't hit here.. just for debugging: */
switch (instr->opc) {
case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break;
case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break;
case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break;
case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break;
case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break;
case OPC_META_PHI: fprintf(ctx->f, "_meta:phi"); break;
default: fprintf(ctx->f, "_meta:%d", instr->opc); break;
}
break;
}
} else if (instr->category == 1) {
static const char *type[] = {
[TYPE_F16] = "f16",
[TYPE_F32] = "f32",
[TYPE_U16] = "u16",
[TYPE_U32] = "u32",
[TYPE_S16] = "s16",
[TYPE_S32] = "s32",
[TYPE_U8] = "u8",
[TYPE_S8] = "s8",
};
if (instr->cat1.src_type == instr->cat1.dst_type)
fprintf(ctx->f, "mov");
else
fprintf(ctx->f, "cov");
fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
} else {
fprintf(ctx->f, "%s", ir3_instr_name(instr));
if (instr->flags & IR3_INSTR_3D)
fprintf(ctx->f, ".3d");
if (instr->flags & IR3_INSTR_A)
fprintf(ctx->f, ".a");
if (instr->flags & IR3_INSTR_O)
fprintf(ctx->f, ".o");
if (instr->flags & IR3_INSTR_P)
fprintf(ctx->f, ".p");
if (instr->flags & IR3_INSTR_S)
fprintf(ctx->f, ".s");
if (instr->flags & IR3_INSTR_S2EN)
fprintf(ctx->f, ".s2en");
}
}
static void dump_reg_name(struct ir3_dump_ctx *ctx,
struct ir3_register *reg)
{
if ((reg->flags & IR3_REG_ABS) && (reg->flags & IR3_REG_NEGATE))
fprintf(ctx->f, "(absneg)");
else if (reg->flags & IR3_REG_NEGATE)
fprintf(ctx->f, "(neg)");
else if (reg->flags & IR3_REG_ABS)
fprintf(ctx->f, "(abs)");
if (reg->flags & IR3_REG_IMMED) {
fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
} else if (reg->flags & IR3_REG_SSA) {
if (ctx->verbose) {
fprintf(ctx->f, "_[");
dump_instr_name(ctx, reg->instr);
fprintf(ctx->f, "]");
}
} else {
if (reg->flags & IR3_REG_HALF)
fprintf(ctx->f, "h");
if (reg->flags & IR3_REG_CONST)
fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
else
fprintf(ctx->f, "r%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
}
}
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr);
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
struct ir3_block *block, const char *name);
static void dump_instr(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
/* some meta-instructions need to be handled specially: */
if (is_meta(instr)) {
if ((instr->opc == OPC_META_FO) ||
(instr->opc == OPC_META_FI)) {
unsigned i;
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
if (reg->flags & IR3_REG_SSA)
dump_instr(ctx, reg->instr);
}
} else if (instr->opc == OPC_META_FLOW) {
struct ir3_register *reg = instr->regs[1];
ir3_block_dump(ctx, instr->flow.if_block, "if");
if (instr->flow.else_block)
ir3_block_dump(ctx, instr->flow.else_block, "else");
if (reg->flags & IR3_REG_SSA)
dump_instr(ctx, reg->instr);
} else if (instr->opc == OPC_META_PHI) {
/* treat like a normal instruction: */
ir3_instr_dump(ctx, instr);
}
} else {
ir3_instr_dump(ctx, instr);
}
}
/* arrarraggh! if link is to something outside of the current block, we
* need to defer emitting the link until the end of the block, since the
* edge triggers pre-creation of the node it links to inside the cluster,
* even though it is meant to be outside..
*/
static struct {
char buf[40960];
unsigned n;
} edge_buf;
/* helper to print or defer: */
static void printdef(struct ir3_dump_ctx *ctx,
bool defer, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
if (defer) {
unsigned n = edge_buf.n;
n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n,
fmt, ap);
edge_buf.n = n;
} else {
vfprintf(ctx->f, fmt, ap);
}
va_end(ap);
}
static void dump_link2(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr, const char *target, bool defer)
{
/* some meta-instructions need to be handled specially: */
if (is_meta(instr)) {
if (instr->opc == OPC_META_INPUT) {
printdef(ctx, defer, "input%lx:<in%u>:w -> %s",
PTRID(instr->inout.block),
instr->regs[0]->num, target);
} else if (instr->opc == OPC_META_FO) {
struct ir3_register *reg = instr->regs[1];
dump_link2(ctx, reg->instr, target, defer);
printdef(ctx, defer, "[label=\".%c\"]",
"xyzw"[instr->fo.off & 0x3]);
} else if (instr->opc == OPC_META_FI) {
unsigned i;
/* recursively dump all parents and links */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
if (reg->flags & IR3_REG_SSA) {
dump_link2(ctx, reg->instr, target, defer);
printdef(ctx, defer, "[label=\".%c\"]",
"xyzw"[(i - 1) & 0x3]);
}
}
} else if (instr->opc == OPC_META_OUTPUT) {
printdef(ctx, defer, "output%lx:<out%u>:w -> %s",
PTRID(instr->inout.block),
instr->regs[0]->num, target);
} else if (instr->opc == OPC_META_PHI) {
/* treat like a normal instruction: */
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
}
} else {
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
}
}
static void dump_link(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr,
struct ir3_block *block, const char *target)
{
bool defer = instr->block != block;
dump_link2(ctx, instr, target, defer);
printdef(ctx, defer, "\n");
}
static struct ir3_register *follow_flow(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SSA) {
struct ir3_instruction *instr = reg->instr;
/* go with the flow.. */
if (is_meta(instr) && (instr->opc == OPC_META_FLOW))
return instr->regs[1];
}
return reg;
}
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
unsigned i;
fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{",
PTRID(instr));
dump_instr_name(ctx, instr);
/* destination register: */
fprintf(ctx->f, "|<dst0>");
/* source register(s): */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = follow_flow(instr->regs[i]);
fprintf(ctx->f, "|");
if (reg->flags & IR3_REG_SSA)
fprintf(ctx->f, "<src%u> ", (i - 1));
dump_reg_name(ctx, reg);
}
fprintf(ctx->f, "}\"];\n");
/* and recursively dump dependent instructions: */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
char target[32]; /* link target */
if (!(reg->flags & IR3_REG_SSA))
continue;
snprintf(target, sizeof(target), "instr%lx:<src%u>",
PTRID(instr), (i - 1));
dump_instr(ctx, reg->instr);
dump_link(ctx, follow_flow(reg)->instr, instr->block, target);
}
}
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
struct ir3_block *block, const char *name)
{
unsigned i, n;
n = edge_buf.n;
fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block));
fprintf(ctx->f, "label=\"%s\";\n", name);
/* draw inputs: */
fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block));
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
fprintf(ctx->f, "\"];\n");
/* draw instruction graph: */
for (i = 0; i < block->noutputs; i++)
dump_instr(ctx, block->outputs[i]);
/* draw outputs: */
fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
for (i = 0; i < block->noutputs; i++)
fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
fprintf(ctx->f, "\"];\n");
/* and links to outputs: */
for (i = 0; i < block->noutputs; i++) {
char target[32]; /* link target */
/* NOTE: there could be outputs that are never assigned,
* so skip them
*/
if (!block->outputs[i])
continue;
snprintf(target, sizeof(target), "output%lx:<out%u>:e",
PTRID(block), i);
dump_link(ctx, block->outputs[i], block, target);
}
fprintf(ctx->f, "}\n");
/* and links to inputs: */
if (block->parent) {
for (i = 0; i < block->ninputs; i++) {
char target[32]; /* link target */
if (!block->inputs[i])
continue;
dump_instr(ctx, block->inputs[i]);
snprintf(target, sizeof(target), "input%lx:<in%u>:e",
PTRID(block), i);
dump_link(ctx, block->inputs[i], block, target);
}
}
/* dump deferred edges: */
if (edge_buf.n > n) {
fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]);
edge_buf.n = n;
}
}
void ir3_shader_dump(struct ir3_shader *shader, const char *name,
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3_shader? */,
FILE *f)
{
struct ir3_dump_ctx ctx = {
.f = f,
};
ir3_shader_clear_mark(shader);
fprintf(ctx.f, "digraph G {\n");
fprintf(ctx.f, "rankdir=RL;\n");
fprintf(ctx.f, "nodesep=0.25;\n");
fprintf(ctx.f, "ranksep=1.5;\n");
ir3_block_dump(&ctx, block, name);
fprintf(ctx.f, "}\n");
}
/*
* For Debugging:
*/
void
ir3_dump_instr_single(struct ir3_instruction *instr)
{
struct ir3_dump_ctx ctx = {
.f = stdout,
.verbose = true,
};
unsigned i;
dump_instr_name(&ctx, instr);
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
printf(i ? ", " : " ");
dump_reg_name(&ctx, reg);
}
printf("\n");
}
void
ir3_dump_instr_list(struct ir3_instruction *instr)
{
unsigned n = 0;
while (instr) {
ir3_dump_instr_single(instr);
if (!is_meta(instr))
n++;
instr = instr->next;
}
printf("%u instructions\n", n);
}


@@ -0,0 +1,140 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include <stdarg.h>
#include "ir3.h"
/*
* Flatten: flatten out legs of if/else, etc
*
* TODO probably should use some heuristic to decide to not flatten
* if one side or the other is too large / deeply nested / whatever?
*/
struct ir3_flatten_ctx {
struct ir3_block *block;
unsigned cnt;
};
static struct ir3_register *unwrap(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SSA) {
struct ir3_instruction *instr = reg->instr;
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_OUTPUT:
case OPC_META_FLOW:
return instr->regs[1];
default:
break;
}
}
}
return reg;
}
static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx,
struct ir3_instruction *instr)
{
unsigned i;
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
instr->block = ctx->block;
/* TODO: maybe some threshold to decide whether to
* flatten or not??
*/
if (is_meta(instr)) {
if (instr->opc == OPC_META_PHI) {
struct ir3_register *cond, *t, *f;
/* convert the PHI instruction to sel.{f16,f32} */
instr->category = 3;
/* instruction type based on dst size: */
if (instr->regs[0]->flags & IR3_REG_HALF)
instr->opc = OPC_SEL_F16;
else
instr->opc = OPC_SEL_F32;
/* swap around src register order, to match what
* hw expects:
*/
cond = instr->regs[1];
t = instr->regs[2]; /* true val */
f = instr->regs[3]; /* false val */
instr->regs[1] = unwrap(f);
instr->regs[2] = unwrap(cond);
instr->regs[3] = unwrap(t);
ctx->cnt++;
} else if ((instr->opc == OPC_META_INPUT) &&
(instr->regs_count == 2)) {
type_t ftype;
if (instr->regs[0]->flags & IR3_REG_HALF)
ftype = TYPE_F16;
else
ftype = TYPE_F32;
/* convert meta:input to mov: */
instr->category = 1;
instr->cat1.src_type = ftype;
instr->cat1.dst_type = ftype;
}
}
/* recursively visit children: */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *src = instr->regs[i];
if (src->flags & IR3_REG_SSA)
ir3_instr_flatten(ctx, src->instr);
}
}
/* return >= 0 is # of phi's flattened, < 0 is error */
int ir3_block_flatten(struct ir3_block *block)
{
struct ir3_flatten_ctx ctx = {
.block = block,
};
unsigned i;
ir3_shader_clear_mark(block->shader);
for(i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ir3_instr_flatten(&ctx, block->outputs[i]);
return ctx.cnt;
}


@@ -0,0 +1,580 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "ir3.h"
#include "ir3_visitor.h"
/*
* Register Assignment:
*
* NOTE: currently only works on a single basic block.. need to think
* about how multiple basic blocks are going to get scheduled. But
* I think I want to re-arrange how blocks work, ie. get rid of the
* block nesting thing..
*
* NOTE: we could do register coalescing (eliminate moves) as part of
* the RA step.. OTOH I think we need to do scheduling before register
* assignment. And removing a mov affects scheduling (unless
* we leave a placeholder nop, which seems lame), so I'm not really
* sure how practical this is to do both in a single stage. But OTOH
* I'm not really sure of a sane way for the CP stage to realize when it
* cannot remove a mov due to multi-register constraints..
*
*/
struct ir3_ra_ctx {
struct ir3_block *block;
enum shader_t type;
int cnt;
bool error;
};
struct ir3_ra_assignment {
int8_t off; /* offset of instruction dst within range */
uint8_t num; /* number of components for the range */
};
static void ra_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *assigner, int num);
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
/*
* Register Allocation:
*/
#define REG(n, wm) (struct ir3_register){ \
/*.flags = ((so)->half_precision) ? IR3_REG_HALF : 0,*/ \
.num = (n), \
.wrmask = TGSI_WRITEMASK_ ## wm, \
}
/* check that the register exists, is a GPR and is not special (a0/p0) */
static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
{
if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
return instr->regs[n];
return NULL;
}
static int output_base(struct ir3_ra_ctx *ctx)
{
/* ugg, for fragment shader we need to have input at r0.x
* (or at least, if there is a way to configure it, I can't
* see how, because the blob driver always uses r0.x, ie.
* all zeros)
*/
if (ctx->type == SHADER_FRAGMENT)
return 2;
return 0;
}
/* live means read before written */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, regmask_t *liveregs)
{
struct ir3_block *block = instr->block;
regmask_t written;
unsigned i, j;
regmask_init(liveregs);
regmask_init(&written);
for (instr = instr->next; instr; instr = instr->next) {
struct ir3_register *r;
if (is_meta(instr))
continue;
/* check first src's read: */
for (j = 1; j < instr->regs_count; j++) {
r = reg_check(instr, j);
if (r)
regmask_set_if_not(liveregs, r, &written);
}
/* then dst written (if assigned already): */
if (instr->flags & IR3_INSTR_MARK) {
r = reg_check(instr, 0);
if (r)
regmask_set(&written, r);
}
}
/* be sure to account for output registers too: */
for (i = 0; i < block->noutputs; i++) {
struct ir3_register reg = REG(output_base(ctx) + i, X);
regmask_set_if_not(liveregs, &reg, &written);
}
}
/* calculate registers that are clobbered before last use of 'assigner'.
* This needs to be done backwards, although it could possibly be
* combined into compute_liveregs(). (Ie. compute_liveregs() could
* reverse the list, then do this part backwards reversing the list
* again back to original order.) Otoh, probably I should try to
* construct a proper interference graph instead.
*
* XXX this needs to follow the same recursion path that is used
* to rename/assign registers (ie. ra_assign_src()).. this is a bit
* ugly right now, maybe refactor into node iterator sort of things
* that iterates nodes in the correct order?
*/
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, struct ir3_instruction *assigner,
regmask_t *liveregs)
{
unsigned i;
bool live = false, was_live = false;
if (instr == NULL) {
struct ir3_block *block = ctx->block;
/* if at the end, check outputs: */
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i] == assigner)
return true;
return false;
}
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT:
// TODO
assert(0);
break;
case OPC_META_FO:
case OPC_META_FI:
was_live |= compute_clobbers(ctx, instr->next,
instr, liveregs);
break;
default:
break;
}
}
live = true;
break;
}
}
was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);
if (was_live && (instr->regs_count > 0) &&
(instr->flags & IR3_INSTR_MARK) &&
!is_meta(instr))
regmask_set(liveregs, instr->regs[0]);
return live || was_live;
}
static int find_available(regmask_t *liveregs, int size)
{
unsigned i;
for (i = 0; i < MAX_REG - size; i++) {
if (!regmask_get(liveregs, &REG(i, X))) {
unsigned start = i++;
for (; (i < MAX_REG) && ((i - start) < size); i++)
if (regmask_get(liveregs, &REG(i, X)))
break;
if ((i - start) >= size)
return start;
}
}
assert(0);
return -1;
}
static int alloc_block(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, int size)
{
if (!instr) {
/* special case, allocating shader outputs. At this
* point, nothing is allocated, just start the shader
* outputs at r0.x and let compute_liveregs() take
* care of the rest from here:
*/
return 0;
} else {
regmask_t liveregs;
compute_liveregs(ctx, instr, &liveregs);
// XXX XXX XXX XXX XXX XXX XXX XXX XXX
// XXX hack.. maybe ra_calc should give us a list of
// instrs to compute_clobbers() on?
if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
(instr->regs_count == 1)) {
unsigned i, base = instr->regs[0]->num & ~0x3;
for (i = 0; i < 4; i++) {
struct ir3_instruction *in = ctx->block->inputs[base + i];
if (in)
compute_clobbers(ctx, in->next, in, &liveregs);
}
} else
// XXX XXX XXX XXX XXX XXX XXX XXX XXX
compute_clobbers(ctx, instr->next, instr, &liveregs);
return find_available(&liveregs, size);
}
}
/*
* Constraint Calculation:
*/
struct ra_calc_visitor {
struct ir3_visitor base;
struct ir3_ra_assignment a;
};
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
return (struct ra_calc_visitor *)v;
}
/* calculate register assignment for the instruction. If the register
* written by this instruction is required to be part of a range, to
* handle other (input/output/sam/bary.f/etc) contiguous register range
* constraints, that is calculated here.
*/
static void ra_calc_dst(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_calc_visitor *c = ra_calc_visitor(v);
if (is_tex(instr)) {
c->a.off = 0;
c->a.num = 4;
} else {
c->a.off = 0;
c->a.num = 1;
}
}
static void
ra_calc_dst_shader_input(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_calc_visitor *c = ra_calc_visitor(v);
struct ir3_block *block = instr->block;
struct ir3_register *dst = instr->regs[0];
unsigned base = dst->num & ~0x3;
unsigned i, num = 0;
assert(!(dst->flags & IR3_REG_IA));
/* check what input components we need: */
for (i = 0; i < 4; i++) {
unsigned idx = base + i;
if ((idx < block->ninputs) && block->inputs[idx])
num = i + 1;
}
c->a.off = dst->num - base;
c->a.num = num;
}
static void ra_calc_src_fanin(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_calc_visitor *c = ra_calc_visitor(v);
unsigned srcn = ir3_instr_regno(instr, reg) - 1;
c->a.off -= srcn;
c->a.num += srcn;
c->a.num = MAX2(c->a.num, instr->regs_count - 1);
}
static const struct ir3_visitor_funcs calc_visitor_funcs = {
.instr = ir3_visit_instr,
.dst_shader_input = ra_calc_dst_shader_input,
.dst_fanout = ra_calc_dst,
.dst_fanin = ra_calc_dst,
.dst = ra_calc_dst,
.src_fanout = ir3_visit_reg,
.src_fanin = ra_calc_src_fanin,
.src = ir3_visit_reg,
};
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
{
struct ra_calc_visitor v = {
.base.funcs = &calc_visitor_funcs,
};
ir3_visit_instr(&v.base, assigner);
return v.a;
}
/*
* Register Assignment:
*/
struct ra_assign_visitor {
struct ir3_visitor base;
struct ir3_ra_ctx *ctx;
int num;
};
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
return (struct ra_assign_visitor *)v;
}
static void ra_assign_reg(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
reg->flags &= ~IR3_REG_SSA;
reg->num = a->num;
}
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
unsigned i, base = reg->num & ~0x3;
int off = base - reg->num;
ra_assign_reg(v, instr, reg);
reg->flags |= IR3_REG_IA;
/* trigger assignment of all our companion input components: */
for (i = 0; i < 4; i++) {
struct ir3_instruction *in = instr->block->inputs[i+base];
if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
ra_assign(a->ctx, in, a->num + off + i);
}
}
static void ra_assign_dst_fanout(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
struct ir3_register *src = instr->regs[1];
ra_assign_reg(v, instr, reg);
if (src->flags & IR3_REG_SSA)
ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
}
static void ra_assign_src_fanout(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
ra_assign_reg(v, instr, reg);
ra_assign(a->ctx, instr, a->num + instr->fo.off);
}
static void ra_assign_src_fanin(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
ra_assign_reg(v, instr, reg);
ra_assign(a->ctx, instr, a->num - srcn);
for (j = 1; j < instr->regs_count; j++) {
struct ir3_register *reg = instr->regs[j];
if (reg->flags & IR3_REG_SSA) /* could be renamed already */
ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
}
}
static const struct ir3_visitor_funcs assign_visitor_funcs = {
.instr = ir3_visit_instr,
.dst_shader_input = ra_assign_dst_shader_input,
.dst_fanout = ra_assign_dst_fanout,
.dst_fanin = ra_assign_reg,
.dst = ra_assign_reg,
.src_fanout = ra_assign_src_fanout,
.src_fanin = ra_assign_src_fanin,
.src = ra_assign_reg,
};
static void ra_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *assigner, int num)
{
struct ra_assign_visitor v = {
.base.funcs = &assign_visitor_funcs,
.ctx = ctx,
.num = num,
};
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(assigner)) {
debug_assert(assigner->regs[0]->num == num);
if (assigner->regs[0]->num != num) {
/* impossible situation, should have been resolved
* at an earlier stage by inserting extra mov's:
*/
ctx->error = true;
}
return;
}
ir3_visit_instr(&v.base, assigner);
}
/*
*
*/
static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_ra_assignment a;
unsigned num;
/* skip over nop's */
if (instr->regs_count == 0)
return;
/* if we've already visited this instruction, bail now: */
if (instr->flags & IR3_INSTR_MARK)
return;
/* allocate register(s): */
a = ra_calc(instr);
num = alloc_block(ctx, instr, a.num) + a.off;
ra_assign(ctx, instr, num);
}
/* flatten into shader: */
// XXX this should probably be somewhere else:
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *n;
struct ir3_shader *shader = block->shader;
struct ir3_instruction *end =
ir3_instr_create(block, 0, OPC_END);
struct ir3_instruction *last_input = NULL;
regmask_t needs_ss;
regmask_t needs_sy;
regmask_init(&needs_ss);
regmask_init(&needs_sy);
shader->instrs_count = 0;
for (n = block->head; n; n = n->next) {
unsigned i;
if (is_meta(n))
continue;
for (i = 1; i < n->regs_count; i++) {
struct ir3_register *reg = n->regs[i];
if (is_gpr(reg)) {
/* TODO: we probably only need (ss) for alu
* instr consuming sfu result.. need to make
* some tests for both this and (sy)..
*/
if (regmask_get(&needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
regmask_init(&needs_ss);
}
if (regmask_get(&needs_sy, reg)) {
n->flags |= IR3_INSTR_SY;
regmask_init(&needs_sy);
}
}
}
shader->instrs[shader->instrs_count++] = n;
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
if (is_tex(n))
regmask_set(&needs_sy, n->regs[0]);
if (is_input(n))
last_input = n;
}
if (last_input)
last_input->regs[0]->flags |= IR3_REG_EI;
shader->instrs[shader->instrs_count++] = end;
shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *n;
if (!block->parent) {
unsigned i;
int base, off = output_base(ctx);
base = alloc_block(ctx, NULL, block->noutputs + off);
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ra_assign(ctx, block->outputs[i], base + i + off);
if (ctx->type == SHADER_FRAGMENT) {
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
ra_assign(ctx, block->inputs[i], base + i);
} else {
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
ir3_instr_ra(ctx, block->inputs[i]);
}
}
/* then loop over instruction list and assign registers:
*/
n = block->head;
while (n) {
ir3_instr_ra(ctx, n);
if (ctx->error)
return -1;
n = n->next;
}
legalize(ctx, block);
return 0;
}
int ir3_block_ra(struct ir3_block *block, enum shader_t type)
{
struct ir3_ra_ctx ctx = {
.block = block,
.type = type,
};
ir3_shader_clear_mark(block->shader);
return block_ra(&ctx, block);
}


@@ -0,0 +1,289 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "util/u_math.h"
#include "ir3.h"
/*
* Instruction Scheduling:
*
* Using the depth sorted list from depth pass, attempt to recursively
* schedule deepest unscheduled path. The first instruction that cannot
* be scheduled returns the required delay slots it needs, at which
* point we return back up to the top and attempt to schedule by next
* highest depth. After a sufficient number of instructions have been
* scheduled, return back to beginning of list and start again. If you
* reach the end of depth sorted list without being able to insert any
* instruction, insert nop's. Repeat until no more unscheduled
* instructions.
*/
struct ir3_sched_ctx {
struct ir3_instruction *scheduled;
unsigned cnt;
};
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
struct ir3_instruction *d = NULL;
unsigned i = 0, id = 0;
while ((i < nsrcs) && !(d = srcs[id = i]))
i++;
if (!d)
return NULL;
for (; i < nsrcs; i++)
if (srcs[i] && (srcs[i]->depth > d->depth))
d = srcs[id = i];
srcs[id] = NULL;
return d;
}
static unsigned distance(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr, unsigned maxd)
{
struct ir3_instruction *n = ctx->scheduled;
unsigned d = 0;
while (n && (n != instr) && (d < maxd)) {
if (!is_meta(n))
d++;
n = n->next;
}
return d;
}
/* TODO maybe we want double linked list? */
static struct ir3_instruction * prev(struct ir3_instruction *instr)
{
struct ir3_instruction *p = instr->block->head;
while (p && (p->next != instr))
p = p->next;
return p;
}
static void schedule(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr, bool remove)
{
struct ir3_block *block = instr->block;
/* maybe there is a better way to handle this than just stuffing
* a nop.. ideally we'd know about this constraint in the
* scheduling and depth calculation..
*/
if (ctx->scheduled && is_sfu(ctx->scheduled) && is_sfu(instr))
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
/* remove from depth list:
*/
if (remove) {
struct ir3_instruction *p = prev(instr);
/* NOTE: this can happen for inputs which are not
* read.. in that case there is no need to schedule
* the input, so just bail:
*/
if (instr != (p ? p->next : block->head))
return;
if (p)
p->next = instr->next;
else
block->head = instr->next;
}
instr->flags |= IR3_INSTR_MARK;
instr->next = ctx->scheduled;
ctx->scheduled = instr;
ctx->cnt++;
}
/*
* Delay-slot calculation. Follows fanin/fanout.
*/
static unsigned delay_calc2(struct ir3_sched_ctx *ctx,
struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned srcn)
{
unsigned delay = 0;
if (is_meta(assigner)) {
unsigned i;
for (i = 1; i < assigner->regs_count; i++) {
struct ir3_register *reg = assigner->regs[i];
if (reg->flags & IR3_REG_SSA) {
unsigned d = delay_calc2(ctx, reg->instr,
consumer, srcn);
delay = MAX2(delay, d);
}
}
} else {
delay = ir3_delayslots(assigner, consumer, srcn);
delay -= distance(ctx, assigner, delay);
}
return delay;
}
static unsigned delay_calc(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
unsigned i, delay = 0;
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
if (reg->flags & IR3_REG_SSA) {
unsigned d = delay_calc2(ctx, reg->instr,
instr, i - 1);
delay = MAX2(delay, d);
}
}
return delay;
}
/* A negative return value signals that an instruction has been newly
* scheduled, return back up to the top of the stack (to block_sched())
*/
static int trysched(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1];
struct ir3_instruction *src;
unsigned i, delay, nsrcs = 0;
/* if already scheduled: */
if (instr->flags & IR3_INSTR_MARK)
return 0;
/* figure out our src's: */
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
if (reg->flags & IR3_REG_SSA)
srcs[nsrcs++] = reg->instr;
}
/* for each src register in sorted order:
*/
delay = 0;
while ((src = deepest(srcs, nsrcs))) {
delay = trysched(ctx, src);
if (delay)
return delay;
}
/* all our dependents are scheduled, figure out if
* we have enough delay slots to schedule ourself:
*/
delay = delay_calc(ctx, instr);
if (!delay) {
schedule(ctx, instr, true);
return -1;
}
return delay;
}
static struct ir3_instruction * reverse(struct ir3_instruction *instr)
{
struct ir3_instruction *reversed = NULL;
while (instr) {
struct ir3_instruction *next = instr->next;
instr->next = reversed;
reversed = instr;
instr = next;
}
return reversed;
}
static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *instr;
/* schedule all the shader input's (meta-instr) first so that
* the RA step sees that the input registers contain a value
* from the start of the shader:
*/
if (!block->parent) {
unsigned i;
for (i = 0; i < block->ninputs; i++) {
struct ir3_instruction *in = block->inputs[i];
if (in)
schedule(ctx, in, true);
}
}
while ((instr = block->head)) {
/* NOTE: always grab next *before* trysched(), in case the
* instruction is actually scheduled (and therefore moved
* from depth list into scheduled list)
*/
struct ir3_instruction *next = instr->next;
int cnt = trysched(ctx, instr);
/* -1 is signal to return up stack, but to us means same as 0: */
cnt = MAX2(0, cnt);
cnt += ctx->cnt;
instr = next;
/* if deepest remaining instruction cannot be scheduled, try
* the increasingly more shallow instructions until needed
* number of delay slots is filled:
*/
while (instr && (cnt > ctx->cnt)) {
next = instr->next;
trysched(ctx, instr);
instr = next;
}
/* and if we run out of instructions that can be scheduled,
* then it is time for nop's:
*/
while (cnt > ctx->cnt)
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
}
/* at this point, scheduled list is in reverse order, so fix that: */
block->head = reverse(ctx->scheduled);
}
void ir3_block_sched(struct ir3_block *block)
{
struct ir3_sched_ctx ctx = {0};
ir3_shader_clear_mark(block->shader);
block_sched(&ctx, block);
}


@@ -0,0 +1,154 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#ifndef IR3_VISITOR_H_
#define IR3_VISITOR_H_
/**
* Visitor which follows dst to src relationships between instructions,
* first visiting the dst (writer) instruction, followed by src (reader)
* instruction(s).
*
* TODO maybe we want multiple different visitors to walk the
* graph in different ways?
*/
struct ir3_visitor;
typedef void (*ir3_visit_instr_func)(struct ir3_visitor *v,
struct ir3_instruction *instr);
typedef void (*ir3_visit_reg_func)(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg);
struct ir3_visitor_funcs {
ir3_visit_instr_func instr; // TODO do we need??
ir3_visit_reg_func dst_shader_input;
ir3_visit_reg_func dst_block_input;
ir3_visit_reg_func dst_fanout;
ir3_visit_reg_func dst_fanin;
ir3_visit_reg_func dst;
ir3_visit_reg_func src_block_input;
ir3_visit_reg_func src_fanout;
ir3_visit_reg_func src_fanin;
ir3_visit_reg_func src;
};
struct ir3_visitor {
const struct ir3_visitor_funcs *funcs;
bool error;
};
#include "util/u_debug.h"
static void visit_instr_dst(struct ir3_visitor *v,
struct ir3_instruction *instr)
{
struct ir3_register *reg = instr->regs[0];
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT:
if (instr->regs_count == 1)
v->funcs->dst_shader_input(v, instr, reg);
else
v->funcs->dst_block_input(v, instr, reg);
return;
case OPC_META_FO:
v->funcs->dst_fanout(v, instr, reg);
return;
case OPC_META_FI:
v->funcs->dst_fanin(v, instr, reg);
return;
default:
break;
}
}
v->funcs->dst(v, instr, reg);
}
static void visit_instr_src(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT:
/* shader-input does not have a src, only block input: */
debug_assert(instr->regs_count == 2);
v->funcs->src_block_input(v, instr, reg);
return;
case OPC_META_FO:
v->funcs->src_fanout(v, instr, reg);
return;
case OPC_META_FI:
v->funcs->src_fanin(v, instr, reg);
return;
default:
break;
}
}
v->funcs->src(v, instr, reg);
}
static void ir3_visit_instr(struct ir3_visitor *v,
struct ir3_instruction *instr)
{
struct ir3_instruction *n;
/* visit instruction that assigns value: */
if (instr->regs_count > 0)
visit_instr_dst(v, instr);
/* and of any following instructions which read that value: */
n = instr->next;
while (n && !v->error) {
unsigned i;
for (i = 1; i < n->regs_count; i++) {
struct ir3_register *reg = n->regs[i];
if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr))
visit_instr_src(v, n, reg);
}
n = n->next;
}
}
static void ir3_visit_reg(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
/* no-op */
}
#endif /* IR3_VISITOR_H_ */


@@ -68,6 +68,8 @@ static const struct debug_named_value debug_options[] = {
{"binning", FD_DBG_BINNING, "Enable hw binning"},
{"dbinning", FD_DBG_DBINNING, "Disable hw binning"},
{"optimize", FD_DBG_OPTIMIZE, "Enable optimization passes in compiler"},
{"optmsgs", FD_DBG_OPTMSGS, "Enable optimizater debug messages"},
{"optdump", FD_DBG_OPTDUMP, "Dump shader DAG to .dot files"},
DEBUG_NAMED_VALUE_END
};


@@ -64,6 +64,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_BINNING 0x0100
#define FD_DBG_DBINNING 0x0200
#define FD_DBG_OPTIMIZE 0x0400
#define FD_DBG_OPTMSGS 0x0800
#define FD_DBG_OPTDUMP 0x1000
extern int fd_mesa_debug;
extern bool fd_binning_enabled;