mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
freedreno/a3xx/compiler: new compiler
The new compiler generates a dependency graph of instructions, including
a few meta-instructions to handle PHI and preserve some extra
information needed for register assignment, etc.
The depth pass assigns a weight/depth to each node (based on the sum of
instruction cycles of a given node and all its dependent nodes), which
is used to schedule instructions. The scheduling takes into account the
minimum number of cycles/slots between dependent instructions, etc.,
which was something that could not be handled properly with the original
compiler (which was more of a naive TGSI translator than an actual
compiler).
The register assignment is currently split out as a standalone pass. I
expect that it will be replaced at some point, once I figure out what to
do about relative addressing (which is currently the only thing that
should cause fallback to old compiler).
There are a couple new debug options for FD_MESA_DEBUG env var:
optmsgs - enable debug prints in optimizer
optdump - dump instruction graph in .dot format, for example:
http://people.freedesktop.org/~robclark/a3xx/frag-0000.dot.png
http://people.freedesktop.org/~robclark/a3xx/frag-0000.dot
At this point, thanks to proper handling of instruction scheduling, the
new compiler fixes a lot of things that were broken before, and does not
appear to break anything that was working before[1]. So even though it
is not finished, it seems useful to merge it in its current state.
[1] Not merged in this commit, because I'm not sure if it really belongs
in mesa tree, but the following commit implements a simple shader
emulator, which I've used to compare the output of the new compiler to
the original compiler (ie. run it on all the TGSI shaders dumped out via
ST_DEBUG=tgsi with various games/apps):
163b6306b1
Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
f0e2d7ab46
commit
554f1ac00c
17 changed files with 2778 additions and 210 deletions
|
|
@ -43,4 +43,10 @@ a3xx_SOURCES := \
|
|||
a3xx/fd3_util.c \
|
||||
a3xx/fd3_zsa.c \
|
||||
a3xx/disasm-a3xx.c \
|
||||
a3xx/ir3_cp.c \
|
||||
a3xx/ir3_depth.c \
|
||||
a3xx/ir3_dump.c \
|
||||
a3xx/ir3_flatten.c \
|
||||
a3xx/ir3_ra.c \
|
||||
a3xx/ir3_sched.c \
|
||||
a3xx/ir3.c
|
||||
|
|
|
|||
|
|
@ -735,6 +735,14 @@ struct opc_info {
|
|||
|
||||
#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
|
||||
|
||||
// XXX hack.. probably should move this table somewhere common:
|
||||
#include "ir3.h"
|
||||
const char *ir3_instr_name(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->category == -1) return "??meta??";
|
||||
return opcs[(instr->category << NOPC_BITS) | instr->opc].name;
|
||||
}
|
||||
|
||||
static void print_instr(uint32_t *dwords, int level, int n)
|
||||
{
|
||||
instr_t *instr = (instr_t *)dwords;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1161,23 +1161,6 @@ instr_cat2(const struct instr_translater *t,
|
|||
put_dst(ctx, inst, dst);
|
||||
}
|
||||
|
||||
static bool is_mad(opc_t opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case OPC_MAD_U16:
|
||||
case OPC_MADSH_U16:
|
||||
case OPC_MAD_S16:
|
||||
case OPC_MADSH_M16:
|
||||
case OPC_MAD_U24:
|
||||
case OPC_MAD_S24:
|
||||
case OPC_MAD_F16:
|
||||
case OPC_MAD_F32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
instr_cat3(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
|
|
|
|||
|
|
@ -80,9 +80,9 @@ fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
|
|||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < so->inputs_count; i++)
|
||||
so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
|
||||
so->info.max_reg = MAX2(so->info.max_reg, (so->inputs[i].regid + 3) >> 2);
|
||||
for (i = 0; i < so->outputs_count; i++)
|
||||
so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2);
|
||||
so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2);
|
||||
}
|
||||
|
||||
static struct fd3_shader_stateobj *
|
||||
|
|
|
|||
|
|
@ -190,6 +190,22 @@ typedef enum {
|
|||
OPC_LDC_4 = 30,
|
||||
OPC_LDLV = 31,
|
||||
|
||||
/* meta instructions (category -1): */
|
||||
/* placeholder instr to mark inputs/outputs: */
|
||||
OPC_META_INPUT = 0,
|
||||
OPC_META_OUTPUT = 1,
|
||||
/* The "fan-in" and "fan-out" instructions are used for keeping
|
||||
* track of instructions that write to multiple dst registers
|
||||
* (fan-out) like texture sample instructions, or read multiple
|
||||
* consecutive scalar registers (fan-in) (bary.f, texture samp)
|
||||
*/
|
||||
OPC_META_FO = 2,
|
||||
OPC_META_FI = 3,
|
||||
/* branches/flow control */
|
||||
OPC_META_FLOW = 4,
|
||||
OPC_META_PHI = 5,
|
||||
|
||||
|
||||
} opc_t;
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -643,4 +659,21 @@ static inline uint32_t instr_opc(instr_t *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns true if 'opc' is one of the multiply-add opcodes
 * (the MAD_* and MADSH_* variants listed below), false for anything
 * else.
 */
static inline bool is_mad(opc_t opc)
{
	return (opc == OPC_MAD_U16) ||
		(opc == OPC_MADSH_U16) ||
		(opc == OPC_MAD_S16) ||
		(opc == OPC_MADSH_M16) ||
		(opc == OPC_MAD_U24) ||
		(opc == OPC_MAD_S24) ||
		(opc == OPC_MAD_F16) ||
		(opc == OPC_MAD_F32);
}
|
||||
|
||||
#endif /* INSTR_A3XX_H_ */
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@
|
|||
/* simple allocator to carve allocations out of an up-front allocated heap,
|
||||
* so that we can free everything easily in one shot.
|
||||
*/
|
||||
static void * ir3_alloc(struct ir3_shader *shader, int sz)
|
||||
void * ir3_alloc(struct ir3_shader *shader, int sz)
|
||||
{
|
||||
void *ptr = &shader->heap[shader->heap_idx];
|
||||
shader->heap_idx += align(sz, 4);
|
||||
|
|
|
|||
|
|
@ -65,6 +65,11 @@ struct ir3_register {
|
|||
* that the shader needs no more input:
|
||||
*/
|
||||
IR3_REG_EI = 0x200,
|
||||
/* meta-flags, for intermediate stages of IR, ie.
|
||||
* before register assignment is done:
|
||||
*/
|
||||
IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */
|
||||
IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */
|
||||
} flags;
|
||||
union {
|
||||
/* normal registers:
|
||||
|
|
@ -77,6 +82,10 @@ struct ir3_register {
|
|||
float fim_val;
|
||||
/* relative: */
|
||||
int offset;
|
||||
/* for IR3_REG_SSA, src registers contain ptr back to
|
||||
* assigning instruction.
|
||||
*/
|
||||
struct ir3_instruction *instr;
|
||||
};
|
||||
|
||||
/* used for cat5 instructions, but also for internal/IR level
|
||||
|
|
@ -139,6 +148,10 @@ struct ir3_instruction {
|
|||
IR3_INSTR_P = 0x080,
|
||||
IR3_INSTR_S = 0x100,
|
||||
IR3_INSTR_S2EN = 0x200,
|
||||
/* meta-flags, for intermediate stages of IR, ie.
|
||||
* before register assignment is done:
|
||||
*/
|
||||
IR3_INSTR_MARK = 0x1000,
|
||||
} flags;
|
||||
int repeat;
|
||||
unsigned regs_count;
|
||||
|
|
@ -171,7 +184,33 @@ struct ir3_instruction {
|
|||
int offset;
|
||||
int iim_val;
|
||||
} cat6;
|
||||
/* for meta-instructions, just used to hold extra data
|
||||
* before instruction scheduling, etc
|
||||
*/
|
||||
struct {
|
||||
int off; /* component/offset */
|
||||
} fo;
|
||||
struct {
|
||||
struct ir3_block *if_block, *else_block;
|
||||
} flow;
|
||||
struct {
|
||||
struct ir3_block *block;
|
||||
} inout;
|
||||
};
|
||||
|
||||
/* transient values used during various algorithms: */
|
||||
union {
|
||||
/* The instruction depth is the max dependency distance to output.
|
||||
*
|
||||
* You can also think of it as the "cost", if we did any sort of
|
||||
* optimization for register footprint. Ie. a value that is just
|
||||
* result of moving a const to a reg would have a low cost, so
|
||||
* it could make sense to duplicate the instruction at various
|
||||
* points where the result is needed to reduce register footprint.
|
||||
*/
|
||||
unsigned depth;
|
||||
};
|
||||
struct ir3_instruction *next;
|
||||
#ifdef DEBUG
|
||||
uint32_t serialno;
|
||||
#endif
|
||||
|
|
@ -201,6 +240,7 @@ struct ir3_shader * ir3_shader_create(void);
|
|||
void ir3_shader_destroy(struct ir3_shader *shader);
|
||||
void * ir3_shader_assemble(struct ir3_shader *shader,
|
||||
struct ir3_shader_info *info);
|
||||
void * ir3_alloc(struct ir3_shader *shader, int sz);
|
||||
|
||||
struct ir3_block * ir3_block_create(struct ir3_shader *shader,
|
||||
unsigned ntmp, unsigned nin, unsigned nout);
|
||||
|
|
@ -208,11 +248,44 @@ struct ir3_block * ir3_block_create(struct ir3_shader *shader,
|
|||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
|
||||
int category, opc_t opc);
|
||||
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
|
||||
const char *ir3_instr_name(struct ir3_instruction *instr);
|
||||
|
||||
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
|
||||
int num, int flags);
|
||||
|
||||
|
||||
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->flags & IR3_INSTR_MARK)
|
||||
return true; /* already visited */
|
||||
instr->flags ^= IR3_INSTR_MARK;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void ir3_shader_clear_mark(struct ir3_shader *shader)
|
||||
{
|
||||
/* TODO would be nice to drop the instruction array.. for
|
||||
* new compiler, _clear_mark() is all we use it for, and
|
||||
* we could probably manage a linked list instead..
|
||||
*/
|
||||
unsigned i;
|
||||
for (i = 0; i < shader->instrs_count; i++) {
|
||||
struct ir3_instruction *instr = shader->instrs[i];
|
||||
instr->flags &= ~IR3_INSTR_MARK;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int ir3_instr_regno(struct ir3_instruction *instr,
|
||||
struct ir3_register *reg)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < instr->regs_count; i++)
|
||||
if (reg == instr->regs[i])
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* comp:
|
||||
* 0 - x
|
||||
* 1 - y
|
||||
|
|
@ -254,6 +327,15 @@ static inline bool is_input(struct ir3_instruction *instr)
|
|||
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
|
||||
}
|
||||
|
||||
static inline bool is_meta(struct ir3_instruction *instr)
|
||||
{
|
||||
/* TODO how should we count PHI (and maybe fan-in/out) which
|
||||
* might actually contribute some instructions to the final
|
||||
* result?
|
||||
*/
|
||||
return (instr->category == -1);
|
||||
}
|
||||
|
||||
static inline bool is_gpr(struct ir3_register *reg)
|
||||
{
|
||||
return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED));
|
||||
|
|
@ -262,13 +344,39 @@ static inline bool is_gpr(struct ir3_register *reg)
|
|||
/* TODO combine is_gpr()/reg_gpr().. */
|
||||
static inline bool reg_gpr(struct ir3_register *r)
|
||||
{
|
||||
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))
|
||||
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_SSA))
|
||||
return false;
|
||||
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* dump: */
|
||||
#include <stdio.h>
|
||||
void ir3_shader_dump(struct ir3_shader *shader, const char *name,
|
||||
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3_shader? */,
|
||||
FILE *f);
|
||||
void ir3_dump_instr_single(struct ir3_instruction *instr);
|
||||
void ir3_dump_instr_list(struct ir3_instruction *instr);
|
||||
|
||||
/* flatten if/else: */
|
||||
int ir3_block_flatten(struct ir3_block *block);
|
||||
|
||||
/* depth calculation: */
|
||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned n);
|
||||
void ir3_block_depth(struct ir3_block *block);
|
||||
|
||||
/* copy-propagate: */
|
||||
void ir3_block_cp(struct ir3_block *block);
|
||||
|
||||
/* scheduling: */
|
||||
void ir3_block_sched(struct ir3_block *block);
|
||||
|
||||
/* register assignment: */
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type);
|
||||
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
#endif
|
||||
|
|
|
|||
155
src/gallium/drivers/freedreno/a3xx/ir3_cp.c
Normal file
155
src/gallium/drivers/freedreno/a3xx/ir3_cp.c
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* Copy Propagate:
|
||||
*
|
||||
* TODO probably want some sort of visitor sort of interface to
|
||||
* avoid duplicating the same graph traversal logic everywhere..
|
||||
*
|
||||
*/
|
||||
|
||||
static void block_cp(struct ir3_block *block);
|
||||
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep);
|
||||
|
||||
static bool is_eligible_mov(struct ir3_instruction *instr)
|
||||
{
|
||||
if ((instr->category == 1) &&
|
||||
(instr->cat1.src_type == instr->cat1.dst_type)) {
|
||||
struct ir3_register *src = instr->regs[1];
|
||||
if ((src->flags & IR3_REG_SSA) &&
|
||||
/* TODO: propagate abs/neg modifiers if possible */
|
||||
!(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE)))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void walk_children(struct ir3_instruction *instr, bool keep)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* walk down the graph from each src: */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *src = instr->regs[i];
|
||||
if (src->flags & IR3_REG_SSA)
|
||||
src->instr = instr_cp(src->instr, keep);
|
||||
}
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
instr_cp_fanin(struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* we need to handle fanin specially, to detect cases
|
||||
* when we need to keep a mov
|
||||
*/
|
||||
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *src = instr->regs[i];
|
||||
if (src->flags & IR3_REG_SSA) {
|
||||
struct ir3_instruction *cand =
|
||||
instr_cp(src->instr, false);
|
||||
|
||||
/* if the candidate is a fanout, then keep
|
||||
* the move.
|
||||
*
|
||||
* This is a bit, um, fragile, but it should
|
||||
* catch the extra mov's that the front-end
|
||||
* puts in for us already in these cases.
|
||||
*/
|
||||
if (is_meta(cand) && (cand->opc == OPC_META_FO))
|
||||
cand = instr_cp(src->instr, true);
|
||||
|
||||
src->instr = cand;
|
||||
}
|
||||
}
|
||||
|
||||
walk_children(instr, false);
|
||||
|
||||
return instr;
|
||||
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
instr_cp(struct ir3_instruction *instr, bool keep)
|
||||
{
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return instr;
|
||||
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_FI))
|
||||
return instr_cp_fanin(instr);
|
||||
|
||||
if (is_eligible_mov(instr) && !keep) {
|
||||
struct ir3_register *src = instr->regs[1];
|
||||
return instr_cp(src->instr, false);
|
||||
}
|
||||
|
||||
walk_children(instr, false);
|
||||
|
||||
return instr;
|
||||
}
|
||||
|
||||
static void block_cp(struct ir3_block *block)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
if (block->outputs[i]) {
|
||||
struct ir3_instruction *out =
|
||||
instr_cp(block->outputs[i], false);
|
||||
|
||||
/* To deal with things like this:
|
||||
*
|
||||
* 43: MOV OUT[2], TEMP[5]
|
||||
* 44: MOV OUT[0], TEMP[5]
|
||||
*
|
||||
* we need to ensure that no two outputs point to
|
||||
* the same instruction
|
||||
*/
|
||||
for (j = 0; j < i; j++) {
|
||||
if (block->outputs[j] == out) {
|
||||
out = instr_cp(block->outputs[i], true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
block->outputs[i] = out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ir3_block_cp(struct ir3_block *block)
|
||||
{
|
||||
ir3_shader_clear_mark(block->shader);
|
||||
block_cp(block);
|
||||
}
|
||||
156
src/gallium/drivers/freedreno/a3xx/ir3_depth.c
Normal file
156
src/gallium/drivers/freedreno/a3xx/ir3_depth.c
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* Instruction Depth:
|
||||
*
|
||||
* Calculates weighted instruction depth, ie. the sum of # of needed
|
||||
* instructions plus delay slots back to original input (ie INPUT or
|
||||
* CONST). That is to say, an instruction's depth is:
|
||||
*
|
||||
* depth(instr) {
|
||||
* d = 0;
|
||||
* // for each src register:
|
||||
* foreach (src in instr->regs[1..n])
|
||||
* d = max(d, delayslots(src->instr, n) + depth(src->instr));
|
||||
* return d + 1;
|
||||
* }
|
||||
*
|
||||
* After an instruction's depth is calculated, it is inserted into the
|
||||
* block's depth-sorted list, which is used by the scheduling pass.
|
||||
*/
|
||||
|
||||
/* calculate required # of delay slots between the instruction that
|
||||
* assigns a value and the one that consumes
|
||||
*/
|
||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned n)
|
||||
{
|
||||
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
|
||||
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
|
||||
* handled with sync bits
|
||||
*/
|
||||
|
||||
if (is_meta(assigner))
|
||||
return 0;
|
||||
|
||||
/* handled via sync flags: */
|
||||
if (is_sfu(assigner) || is_tex(assigner))
|
||||
return 0;
|
||||
|
||||
/* assigner must be alu: */
|
||||
if (is_sfu(consumer) || is_tex(consumer)) {
|
||||
return 8;
|
||||
} else if ((consumer->category == 3) &&
|
||||
is_mad(consumer->opc) && (n == 2)) {
|
||||
/* special case, 3rd src to cat3 not required on first cycle */
|
||||
return 2;
|
||||
} else {
|
||||
return 5;
|
||||
}
|
||||
}
|
||||
|
||||
static void insert_by_depth(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_block *block = instr->block;
|
||||
struct ir3_instruction *n = block->head;
|
||||
struct ir3_instruction *p = NULL;
|
||||
|
||||
while (n && (n != instr) && (n->depth > instr->depth)) {
|
||||
p = n;
|
||||
n = n->next;
|
||||
}
|
||||
|
||||
instr->next = n;
|
||||
if (p)
|
||||
p->next = instr;
|
||||
else
|
||||
block->head = instr;
|
||||
}
|
||||
|
||||
static void ir3_instr_depth(struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return;
|
||||
|
||||
instr->depth = 0;
|
||||
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *src = instr->regs[i];
|
||||
if (src->flags & IR3_REG_SSA) {
|
||||
unsigned sd;
|
||||
|
||||
/* visit child to compute it's depth: */
|
||||
ir3_instr_depth(src->instr);
|
||||
|
||||
sd = ir3_delayslots(src->instr, instr, i-1) +
|
||||
src->instr->depth;
|
||||
|
||||
instr->depth = MAX2(instr->depth, sd);
|
||||
}
|
||||
}
|
||||
|
||||
/* meta-instructions don't add cycles, other than PHI.. which
|
||||
* might translate to a real instruction..
|
||||
*
|
||||
* well, not entirely true, fan-in/out, etc might need to need
|
||||
* to generate some extra mov's in edge cases, etc.. probably
|
||||
* we might want to do depth calculation considering the worst
|
||||
* case for these??
|
||||
*/
|
||||
if (!is_meta(instr))
|
||||
instr->depth++;
|
||||
|
||||
insert_by_depth(instr);
|
||||
}
|
||||
|
||||
void ir3_block_depth(struct ir3_block *block)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
block->head = NULL;
|
||||
|
||||
ir3_shader_clear_mark(block->shader);
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i])
|
||||
ir3_instr_depth(block->outputs[i]);
|
||||
|
||||
/* at this point, any unvisited input is unused: */
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
struct ir3_instruction *in = block->inputs[i];
|
||||
if (in && !ir3_instr_check_mark(in))
|
||||
block->inputs[i] = NULL;
|
||||
}
|
||||
}
|
||||
416
src/gallium/drivers/freedreno/a3xx/ir3_dump.c
Normal file
416
src/gallium/drivers/freedreno/a3xx/ir3_dump.c
Normal file
|
|
@ -0,0 +1,416 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
#define PTRID(x) ((unsigned long)(x))
|
||||
|
||||
/* State shared by the dump helpers in this file. */
struct ir3_dump_ctx {
	/* stream that all (non-deferred) output is written to */
	FILE *f;
	/* when set, instruction names are prefixed with serialno (DEBUG
	 * builds) and depth, and SSA srcs show the name of the
	 * instruction they refer to
	 */
	bool verbose;
};
|
||||
|
||||
static void dump_instr_name(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
/* for debugging: */
|
||||
if (ctx->verbose) {
|
||||
#ifdef DEBUG
|
||||
fprintf(ctx->f, "%04u:", instr->serialno);
|
||||
#endif
|
||||
fprintf(ctx->f, "%03u: ", instr->depth);
|
||||
}
|
||||
|
||||
if (is_meta(instr)) {
|
||||
switch(instr->opc) {
|
||||
case OPC_META_PHI:
|
||||
fprintf(ctx->f, "Φ");
|
||||
break;
|
||||
default:
|
||||
/* shouldn't hit here.. just for debugging: */
|
||||
switch (instr->opc) {
|
||||
case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break;
|
||||
case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break;
|
||||
case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break;
|
||||
case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break;
|
||||
case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break;
|
||||
case OPC_META_PHI: fprintf(ctx->f, "_meta:phi"); break;
|
||||
|
||||
default: fprintf(ctx->f, "_meta:%d", instr->opc); break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else if (instr->category == 1) {
|
||||
static const char *type[] = {
|
||||
[TYPE_F16] = "f16",
|
||||
[TYPE_F32] = "f32",
|
||||
[TYPE_U16] = "u16",
|
||||
[TYPE_U32] = "u32",
|
||||
[TYPE_S16] = "s16",
|
||||
[TYPE_S32] = "s32",
|
||||
[TYPE_U8] = "u8",
|
||||
[TYPE_S8] = "s8",
|
||||
};
|
||||
if (instr->cat1.src_type == instr->cat1.dst_type)
|
||||
fprintf(ctx->f, "mov");
|
||||
else
|
||||
fprintf(ctx->f, "cov");
|
||||
fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
|
||||
} else {
|
||||
fprintf(ctx->f, "%s", ir3_instr_name(instr));
|
||||
if (instr->flags & IR3_INSTR_3D)
|
||||
fprintf(ctx->f, ".3d");
|
||||
if (instr->flags & IR3_INSTR_A)
|
||||
fprintf(ctx->f, ".a");
|
||||
if (instr->flags & IR3_INSTR_O)
|
||||
fprintf(ctx->f, ".o");
|
||||
if (instr->flags & IR3_INSTR_P)
|
||||
fprintf(ctx->f, ".p");
|
||||
if (instr->flags & IR3_INSTR_S)
|
||||
fprintf(ctx->f, ".s");
|
||||
if (instr->flags & IR3_INSTR_S2EN)
|
||||
fprintf(ctx->f, ".s2en");
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_reg_name(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_register *reg)
|
||||
{
|
||||
if ((reg->flags & IR3_REG_ABS) && (reg->flags & IR3_REG_NEGATE))
|
||||
fprintf(ctx->f, "(absneg)");
|
||||
else if (reg->flags & IR3_REG_NEGATE)
|
||||
fprintf(ctx->f, "(neg)");
|
||||
else if (reg->flags & IR3_REG_ABS)
|
||||
fprintf(ctx->f, "(abs)");
|
||||
|
||||
if (reg->flags & IR3_REG_IMMED) {
|
||||
fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
|
||||
} else if (reg->flags & IR3_REG_SSA) {
|
||||
if (ctx->verbose) {
|
||||
fprintf(ctx->f, "_[");
|
||||
dump_instr_name(ctx, reg->instr);
|
||||
fprintf(ctx->f, "]");
|
||||
}
|
||||
} else {
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
fprintf(ctx->f, "h");
|
||||
if (reg->flags & IR3_REG_CONST)
|
||||
fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
|
||||
else
|
||||
fprintf(ctx->f, "r%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
|
||||
}
|
||||
}
|
||||
|
||||
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr);
|
||||
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_block *block, const char *name);
|
||||
|
||||
static void dump_instr(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return;
|
||||
|
||||
/* some meta-instructions need to be handled specially: */
|
||||
if (is_meta(instr)) {
|
||||
if ((instr->opc == OPC_META_FO) ||
|
||||
(instr->opc == OPC_META_FI)) {
|
||||
unsigned i;
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
dump_instr(ctx, reg->instr);
|
||||
}
|
||||
} else if (instr->opc == OPC_META_FLOW) {
|
||||
struct ir3_register *reg = instr->regs[1];
|
||||
ir3_block_dump(ctx, instr->flow.if_block, "if");
|
||||
if (instr->flow.else_block)
|
||||
ir3_block_dump(ctx, instr->flow.else_block, "else");
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
dump_instr(ctx, reg->instr);
|
||||
} else if (instr->opc == OPC_META_PHI) {
|
||||
/* treat like a normal instruction: */
|
||||
ir3_instr_dump(ctx, instr);
|
||||
}
|
||||
} else {
|
||||
ir3_instr_dump(ctx, instr);
|
||||
}
|
||||
}
|
||||
|
||||
/* arrarraggh! if link is to something outside of the current block, we
|
||||
* need to defer emitting the link until the end of the block, since the
|
||||
* edge triggers pre-creation of the node it links to inside the cluster,
|
||||
* even though it is meant to be outside..
|
||||
*/
|
||||
static struct {
|
||||
char buf[40960];
|
||||
unsigned n;
|
||||
} edge_buf;
|
||||
|
||||
/* helper to print or defer: */
|
||||
static void printdef(struct ir3_dump_ctx *ctx,
|
||||
bool defer, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
if (defer) {
|
||||
unsigned n = edge_buf.n;
|
||||
n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n,
|
||||
fmt, ap);
|
||||
edge_buf.n = n;
|
||||
} else {
|
||||
vfprintf(ctx->f, fmt, ap);
|
||||
}
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
static void dump_link2(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr, const char *target, bool defer)
|
||||
{
|
||||
/* some meta-instructions need to be handled specially: */
|
||||
if (is_meta(instr)) {
|
||||
if (instr->opc == OPC_META_INPUT) {
|
||||
printdef(ctx, defer, "input%lx:<in%u>:w -> %s",
|
||||
PTRID(instr->inout.block),
|
||||
instr->regs[0]->num, target);
|
||||
} else if (instr->opc == OPC_META_FO) {
|
||||
struct ir3_register *reg = instr->regs[1];
|
||||
dump_link2(ctx, reg->instr, target, defer);
|
||||
printdef(ctx, defer, "[label=\".%c\"]",
|
||||
"xyzw"[instr->fo.off & 0x3]);
|
||||
} else if (instr->opc == OPC_META_FI) {
|
||||
unsigned i;
|
||||
|
||||
/* recursively dump all parents and links */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
dump_link2(ctx, reg->instr, target, defer);
|
||||
printdef(ctx, defer, "[label=\".%c\"]",
|
||||
"xyzw"[(i - 1) & 0x3]);
|
||||
}
|
||||
}
|
||||
} else if (instr->opc == OPC_META_OUTPUT) {
|
||||
printdef(ctx, defer, "output%lx:<out%u>:w -> %s",
|
||||
PTRID(instr->inout.block),
|
||||
instr->regs[0]->num, target);
|
||||
} else if (instr->opc == OPC_META_PHI) {
|
||||
/* treat like a normal instruction: */
|
||||
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
|
||||
}
|
||||
} else {
|
||||
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_link(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr,
|
||||
struct ir3_block *block, const char *target)
|
||||
{
|
||||
bool defer = instr->block != block;
|
||||
dump_link2(ctx, instr, target, defer);
|
||||
printdef(ctx, defer, "\n");
|
||||
}
|
||||
|
||||
static struct ir3_register *follow_flow(struct ir3_register *reg)
|
||||
{
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
struct ir3_instruction *instr = reg->instr;
|
||||
/* go with the flow.. */
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_FLOW))
|
||||
return instr->regs[1];
|
||||
}
|
||||
return reg;
|
||||
}
|
||||
|
||||
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{",
|
||||
PTRID(instr));
|
||||
dump_instr_name(ctx, instr);
|
||||
|
||||
/* destination register: */
|
||||
fprintf(ctx->f, "|<dst0>");
|
||||
|
||||
/* source register(s): */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = follow_flow(instr->regs[i]);
|
||||
|
||||
fprintf(ctx->f, "|");
|
||||
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
fprintf(ctx->f, "<src%u> ", (i - 1));
|
||||
|
||||
dump_reg_name(ctx, reg);
|
||||
}
|
||||
|
||||
fprintf(ctx->f, "}\"];\n");
|
||||
|
||||
/* and recursively dump dependent instructions: */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
char target[32]; /* link target */
|
||||
|
||||
if (!(reg->flags & IR3_REG_SSA))
|
||||
continue;
|
||||
|
||||
snprintf(target, sizeof(target), "instr%lx:<src%u>",
|
||||
PTRID(instr), (i - 1));
|
||||
|
||||
dump_instr(ctx, reg->instr);
|
||||
dump_link(ctx, follow_flow(reg)->instr, instr->block, target);
|
||||
}
|
||||
}
|
||||
|
||||
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
|
||||
struct ir3_block *block, const char *name)
|
||||
{
|
||||
unsigned i, n;
|
||||
|
||||
n = edge_buf.n;
|
||||
|
||||
fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block));
|
||||
fprintf(ctx->f, "label=\"%s\";\n", name);
|
||||
|
||||
/* draw inputs: */
|
||||
fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block));
|
||||
for (i = 0; i < block->ninputs; i++)
|
||||
if (block->inputs[i])
|
||||
fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
|
||||
fprintf(ctx->f, "\"];\n");
|
||||
|
||||
/* draw instruction graph: */
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
dump_instr(ctx, block->outputs[i]);
|
||||
|
||||
/* draw outputs: */
|
||||
fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
|
||||
fprintf(ctx->f, "\"];\n");
|
||||
|
||||
/* and links to outputs: */
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
char target[32]; /* link target */
|
||||
|
||||
/* NOTE: there could be outputs that are never assigned,
|
||||
* so skip them
|
||||
*/
|
||||
if (!block->outputs[i])
|
||||
continue;
|
||||
|
||||
snprintf(target, sizeof(target), "output%lx:<out%u>:e",
|
||||
PTRID(block), i);
|
||||
|
||||
dump_link(ctx, block->outputs[i], block, target);
|
||||
}
|
||||
|
||||
fprintf(ctx->f, "}\n");
|
||||
|
||||
/* and links to inputs: */
|
||||
if (block->parent) {
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
char target[32]; /* link target */
|
||||
|
||||
if (!block->inputs[i])
|
||||
continue;
|
||||
|
||||
dump_instr(ctx, block->inputs[i]);
|
||||
|
||||
snprintf(target, sizeof(target), "input%lx:<in%u>:e",
|
||||
PTRID(block), i);
|
||||
|
||||
dump_link(ctx, block->inputs[i], block, target);
|
||||
}
|
||||
}
|
||||
|
||||
/* dump deferred edges: */
|
||||
if (edge_buf.n > n) {
|
||||
fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]);
|
||||
edge_buf.n = n;
|
||||
}
|
||||
}
|
||||
|
||||
void ir3_shader_dump(struct ir3_shader *shader, const char *name,
|
||||
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3_shader? */,
|
||||
FILE *f)
|
||||
{
|
||||
struct ir3_dump_ctx ctx = {
|
||||
.f = f,
|
||||
};
|
||||
ir3_shader_clear_mark(shader);
|
||||
fprintf(ctx.f, "digraph G {\n");
|
||||
fprintf(ctx.f, "rankdir=RL;\n");
|
||||
fprintf(ctx.f, "nodesep=0.25;\n");
|
||||
fprintf(ctx.f, "ranksep=1.5;\n");
|
||||
ir3_block_dump(&ctx, block, name);
|
||||
fprintf(ctx.f, "}\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* For Debugging:
|
||||
*/
|
||||
|
||||
void
|
||||
ir3_dump_instr_single(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_dump_ctx ctx = {
|
||||
.f = stdout,
|
||||
.verbose = true,
|
||||
};
|
||||
unsigned i;
|
||||
|
||||
dump_instr_name(&ctx, instr);
|
||||
for (i = 0; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
printf(i ? ", " : " ");
|
||||
dump_reg_name(&ctx, reg);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void
|
||||
ir3_dump_instr_list(struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned n = 0;
|
||||
|
||||
while (instr) {
|
||||
ir3_dump_instr_single(instr);
|
||||
if (!is_meta(instr))
|
||||
n++;
|
||||
instr = instr->next;
|
||||
}
|
||||
printf("%u instructions\n", n);
|
||||
}
|
||||
140
src/gallium/drivers/freedreno/a3xx/ir3_flatten.c
Normal file
140
src/gallium/drivers/freedreno/a3xx/ir3_flatten.c
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* Flatten: flatten out legs of if/else, etc
|
||||
*
|
||||
* TODO probably should use some heuristic to decide to not flatten
|
||||
* if one side or the other is too large / deeply nested / whatever?
|
||||
*/
|
||||
|
||||
/* State carried through one flatten pass. */
struct ir3_flatten_ctx {
	struct ir3_block *block;   /* block that visited instructions are moved into */
	unsigned cnt;              /* number of PHI instructions converted so far */
};
|
||||
|
||||
static struct ir3_register *unwrap(struct ir3_register *reg)
|
||||
{
|
||||
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
struct ir3_instruction *instr = reg->instr;
|
||||
if (is_meta(instr)) {
|
||||
switch (instr->opc) {
|
||||
case OPC_META_OUTPUT:
|
||||
case OPC_META_FLOW:
|
||||
return instr->regs[1];
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return reg;
|
||||
}
|
||||
|
||||
static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return;
|
||||
|
||||
instr->block = ctx->block;
|
||||
|
||||
/* TODO: maybe some threshold to decide whether to
|
||||
* flatten or not??
|
||||
*/
|
||||
if (is_meta(instr)) {
|
||||
if (instr->opc == OPC_META_PHI) {
|
||||
struct ir3_register *cond, *t, *f;
|
||||
|
||||
/* convert the PHI instruction to sel.{f16,f32} */
|
||||
instr->category = 3;
|
||||
|
||||
/* instruction type based on dst size: */
|
||||
if (instr->regs[0]->flags & IR3_REG_HALF)
|
||||
instr->opc = OPC_SEL_F16;
|
||||
else
|
||||
instr->opc = OPC_SEL_F32;
|
||||
|
||||
/* swap around src register order, to match what
|
||||
* hw expects:
|
||||
*/
|
||||
cond = instr->regs[1];
|
||||
t = instr->regs[2]; /* true val */
|
||||
f = instr->regs[3]; /* false val */
|
||||
|
||||
instr->regs[1] = unwrap(f);
|
||||
instr->regs[2] = unwrap(cond);
|
||||
instr->regs[3] = unwrap(t);
|
||||
|
||||
ctx->cnt++;
|
||||
} else if ((instr->opc == OPC_META_INPUT) &&
|
||||
(instr->regs_count == 2)) {
|
||||
type_t ftype;
|
||||
|
||||
if (instr->regs[0]->flags & IR3_REG_HALF)
|
||||
ftype = TYPE_F16;
|
||||
else
|
||||
ftype = TYPE_F32;
|
||||
|
||||
/* convert meta:input to mov: */
|
||||
instr->category = 1;
|
||||
instr->cat1.src_type = ftype;
|
||||
instr->cat1.dst_type = ftype;
|
||||
}
|
||||
}
|
||||
|
||||
/* recursively visit children: */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *src = instr->regs[i];
|
||||
if (src->flags & IR3_REG_SSA)
|
||||
ir3_instr_flatten(ctx, src->instr);
|
||||
}
|
||||
}
|
||||
|
||||
/* return >= 0 is # of phi's flattened, < 0 is error */
|
||||
int ir3_block_flatten(struct ir3_block *block)
|
||||
{
|
||||
struct ir3_flatten_ctx ctx = {
|
||||
.block = block,
|
||||
};
|
||||
unsigned i;
|
||||
|
||||
ir3_shader_clear_mark(block->shader);
|
||||
for(i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i])
|
||||
ir3_instr_flatten(&ctx, block->outputs[i]);
|
||||
|
||||
return ctx.cnt;
|
||||
}
|
||||
580
src/gallium/drivers/freedreno/a3xx/ir3_ra.c
Normal file
580
src/gallium/drivers/freedreno/a3xx/ir3_ra.c
Normal file
|
|
@ -0,0 +1,580 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ir3.h"
|
||||
#include "ir3_visitor.h"
|
||||
|
||||
/*
|
||||
* Register Assignment:
|
||||
*
|
||||
* NOTE: currently only works on a single basic block.. need to think
|
||||
* about how multiple basic blocks are going to get scheduled. But
|
||||
* I think I want to re-arrange how blocks work, ie. get rid of the
|
||||
* block nesting thing..
|
||||
*
|
||||
* NOTE: we could do register coalescing (eliminate moves) as part of
|
||||
* the RA step.. OTOH I think we need to do scheduling before register
|
||||
* assignment. And if we remove a mov that affects scheduling (unless
|
||||
* we leave a placeholder nop, which seems lame), so I'm not really
|
||||
* sure how practical this is to do both in a single stage. But OTOH
|
||||
* I'm not really sure a sane way for the CP stage to realize when it
|
||||
* cannot remove a mov due to multi-register constraints..
|
||||
*
|
||||
*/
|
||||
|
||||
struct ir3_ra_ctx {
|
||||
struct ir3_block *block;
|
||||
enum shader_t type;
|
||||
int cnt;
|
||||
bool error;
|
||||
};
|
||||
|
||||
/* Result of the constraint calculation for one instruction: the size
 * of the contiguous register range it must live in and where its own
 * destination sits inside that range.
 */
struct ir3_ra_assignment {
	int8_t off;    /* offset of instruction dst within range */
	uint8_t num;   /* number of components for the range */
};
|
||||
|
||||
static void ra_assign(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *assigner, int num);
|
||||
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
|
||||
|
||||
/*
|
||||
* Register Allocation:
|
||||
*/
|
||||
|
||||
/* Build a temporary ir3_register (compound literal) naming register
 * number 'n' with write-mask TGSI_WRITEMASK_<wm>, for use with the
 * regmask helpers.
 */
#define REG(n, wm) ((struct ir3_register){ \
		/*.flags = ((so)->half_precision) ? IR3_REG_HALF : 0,*/ \
		.num = (n), \
		.wrmask = TGSI_WRITEMASK_ ## wm, \
	})
|
||||
|
||||
/* check that the register exists, is a GPR and is not special (a0/p0) */
|
||||
static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
|
||||
{
|
||||
if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
|
||||
return instr->regs[n];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int output_base(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
/* ugg, for fragment shader we need to have input at r0.x
|
||||
* (or at least if there is a way to configure it, I can't
|
||||
* see how because the blob driver always uses r0.x (ie.
|
||||
* all zeros)
|
||||
*/
|
||||
if (ctx->type == SHADER_FRAGMENT)
|
||||
return 2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* live means read before written */
|
||||
static void compute_liveregs(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr, regmask_t *liveregs)
|
||||
{
|
||||
struct ir3_block *block = instr->block;
|
||||
regmask_t written;
|
||||
unsigned i, j;
|
||||
|
||||
regmask_init(liveregs);
|
||||
regmask_init(&written);
|
||||
|
||||
for (instr = instr->next; instr; instr = instr->next) {
|
||||
struct ir3_register *r;
|
||||
|
||||
if (is_meta(instr))
|
||||
continue;
|
||||
|
||||
/* check first src's read: */
|
||||
for (j = 1; j < instr->regs_count; j++) {
|
||||
r = reg_check(instr, j);
|
||||
if (r)
|
||||
regmask_set_if_not(liveregs, r, &written);
|
||||
}
|
||||
|
||||
/* then dst written (if assigned already): */
|
||||
if (instr->flags & IR3_INSTR_MARK) {
|
||||
r = reg_check(instr, 0);
|
||||
if (r)
|
||||
regmask_set(&written, r);
|
||||
}
|
||||
}
|
||||
|
||||
/* be sure to account for output registers too: */
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
struct ir3_register reg = REG(output_base(ctx) + i, X);
|
||||
regmask_set_if_not(liveregs, ®, &written);
|
||||
}
|
||||
}
|
||||
|
||||
/* calculate registers that are clobbered before last use of 'assigner'.
|
||||
* This needs to be done backwards, although it could possibly be
|
||||
* combined into compute_liveregs(). (Ie. compute_liveregs() could
|
||||
* reverse the list, then do this part backwards reversing the list
|
||||
* again back to original order.) Otoh, probably I should try to
|
||||
* construct a proper interference graph instead.
|
||||
*
|
||||
* XXX this need to follow the same recursion path that is used for
|
||||
* to rename/assign registers (ie. ra_assign_src()).. this is a bit
|
||||
* ugly right now, maybe refactor into node iterator sort of things
|
||||
* that iterates nodes in the correct order?
|
||||
*/
|
||||
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr, struct ir3_instruction *assigner,
|
||||
regmask_t *liveregs)
|
||||
{
|
||||
unsigned i;
|
||||
bool live = false, was_live = false;
|
||||
|
||||
if (instr == NULL) {
|
||||
struct ir3_block *block = ctx->block;
|
||||
|
||||
/* if at the end, check outputs: */
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i] == assigner)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
|
||||
if (is_meta(instr)) {
|
||||
switch (instr->opc) {
|
||||
case OPC_META_INPUT:
|
||||
// TODO
|
||||
assert(0);
|
||||
break;
|
||||
case OPC_META_FO:
|
||||
case OPC_META_FI:
|
||||
was_live |= compute_clobbers(ctx, instr->next,
|
||||
instr, liveregs);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
live = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);
|
||||
|
||||
if (was_live && (instr->regs_count > 0) &&
|
||||
(instr->flags & IR3_INSTR_MARK) &&
|
||||
!is_meta(instr))
|
||||
regmask_set(liveregs, instr->regs[0]);
|
||||
|
||||
return live || was_live;
|
||||
}
|
||||
|
||||
/* Find the lowest register number at which 'size' consecutive
 * registers are all absent from 'liveregs'.
 *
 * Returns the first register of the run.  If no run is found the
 * function asserts; with assertions compiled out it returns -1.
 */
static int find_available(regmask_t *liveregs, int size)
{
	unsigned i;
	for (i = 0; i < MAX_REG - size; i++) {
		if (!regmask_get(liveregs, &REG(i, X))) {
			/* candidate start; see how far the free run extends: */
			unsigned start = i++;
			for (; (i < MAX_REG) && ((i - start) < size); i++)
				if (regmask_get(liveregs, &REG(i, X)))
					break;
			if ((i - start) >= size)
				return start;
			/* otherwise 'i' is a live register (or MAX_REG); the
			 * outer loop increment steps past it
			 */
		}
	}
	assert(0);
	return -1;
}
|
||||
|
||||
static int alloc_block(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr, int size)
|
||||
{
|
||||
if (!instr) {
|
||||
/* special case, allocating shader outputs. At this
|
||||
* point, nothing is allocated, just start the shader
|
||||
* outputs at r0.x and let compute_liveregs() take
|
||||
* care of the rest from here:
|
||||
*/
|
||||
return 0;
|
||||
} else {
|
||||
regmask_t liveregs;
|
||||
compute_liveregs(ctx, instr, &liveregs);
|
||||
|
||||
// XXX XXX XXX XXX XXX XXX XXX XXX XXX
|
||||
// XXX hack.. maybe ra_calc should give us a list of
|
||||
// instrs to compute_clobbers() on?
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
|
||||
(instr->regs_count == 1)) {
|
||||
unsigned i, base = instr->regs[0]->num & ~0x3;
|
||||
for (i = 0; i < 4; i++) {
|
||||
struct ir3_instruction *in = ctx->block->inputs[base + i];
|
||||
if (in)
|
||||
compute_clobbers(ctx, in->next, in, &liveregs);
|
||||
}
|
||||
} else
|
||||
// XXX XXX XXX XXX XXX XXX XXX XXX XXX
|
||||
compute_clobbers(ctx, instr->next, instr, &liveregs);
|
||||
return find_available(&liveregs, size);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Constraint Calculation:
|
||||
*/
|
||||
|
||||
struct ra_calc_visitor {
|
||||
struct ir3_visitor base;
|
||||
struct ir3_ra_assignment a;
|
||||
};
|
||||
|
||||
/* Downcast a generic visitor to the ra_calc_visitor that embeds it. */
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	struct ra_calc_visitor *calc = (struct ra_calc_visitor *)v;

	return calc;
}
|
||||
|
||||
/* calculate register assignment for the instruction. If the register
|
||||
* written by this instruction is required to be part of a range, to
|
||||
* handle other (input/output/sam/bary.f/etc) contiguous register range
|
||||
* constraints, that is calculated handled here.
|
||||
*/
|
||||
static void ra_calc_dst(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_calc_visitor *c = ra_calc_visitor(v);
|
||||
if (is_tex(instr)) {
|
||||
c->a.off = 0;
|
||||
c->a.num = 4;
|
||||
} else {
|
||||
c->a.off = 0;
|
||||
c->a.num = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ra_calc_dst_shader_input(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_calc_visitor *c = ra_calc_visitor(v);
|
||||
struct ir3_block *block = instr->block;
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
unsigned base = dst->num & ~0x3;
|
||||
unsigned i, num = 0;
|
||||
|
||||
assert(!(dst->flags & IR3_REG_IA));
|
||||
|
||||
/* check what input components we need: */
|
||||
for (i = 0; i < 4; i++) {
|
||||
unsigned idx = base + i;
|
||||
if ((idx < block->ninputs) && block->inputs[idx])
|
||||
num = i + 1;
|
||||
}
|
||||
|
||||
c->a.off = dst->num - base;
|
||||
c->a.num = num;
|
||||
}
|
||||
|
||||
static void ra_calc_src_fanin(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_calc_visitor *c = ra_calc_visitor(v);
|
||||
unsigned srcn = ir3_instr_regno(instr, reg) - 1;
|
||||
c->a.off -= srcn;
|
||||
c->a.num += srcn;
|
||||
c->a.num = MAX2(c->a.num, instr->regs_count - 1);
|
||||
}
|
||||
|
||||
static const struct ir3_visitor_funcs calc_visitor_funcs = {
|
||||
.instr = ir3_visit_instr,
|
||||
.dst_shader_input = ra_calc_dst_shader_input,
|
||||
.dst_fanout = ra_calc_dst,
|
||||
.dst_fanin = ra_calc_dst,
|
||||
.dst = ra_calc_dst,
|
||||
.src_fanout = ir3_visit_reg,
|
||||
.src_fanin = ra_calc_src_fanin,
|
||||
.src = ir3_visit_reg,
|
||||
};
|
||||
|
||||
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
|
||||
{
|
||||
struct ra_calc_visitor v = {
|
||||
.base.funcs = &calc_visitor_funcs,
|
||||
};
|
||||
|
||||
ir3_visit_instr(&v.base, assigner);
|
||||
|
||||
return v.a;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register Assignment:
|
||||
*/
|
||||
|
||||
struct ra_assign_visitor {
|
||||
struct ir3_visitor base;
|
||||
struct ir3_ra_ctx *ctx;
|
||||
int num;
|
||||
};
|
||||
|
||||
/* Downcast a generic visitor to the ra_assign_visitor that embeds it. */
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	struct ra_assign_visitor *assign = (struct ra_assign_visitor *)v;

	return assign;
}
|
||||
|
||||
static void ra_assign_reg(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_assign_visitor *a = ra_assign_visitor(v);
|
||||
reg->flags &= ~IR3_REG_SSA;
|
||||
reg->num = a->num;
|
||||
}
|
||||
|
||||
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_assign_visitor *a = ra_assign_visitor(v);
|
||||
unsigned i, base = reg->num & ~0x3;
|
||||
int off = base - reg->num;
|
||||
|
||||
ra_assign_reg(v, instr, reg);
|
||||
reg->flags |= IR3_REG_IA;
|
||||
|
||||
/* trigger assignment of all our companion input components: */
|
||||
for (i = 0; i < 4; i++) {
|
||||
struct ir3_instruction *in = instr->block->inputs[i+base];
|
||||
if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
|
||||
ra_assign(a->ctx, in, a->num + off + i);
|
||||
}
|
||||
}
|
||||
|
||||
static void ra_assign_dst_fanout(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_assign_visitor *a = ra_assign_visitor(v);
|
||||
struct ir3_register *src = instr->regs[1];
|
||||
ra_assign_reg(v, instr, reg);
|
||||
if (src->flags & IR3_REG_SSA)
|
||||
ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
|
||||
}
|
||||
|
||||
static void ra_assign_src_fanout(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_assign_visitor *a = ra_assign_visitor(v);
|
||||
ra_assign_reg(v, instr, reg);
|
||||
ra_assign(a->ctx, instr, a->num + instr->fo.off);
|
||||
}
|
||||
|
||||
|
||||
static void ra_assign_src_fanin(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
struct ra_assign_visitor *a = ra_assign_visitor(v);
|
||||
unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
|
||||
ra_assign_reg(v, instr, reg);
|
||||
ra_assign(a->ctx, instr, a->num - srcn);
|
||||
for (j = 1; j < instr->regs_count; j++) {
|
||||
struct ir3_register *reg = instr->regs[j];
|
||||
if (reg->flags & IR3_REG_SSA) /* could be renamed already */
|
||||
ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct ir3_visitor_funcs assign_visitor_funcs = {
|
||||
.instr = ir3_visit_instr,
|
||||
.dst_shader_input = ra_assign_dst_shader_input,
|
||||
.dst_fanout = ra_assign_dst_fanout,
|
||||
.dst_fanin = ra_assign_reg,
|
||||
.dst = ra_assign_reg,
|
||||
.src_fanout = ra_assign_src_fanout,
|
||||
.src_fanin = ra_assign_src_fanin,
|
||||
.src = ra_assign_reg,
|
||||
};
|
||||
|
||||
static void ra_assign(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *assigner, int num)
|
||||
{
|
||||
struct ra_assign_visitor v = {
|
||||
.base.funcs = &assign_visitor_funcs,
|
||||
.ctx = ctx,
|
||||
.num = num,
|
||||
};
|
||||
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (ir3_instr_check_mark(assigner)) {
|
||||
debug_assert(assigner->regs[0]->num == num);
|
||||
if (assigner->regs[0]->num != num) {
|
||||
/* impossible situation, should have been resolved
|
||||
* at an earlier stage by inserting extra mov's:
|
||||
*/
|
||||
ctx->error = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
ir3_visit_instr(&v.base, assigner);
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_ra_assignment a;
|
||||
unsigned num;
|
||||
|
||||
/* skip over nop's */
|
||||
if (instr->regs_count == 0)
|
||||
return;
|
||||
|
||||
/* if we've already visited this instruction, bail now: */
|
||||
if (instr->flags & IR3_INSTR_MARK)
|
||||
return;
|
||||
|
||||
/* allocate register(s): */
|
||||
a = ra_calc(instr);
|
||||
num = alloc_block(ctx, instr, a.num) + a.off;
|
||||
|
||||
ra_assign(ctx, instr, num);
|
||||
}
|
||||
|
||||
/* flatten into shader: */
|
||||
// XXX this should probably be somewhere else:
|
||||
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
struct ir3_instruction *n;
|
||||
struct ir3_shader *shader = block->shader;
|
||||
struct ir3_instruction *end =
|
||||
ir3_instr_create(block, 0, OPC_END);
|
||||
struct ir3_instruction *last_input = NULL;
|
||||
regmask_t needs_ss;
|
||||
regmask_t needs_sy;
|
||||
|
||||
regmask_init(&needs_ss);
|
||||
regmask_init(&needs_sy);
|
||||
|
||||
shader->instrs_count = 0;
|
||||
|
||||
for (n = block->head; n; n = n->next) {
|
||||
unsigned i;
|
||||
|
||||
if (is_meta(n))
|
||||
continue;
|
||||
|
||||
for (i = 1; i < n->regs_count; i++) {
|
||||
struct ir3_register *reg = n->regs[i];
|
||||
|
||||
if (is_gpr(reg)) {
|
||||
|
||||
/* TODO: we probably only need (ss) for alu
|
||||
* instr consuming sfu result.. need to make
|
||||
* some tests for both this and (sy)..
|
||||
*/
|
||||
if (regmask_get(&needs_ss, reg)) {
|
||||
n->flags |= IR3_INSTR_SS;
|
||||
regmask_init(&needs_ss);
|
||||
}
|
||||
|
||||
if (regmask_get(&needs_sy, reg)) {
|
||||
n->flags |= IR3_INSTR_SY;
|
||||
regmask_init(&needs_sy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
shader->instrs[shader->instrs_count++] = n;
|
||||
|
||||
if (is_sfu(n))
|
||||
regmask_set(&needs_ss, n->regs[0]);
|
||||
if (is_tex(n))
|
||||
regmask_set(&needs_sy, n->regs[0]);
|
||||
if (is_input(n))
|
||||
last_input = n;
|
||||
}
|
||||
|
||||
if (last_input)
|
||||
last_input->regs[0]->flags |= IR3_REG_EI;
|
||||
|
||||
shader->instrs[shader->instrs_count++] = end;
|
||||
|
||||
shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
|
||||
}
|
||||
|
||||
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
struct ir3_instruction *n;
|
||||
|
||||
if (!block->parent) {
|
||||
unsigned i;
|
||||
int base, off = output_base(ctx);
|
||||
|
||||
base = alloc_block(ctx, NULL, block->noutputs + off);
|
||||
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i])
|
||||
ra_assign(ctx, block->outputs[i], base + i + off);
|
||||
|
||||
if (ctx->type == SHADER_FRAGMENT) {
|
||||
for (i = 0; i < block->ninputs; i++)
|
||||
if (block->inputs[i])
|
||||
ra_assign(ctx, block->inputs[i], base + i);
|
||||
} else {
|
||||
for (i = 0; i < block->ninputs; i++)
|
||||
if (block->inputs[i])
|
||||
ir3_instr_ra(ctx, block->inputs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* then loop over instruction list and assign registers:
|
||||
*/
|
||||
n = block->head;
|
||||
while (n) {
|
||||
ir3_instr_ra(ctx, n);
|
||||
if (ctx->error)
|
||||
return -1;
|
||||
n = n->next;
|
||||
}
|
||||
|
||||
legalize(ctx, block);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type)
|
||||
{
|
||||
struct ir3_ra_ctx ctx = {
|
||||
.block = block,
|
||||
.type = type,
|
||||
};
|
||||
ir3_shader_clear_mark(block->shader);
|
||||
return block_ra(&ctx, block);
|
||||
}
|
||||
289
src/gallium/drivers/freedreno/a3xx/ir3_sched.c
Normal file
289
src/gallium/drivers/freedreno/a3xx/ir3_sched.c
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* Instruction Scheduling:
|
||||
*
|
||||
* Using the depth sorted list from depth pass, attempt to recursively
|
||||
* schedule deepest unscheduled path. The first instruction that cannot
|
||||
* be scheduled, returns the required delay slots it needs, at which
|
||||
* point we return back up to the top and attempt to schedule by next
|
||||
* highest depth. After a sufficient number of instructions have been
|
||||
* scheduled, return back to beginning of list and start again. If you
|
||||
* reach the end of depth sorted list without being able to insert any
|
||||
* instruction, insert nop's. Repeat until no more unscheduled
|
||||
* instructions.
|
||||
*/
|
||||
|
||||
/* State carried through one scheduling pass. */
struct ir3_sched_ctx {
	struct ir3_instruction *scheduled;   /* most recently scheduled instruction; list head */
	unsigned cnt;                        /* number of instructions scheduled so far */
};
|
||||
|
||||
static struct ir3_instruction *
|
||||
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
|
||||
{
|
||||
struct ir3_instruction *d = NULL;
|
||||
unsigned i = 0, id = 0;
|
||||
|
||||
while ((i < nsrcs) && !(d = srcs[id = i]))
|
||||
i++;
|
||||
|
||||
if (!d)
|
||||
return NULL;
|
||||
|
||||
for (; i < nsrcs; i++)
|
||||
if (srcs[i] && (srcs[i]->depth > d->depth))
|
||||
d = srcs[id = i];
|
||||
|
||||
srcs[id] = NULL;
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
static unsigned distance(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr, unsigned maxd)
|
||||
{
|
||||
struct ir3_instruction *n = ctx->scheduled;
|
||||
unsigned d = 0;
|
||||
while (n && (n != instr) && (d < maxd)) {
|
||||
if (!is_meta(n))
|
||||
d++;
|
||||
n = n->next;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/* TODO maybe we want double linked list? */
|
||||
static struct ir3_instruction * prev(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *p = instr->block->head;
|
||||
while (p && (p->next != instr))
|
||||
p = p->next;
|
||||
return p;
|
||||
}
|
||||
|
||||
static void schedule(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr, bool remove)
|
||||
{
|
||||
struct ir3_block *block = instr->block;
|
||||
|
||||
/* maybe there is a better way to handle this than just stuffing
|
||||
* a nop.. ideally we'd know about this constraint in the
|
||||
* scheduling and depth calculation..
|
||||
*/
|
||||
if (ctx->scheduled && is_sfu(ctx->scheduled) && is_sfu(instr))
|
||||
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
|
||||
|
||||
/* remove from depth list:
|
||||
*/
|
||||
if (remove) {
|
||||
struct ir3_instruction *p = prev(instr);
|
||||
|
||||
/* NOTE: this can happen for inputs which are not
|
||||
* read.. in that case there is no need to schedule
|
||||
* the input, so just bail:
|
||||
*/
|
||||
if (instr != (p ? p->next : block->head))
|
||||
return;
|
||||
|
||||
if (p)
|
||||
p->next = instr->next;
|
||||
else
|
||||
block->head = instr->next;
|
||||
}
|
||||
|
||||
instr->flags |= IR3_INSTR_MARK;
|
||||
|
||||
instr->next = ctx->scheduled;
|
||||
ctx->scheduled = instr;
|
||||
|
||||
ctx->cnt++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delay-slot calculation. Follows fanin/fanout.
|
||||
*/
|
||||
|
||||
static unsigned delay_calc2(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned srcn)
|
||||
{
|
||||
unsigned delay = 0;
|
||||
|
||||
if (is_meta(assigner)) {
|
||||
unsigned i;
|
||||
for (i = 1; i < assigner->regs_count; i++) {
|
||||
struct ir3_register *reg = assigner->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
unsigned d = delay_calc2(ctx, reg->instr,
|
||||
consumer, srcn);
|
||||
delay = MAX2(delay, d);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
delay = ir3_delayslots(assigner, consumer, srcn);
|
||||
delay -= distance(ctx, assigner, delay);
|
||||
}
|
||||
|
||||
return delay;
|
||||
}
|
||||
|
||||
static unsigned delay_calc(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i, delay = 0;
|
||||
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
unsigned d = delay_calc2(ctx, reg->instr,
|
||||
instr, i - 1);
|
||||
delay = MAX2(delay, d);
|
||||
}
|
||||
}
|
||||
|
||||
return delay;
|
||||
}
|
||||
|
||||
/* A negative return value signals that an instruction has been newly
|
||||
* scheduled, return back up to the top of the stack (to block_sched())
|
||||
*/
|
||||
static int trysched(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1];
|
||||
struct ir3_instruction *src;
|
||||
unsigned i, delay, nsrcs = 0;
|
||||
|
||||
/* if already scheduled: */
|
||||
if (instr->flags & IR3_INSTR_MARK)
|
||||
return 0;
|
||||
|
||||
/* figure out our src's: */
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
srcs[nsrcs++] = reg->instr;
|
||||
}
|
||||
|
||||
/* for each src register in sorted order:
|
||||
*/
|
||||
delay = 0;
|
||||
while ((src = deepest(srcs, nsrcs))) {
|
||||
delay = trysched(ctx, src);
|
||||
if (delay)
|
||||
return delay;
|
||||
}
|
||||
|
||||
/* all our dependents are scheduled, figure out if
|
||||
* we have enough delay slots to schedule ourself:
|
||||
*/
|
||||
delay = delay_calc(ctx, instr);
|
||||
|
||||
if (!delay) {
|
||||
schedule(ctx, instr, true);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return delay;
|
||||
}
|
||||
|
||||
static struct ir3_instruction * reverse(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *reversed = NULL;
|
||||
while (instr) {
|
||||
struct ir3_instruction *next = instr->next;
|
||||
instr->next = reversed;
|
||||
reversed = instr;
|
||||
instr = next;
|
||||
}
|
||||
return reversed;
|
||||
}
|
||||
|
||||
static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
|
||||
/* schedule all the shader input's (meta-instr) first so that
|
||||
* the RA step sees that the input registers contain a value
|
||||
* from the start of the shader:
|
||||
*/
|
||||
if (!block->parent) {
|
||||
unsigned i;
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
struct ir3_instruction *in = block->inputs[i];
|
||||
if (in)
|
||||
schedule(ctx, in, true);
|
||||
}
|
||||
}
|
||||
|
||||
while ((instr = block->head)) {
|
||||
/* NOTE: always grab next *before* trysched(), in case the
|
||||
* instruction is actually scheduled (and therefore moved
|
||||
* from depth list into scheduled list)
|
||||
*/
|
||||
struct ir3_instruction *next = instr->next;
|
||||
int cnt = trysched(ctx, instr);
|
||||
/* -1 is signal to return up stack, but to us means same as 0: */
|
||||
cnt = MAX2(0, cnt);
|
||||
cnt += ctx->cnt;
|
||||
instr = next;
|
||||
|
||||
/* if deepest remaining instruction cannot be scheduled, try
|
||||
* the increasingly more shallow instructions until needed
|
||||
* number of delay slots is filled:
|
||||
*/
|
||||
while (instr && (cnt > ctx->cnt)) {
|
||||
next = instr->next;
|
||||
trysched(ctx, instr);
|
||||
instr = next;
|
||||
}
|
||||
|
||||
/* and if we run out of instructions that can be scheduled,
|
||||
* then it is time for nop's:
|
||||
*/
|
||||
while (cnt > ctx->cnt)
|
||||
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
|
||||
}
|
||||
|
||||
/* at this point, scheduled list is in reverse order, so fix that: */
|
||||
block->head = reverse(ctx->scheduled);
|
||||
}
|
||||
|
||||
void ir3_block_sched(struct ir3_block *block)
|
||||
{
|
||||
struct ir3_sched_ctx ctx = {0};
|
||||
ir3_shader_clear_mark(block->shader);
|
||||
block_sched(&ctx, block);
|
||||
}
|
||||
154
src/gallium/drivers/freedreno/a3xx/ir3_visitor.h
Normal file
154
src/gallium/drivers/freedreno/a3xx/ir3_visitor.h
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#ifndef IR3_VISITOR_H_
|
||||
#define IR3_VISITOR_H_
|
||||
|
||||
/**
|
||||
* Visitor which follows dst to src relationships between instructions,
|
||||
* first visiting the dst (writer) instruction, followed by src (reader)
|
||||
* instruction(s).
|
||||
*
|
||||
* TODO maybe we want multiple different visitors to walk the
|
||||
* graph in different ways?
|
||||
*/
|
||||
|
||||
struct ir3_visitor;

/* Callback taking just the instruction being visited: */
typedef void (*ir3_visit_instr_func)(
		struct ir3_visitor *v,
		struct ir3_instruction *instr);

/* Callback taking the instruction being visited plus the register of
 * that instruction (its dst, or one of its srcs) the visit is about:
 */
typedef void (*ir3_visit_reg_func)(
		struct ir3_visitor *v,
		struct ir3_instruction *instr,
		struct ir3_register *reg);
|
||||
|
||||
struct ir3_visitor_funcs {
|
||||
ir3_visit_instr_func instr; // TODO do we need??
|
||||
|
||||
ir3_visit_reg_func dst_shader_input;
|
||||
ir3_visit_reg_func dst_block_input;
|
||||
ir3_visit_reg_func dst_fanout;
|
||||
ir3_visit_reg_func dst_fanin;
|
||||
ir3_visit_reg_func dst;
|
||||
|
||||
ir3_visit_reg_func src_block_input;
|
||||
ir3_visit_reg_func src_fanout;
|
||||
ir3_visit_reg_func src_fanin;
|
||||
ir3_visit_reg_func src;
|
||||
};
|
||||
|
||||
/* Visitor instance, passed as first argument to every callback. */
struct ir3_visitor {
	/* callbacks to dispatch to: */
	const struct ir3_visitor_funcs *funcs;

	/* once set, ir3_visit_instr() stops walking to further reader
	 * instructions:
	 */
	bool error;
};
|
||||
|
||||
#include "util/u_debug.h"
|
||||
|
||||
static void visit_instr_dst(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_register *reg = instr->regs[0];
|
||||
|
||||
if (is_meta(instr)) {
|
||||
switch (instr->opc) {
|
||||
case OPC_META_INPUT:
|
||||
if (instr->regs_count == 1)
|
||||
v->funcs->dst_shader_input(v, instr, reg);
|
||||
else
|
||||
v->funcs->dst_block_input(v, instr, reg);
|
||||
return;
|
||||
case OPC_META_FO:
|
||||
v->funcs->dst_fanout(v, instr, reg);
|
||||
return;
|
||||
case OPC_META_FI:
|
||||
v->funcs->dst_fanin(v, instr, reg);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
v->funcs->dst(v, instr, reg);
|
||||
}
|
||||
|
||||
static void visit_instr_src(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr, struct ir3_register *reg)
|
||||
{
|
||||
if (is_meta(instr)) {
|
||||
switch (instr->opc) {
|
||||
case OPC_META_INPUT:
|
||||
/* shader-input does not have a src, only block input: */
|
||||
debug_assert(instr->regs_count == 2);
|
||||
v->funcs->src_block_input(v, instr, reg);
|
||||
return;
|
||||
case OPC_META_FO:
|
||||
v->funcs->src_fanout(v, instr, reg);
|
||||
return;
|
||||
case OPC_META_FI:
|
||||
v->funcs->src_fanin(v, instr, reg);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
v->funcs->src(v, instr, reg);
|
||||
}
|
||||
|
||||
static void ir3_visit_instr(struct ir3_visitor *v,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *n;
|
||||
|
||||
/* visit instruction that assigns value: */
|
||||
if (instr->regs_count > 0)
|
||||
visit_instr_dst(v, instr);
|
||||
|
||||
/* and of any following instructions which read that value: */
|
||||
n = instr->next;
|
||||
while (n && !v->error) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 1; i < n->regs_count; i++) {
|
||||
struct ir3_register *reg = n->regs[i];
|
||||
if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr))
|
||||
visit_instr_src(v, n, reg);
|
||||
}
|
||||
|
||||
n = n->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Callback with the ir3_visit_reg_func signature that does nothing.
 * NOTE(review): presumably meant to fill ir3_visitor_funcs slots a
 * particular visitor does not care about -- confirm against users.
 */
static void ir3_visit_reg(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	/* no-op */
	(void)v;
	(void)instr;
	(void)reg;
}
|
||||
|
||||
#endif /* IR3_VISITOR_H_ */
|
||||
|
|
@ -68,6 +68,8 @@ static const struct debug_named_value debug_options[] = {
|
|||
{"binning", FD_DBG_BINNING, "Enable hw binning"},
|
||||
{"dbinning", FD_DBG_DBINNING, "Disable hw binning"},
|
||||
{"optimize", FD_DBG_OPTIMIZE, "Enable optimization passes in compiler"},
|
||||
{"optmsgs", FD_DBG_OPTMSGS, "Enable optimizater debug messages"},
|
||||
{"optdump", FD_DBG_OPTDUMP, "Dump shader DAG to .dot files"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
|
|||
#define FD_DBG_BINNING 0x0100
|
||||
#define FD_DBG_DBINNING 0x0200
|
||||
#define FD_DBG_OPTIMIZE 0x0400
|
||||
#define FD_DBG_OPTMSGS 0x0800
|
||||
#define FD_DBG_OPTDUMP 0x1000
|
||||
|
||||
extern int fd_mesa_debug;
|
||||
extern bool fd_binning_enabled;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue