panfrost/midgard: Split up midgard_compile.c (RA)

This commit moves the register allocator out of midgard_compile.c and
into its own midgard_ra.c file. In doing so, a number of dependencies
are identified and moved into their own files in turn. midgard_compile.c
is still fairly monolithic, but this should help.

Code churn, but no functional changes should be introduced by this
commit.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2019-05-19 23:20:34 +00:00
parent 9cd8cd26de
commit 1155446c19
11 changed files with 1149 additions and 928 deletions

View file

@ -27,6 +27,11 @@ files_panfrost = files(
'pan_resource.h',
'midgard/midgard_compile.c',
'midgard/midgard_print.c',
'midgard/midgard_ra.c',
'midgard/midgard_liveness.c',
'midgard/midgard_ops.c',
'midgard/nir_lower_blend.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
@ -97,6 +102,10 @@ driver_panfrost = declare_dependency(
files_midgard = files(
'midgard/midgard_compile.c',
'midgard/midgard_print.c',
'midgard/midgard_ra.c',
'midgard/midgard_liveness.c',
'midgard/midgard_ops.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
'midgard/cmdline.c',
@ -153,6 +162,7 @@ files_pandecode = files(
'pan_pretty_print.c',
'midgard/disassemble.c',
'midgard/midgard_ops.c',
'bifrost/disassemble.c',
)

View file

@ -0,0 +1,359 @@
/*
* Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _MDG_COMPILER_H
#define _MDG_COMPILER_H
#include "midgard.h"
#include "helpers.h"
#include "midgard_compile.h"
#include "util/hash_table.h"
#include "util/u_dynarray.h"
#include "util/set.h"
#include "util/list.h"
#include "main/mtypes.h"
#include "compiler/nir_types.h"
#include "compiler/nir/nir.h"
/* Forward declare */
struct midgard_block;
/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
 * the hardware), hence why that must be zero. TARGET_DISCARD signals this
 * instruction is actually a discard op. */
#define TARGET_GOTO 0
#define TARGET_BREAK 1
#define TARGET_CONTINUE 2
#define TARGET_DISCARD 3
/* High-level description of a branch. Held in the `branch` member of
 * midgard_instruction as the general alternative to a packed br_compact
 * word. */
typedef struct midgard_branch {
/* If conditional, the condition is specified in r31.w */
bool conditional;
/* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
bool invert_conditional;
/* Branch targets: the start of a block, the start of a loop (continue), the
 * end of a loop (break). Value is one of the TARGET_* constants. */
unsigned target_type;
/* The actual target. All three members share storage.
 * NOTE(review): presumably the member matching target_type is the
 * meaningful one -- confirm against the users of this struct. */
union {
int target_block;
int target_break;
int target_continue;
};
} midgard_branch;
/* Instruction arguments represented as block-local SSA indices, rather than
 * registers. Negative values mean unused. */
typedef struct {
/* The two source operands */
int src0;
int src1;
/* The destination operand */
int dest;
/* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
 * in. Only valid for ALU ops. */
bool inline_constant;
} ssa_args;
/* Generic in-memory data type representing a single logical instruction,
 * rather than a single instruction group. This is the preferred form for code
 * gen. Multiple midgard_instructions will later be combined during
 * scheduling, though this is not represented in this structure. Its format
 * bridges the low-level binary representation with the higher level semantic
 * meaning.
 *
 * Notably, it allows registers to be specified as block local SSA, for code
 * emitted before the register allocation pass.
 */
typedef struct midgard_instruction {
/* Must be first for casting */
struct list_head link;
unsigned type; /* ALU, load/store, texture */
/* If the register allocator has not run yet... */
ssa_args ssa_args;
/* Special fields for an ALU instruction */
midgard_reg_info registers;
/* I.e. (1 << alu_bit) */
int unit;
/* When emitting bundle, should this instruction have a break forced
 * before it? Used for r31 writes which are valid only within a single
 * bundle and *need* to happen as early as possible... this is a hack,
 * TODO remove when we have a scheduler */
bool precede_break;
/* Whether `constants` below holds values for this instruction */
bool has_constants;
float constants[4];
/* Value used in place of src1 when ssa_args.inline_constant is set */
uint16_t inline_constant;
bool has_blend_constant;
bool compact_branch;
bool writeout;
bool prepacked_branch;
/* Payload of the instruction; the members share storage.
 * NOTE(review): which member is live presumably follows from `type` and
 * the branch flags above -- confirm against the emitters. */
union {
midgard_load_store_word load_store;
midgard_vector_alu alu;
midgard_texture_word texture;
midgard_branch_extended branch_extended;
uint16_t br_compact;
/* General branch, rather than packed br_compact. Higher level
 * than the other components */
midgard_branch branch;
};
} midgard_instruction;
/* A block of MIR: a list of instructions plus its edges to successor
 * blocks. */
typedef struct midgard_block {
/* Link to next block. Must be first for mir_get_block */
struct list_head link;
/* List of midgard_instructions emitted for the current block */
struct list_head instructions;
bool is_scheduled;
/* List of midgard_bundles emitted (after the scheduler has run) */
struct util_dynarray bundles;
/* Number of quadwords _actually_ emitted, as determined after scheduling */
unsigned quadword_count;
/* Successors: always one forward (the block after us), maybe
 * one backwards (for a backward branch). No need for a second
 * forward, since graph traversal would get there eventually
 * anyway */
struct midgard_block *successors[2];
unsigned nr_successors;
/* The successor pointers form a graph, and in the case of
 * complex control flow, this graph has cycles. To aid
 * traversal during liveness analysis, we have a visited?
 * boolean for passes to use as they see fit, provided they
 * clean up later */
bool visited;
} midgard_block;
/* A group of instructions emitted together. Bundles are what
 * midgard_block.bundles holds once the scheduler has run. */
typedef struct midgard_bundle {
/* Tag for the overall bundle */
int tag;
/* Instructions contained by the bundle (at most 5 fit in the array) */
int instruction_count;
midgard_instruction instructions[5];
/* Bundle-wide ALU configuration */
int padding;
int control;
bool has_embedded_constants;
float constants[4];
bool has_blend_constant;
/* Register words and body words, each with a count of used entries;
 * body_size[i] is the size recorded for body_words[i] */
uint16_t register_words[8];
int register_words_count;
uint64_t body_words[8];
size_t body_size[8];
int body_words_count;
} midgard_bundle;
/* State for compiling one shader; a pointer to it is passed to the MIR
 * helpers, printers and the register allocator. */
typedef struct compiler_context {
nir_shader *nir;
gl_shader_stage stage;
/* Is internally a blend shader? Depends on stage == FRAGMENT */
bool is_blend;
/* Tracking for blend constant patching */
int blend_constant_offset;
/* Current NIR function */
nir_function *func;
/* Unordered list of midgard_blocks */
int block_count;
struct list_head blocks;
midgard_block *initial_block;
midgard_block *previous_source_block;
midgard_block *final_block;
/* Block whose instruction list emit_mir_instruction appends to */
midgard_block *current_block;
/* The current "depth" of the loop, for disambiguating breaks/continues
 * when using nested loops */
int current_loop_depth;
/* Constants which have been loaded, for later inlining */
struct hash_table_u64 *ssa_constants;
/* SSA indices to be outputted to corresponding varying offset */
struct hash_table_u64 *ssa_varyings;
/* SSA values / registers which have been aliased. Naively, these
 * demand a fmov output; instead, we alias them in a later pass to
 * avoid the wasted op.
 *
 * A note on encoding: to avoid dynamic memory management here, rather
 * than mapping to a pointer, we map to the source index; the key
 * itself is just the destination index. */
struct hash_table_u64 *ssa_to_alias;
struct set *leftover_ssa_to_alias;
/* Actual SSA-to-register for RA */
struct hash_table_u64 *ssa_to_register;
/* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
struct hash_table_u64 *hash_to_temp;
int temp_count;
int max_hash;
/* Just the count of the max register used. Higher count => higher
 * register pressure */
int work_registers;
/* Used for cont/last hinting. Increase when a tex op is added.
 * Decrease when a tex op is removed. */
int texture_op_count;
/* Mapping of texture register -> SSA index for unaliasing */
int texture_index[2];
/* If any path hits a discard instruction */
bool can_discard;
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
/* Count of instructions emitted from NIR overall, across all blocks */
int instruction_count;
/* Alpha ref value passed in */
float alpha_ref;
/* The index corresponding to the fragment output */
unsigned fragment_output;
/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
unsigned sysvals[MAX_SYSVAL_COUNT];
unsigned sysval_count;
struct hash_table_u64 *sysval_to_id;
} compiler_context;
/* Helpers for manipulating the above structures (forming the driver IR) */
/* Copy an instruction (passed by value) into freshly malloc'd storage and
 * return the heap copy. The malloc result is not checked. */
static inline midgard_instruction *
mir_upload_ins(struct midgard_instruction ins)
{
        midgard_instruction *copy = malloc(sizeof(*copy));

        memcpy(copy, &ins, sizeof(*copy));

        return copy;
}
static inline void
emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
{
list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
}
static inline void
mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
{
list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
}
/* Unlink the instruction from the list it is on. The instruction's storage
 * is not freed here. */
static inline void
mir_remove_instruction(struct midgard_instruction *ins)
{
        struct list_head *node = &ins->link;

        list_del(node);
}
/* Look up the neighbour of `ins` by using its own link as a list head and
 * taking that list's last entry.
 * NOTE(review): presumably this is the instruction just before `ins`, and no
 * end-of-block check is made -- confirm against util/list.h. */
static inline midgard_instruction*
mir_prev_op(struct midgard_instruction *ins)
{
        struct list_head *self = &ins->link;

        return list_last_entry(self, midgard_instruction, link);
}
/* Look up the neighbour of `ins` by using its own link as a list head and
 * taking that list's first entry.
 * NOTE(review): presumably this is the instruction just after `ins`, and no
 * end-of-block check is made -- confirm against util/list.h. */
static inline midgard_instruction*
mir_next_op(struct midgard_instruction *ins)
{
        struct list_head *self = &ins->link;

        return list_first_entry(self, midgard_instruction, link);
}
/* Iteration helpers over the MIR lists. `v` names the loop variable that the
 * underlying list macro declares. The `ctx` and `block` parameters are
 * parenthesised so that any pointer-valued expression can be passed, not
 * just a bare identifier. */
#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &(ctx)->blocks, link)
#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &(ctx)->blocks, link)
#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &(block)->instructions, link)
/* Return the last entry of the block's instruction list */
static inline midgard_instruction *
mir_last_in_block(struct midgard_block *block)
{
        struct list_head *instrs = &block->instructions;

        return list_last_entry(instrs, struct midgard_instruction, link);
}
/* Return the block reached by walking idx + 1 links from the head of
 * ctx->blocks, so idx 0 is the first block on the list. The final cast
 * relies on `link` being the first member of midgard_block. */
static inline midgard_block *
mir_get_block(compiler_context *ctx, int idx)
{
        struct list_head *node = &ctx->blocks;

        /* One hop to step off the list head, then idx more */
        for (int hops = idx + 1; hops != 0; --hops)
                node = node->next;

        return (struct midgard_block *) node;
}
/* MIR printing: pretty-printers for one instruction, one block, and the
 * whole shader.
 * NOTE(review): presumably implemented in midgard_print.c -- confirm. */
void mir_print_instruction(midgard_instruction *ins);
void mir_print_block(midgard_block *block);
void mir_print_shader(compiler_context *ctx);
/* Register allocation. struct ra_graph is only forward-declared here, so
 * users of this header handle it by pointer.
 * NOTE(review): presumably allocate_registers builds the graph that
 * install_registers then consumes -- confirm in midgard_ra.c. */
struct ra_graph;
struct ra_graph* allocate_registers(compiler_context *ctx);
void install_registers(compiler_context *ctx, struct ra_graph *g);
/* Liveness query for SSA index `src`, given a block and a starting
 * instruction. NOTE(review): exact semantics are not visible here -- see the
 * definition. */
bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);
#endif

View file

@ -31,6 +31,7 @@
#include <string.h>
#include "midgard.h"
#include "midgard-parse.h"
#include "midgard_ops.h"
#include "disassemble.h"
#include "helpers.h"
#include "util/half_float.h"

View file

@ -1,7 +1,4 @@
/* Author(s):
* Alyssa Rosenzweig
*
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@ -22,6 +19,9 @@
* THE SOFTWARE.
*/
#ifndef __MDG_HELPERS_H
#define __MDG_HELPERS_H
#define OP_IS_STORE_VARY(op) (\
op == midgard_op_st_vary_16 || \
op == midgard_op_st_vary_32 \
@ -150,140 +150,12 @@
#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
/* Table of mapping opcodes to accompanying properties relevant to
* scheduling/emission/etc */
static struct {
struct mir_op_props {
const char *name;
unsigned props;
} alu_opcode_props[256] = {
[midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
[midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
[midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
[midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
[midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
[midgard_alu_op_fround] = {"fround", UNITS_ADD},
[midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
[midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
[midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
[midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
[midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},
/* Though they output a scalar, they need to run on a vector unit
* since they process vectors */
[midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
[midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
[midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
/* Incredibly, iadd can run on vmul, etc */
[midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
[midgard_alu_op_isub] = {"isub", UNITS_MOST},
[midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
[midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
/* For vector comparisons, use ball etc */
[midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_fle] = {"fle", UNITS_MOST},
[midgard_alu_op_flt] = {"flt", UNITS_MOST},
[midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
[midgard_alu_op_ile] = {"ile", UNITS_MOST},
[midgard_alu_op_ult] = {"ult", UNITS_MOST},
[midgard_alu_op_ule] = {"ule", UNITS_MOST},
[midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
[midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
[midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
[midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},
[midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
[midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
[midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
[midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
[midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
[midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},
[midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
[midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
[midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
[midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
[midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
[midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
[midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},
/* XXX: Test case where it's right on smul but not sadd */
[midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},
[midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
/* NOTE(review): iornot is flagged OP_COMMUTES while iandnot is not, and
 * inand lacks the flag while inor has it -- confirm these are intended */
[midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
[midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
[midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
[midgard_alu_op_inand] = {"inand", UNITS_MOST},
[midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
[midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
[midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},
[midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
[midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
[midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
[midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
[midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
[midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
/* These instructions are not yet emitted by the compiler, so
* don't speculate about units yet */
[midgard_alu_op_ishladd] = {"ishladd", 0},
[midgard_alu_op_uball_lt] = {"uball_lt", 0},
[midgard_alu_op_uball_lte] = {"uball_lte", 0},
[midgard_alu_op_iball_lt] = {"iball_lt", 0},
[midgard_alu_op_iball_lte] = {"iball_lte", 0},
[midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
[midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
[midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
[midgard_alu_op_ibany_lte] = {"ibany_lte", 0},
[midgard_alu_op_freduce] = {"freduce", 0},
[midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
[midgard_alu_op_bbany_neq] = {"bbany_neq", 0 | OP_COMMUTES},
[midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
[midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
};
/* Is this opcode that of an integer (regardless of signedness)? Instruction
* names authoritatively determine types */
/* This file is common, so don't define the tables themselves. #include
* midgard_ops.h if you need that, or edit midgard_ops.c directly */
/* Classify an ALU opcode as integer by the first letter of its name in
 * alu_opcode_props: 'i' or 'u' means integer. Opcodes with no name in the
 * table are reported as non-integer. */
static inline bool
midgard_is_integer_op(int op)
{
        const char *mnemonic = alu_opcode_props[op].name;

        if (!mnemonic)
                return false;

        switch (mnemonic[0]) {
        case 'i':
        case 'u':
                return true;
        default:
                return false;
        }
}
/* Does this opcode *write* an integer? Matches midgard_is_integer_op, except
 * that opcodes flagged OP_TYPE_CONVERT (int<->float conversions) give the
 * opposite answer. */
static inline bool
midgard_is_integer_out_op(int op)
{
        bool converts = alu_opcode_props[op].props & OP_TYPE_CONVERT;
        bool integer_in = midgard_is_integer_op(op);

        /* Exactly one of the two being set means an integer result */
        return integer_in != converts;
}
#endif

View file

@ -536,54 +536,4 @@ __attribute__((__packed__))
}
midgard_texture_word;
/* Printable names for load/store opcodes, indexed by the opcode value.
 * Opcodes without an entry below are left as NULL, so users must check the
 * looked-up name before printing it. */
static char *load_store_opcode_names[256] = {
[midgard_op_st_cubemap_coords] = "st_cubemap_coords",
[midgard_op_ld_global_id] = "ld_global_id",
[midgard_op_atomic_add] = "atomic_add",
[midgard_op_atomic_and] = "atomic_and",
[midgard_op_atomic_or] = "atomic_or",
[midgard_op_atomic_xor] = "atomic_xor",
[midgard_op_atomic_imin] = "atomic_imin",
[midgard_op_atomic_umin] = "atomic_umin",
[midgard_op_atomic_imax] = "atomic_imax",
[midgard_op_atomic_umax] = "atomic_umax",
[midgard_op_atomic_xchg] = "atomic_xchg",
[midgard_op_ld_char] = "ld_char",
[midgard_op_ld_char2] = "ld_char2",
[midgard_op_ld_short] = "ld_short",
[midgard_op_ld_char4] = "ld_char4",
[midgard_op_ld_short4] = "ld_short4",
[midgard_op_ld_int4] = "ld_int4",
[midgard_op_ld_attr_32] = "ld_attr_32",
[midgard_op_ld_attr_16] = "ld_attr_16",
[midgard_op_ld_attr_32i] = "ld_attr_32i",
[midgard_op_ld_vary_32] = "ld_vary_32",
[midgard_op_ld_vary_16] = "ld_vary_16",
[midgard_op_ld_vary_32i] = "ld_vary_32i",
[midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
[midgard_op_ld_uniform_16] = "ld_uniform_16",
[midgard_op_ld_uniform_32] = "ld_uniform_32",
[midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
[midgard_op_st_char] = "st_char",
[midgard_op_st_char2] = "st_char2",
[midgard_op_st_char4] = "st_char4",
[midgard_op_st_short4] = "st_short4",
[midgard_op_st_int4] = "st_int4",
[midgard_op_st_vary_32] = "st_vary_32",
[midgard_op_st_vary_16] = "st_vary_16",
[midgard_op_st_vary_32i] = "st_vary_32i",
[midgard_op_st_image_f] = "st_image_f",
[midgard_op_st_image_ui] = "st_image_ui",
[midgard_op_st_image_i] = "st_image_i",
};
#endif

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -36,7 +36,6 @@
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"
#include "util/half_float.h"
#include "util/register_allocate.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/list.h"
@ -45,7 +44,9 @@
#include "midgard.h"
#include "midgard_nir.h"
#include "midgard_compile.h"
#include "midgard_ops.h"
#include "helpers.h"
#include "compiler.h"
#include "disassemble.h"
@ -64,138 +65,12 @@ int midgard_debug = 0;
fprintf(stderr, "%s:%d: "fmt, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
/* Instruction arguments represented as block-local SSA indices, rather than
 * registers. Negative values mean unused. */
typedef struct {
/* The two source operands */
int src0;
int src1;
/* The destination operand */
int dest;
/* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
 * in. Only valid for ALU ops. */
bool inline_constant;
} ssa_args;
/* Forward declare so midgard_branch can reference */
struct midgard_block;
/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
 * the hardware), hence why that must be zero. TARGET_DISCARD signals this
 * instruction is actually a discard op. */
#define TARGET_GOTO 0
#define TARGET_BREAK 1
#define TARGET_CONTINUE 2
#define TARGET_DISCARD 3
/* High-level description of a branch. Held in the `branch` member of
 * midgard_instruction as the general alternative to a packed br_compact
 * word. */
typedef struct midgard_branch {
/* If conditional, the condition is specified in r31.w */
bool conditional;
/* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
bool invert_conditional;
/* Branch targets: the start of a block, the start of a loop (continue), the
 * end of a loop (break). Value is one of the TARGET_* constants. */
unsigned target_type;
/* The actual target. All three members share storage.
 * NOTE(review): presumably the member matching target_type is the
 * meaningful one -- confirm against the users of this struct. */
union {
int target_block;
int target_break;
int target_continue;
};
} midgard_branch;
/* True when `unit` is exactly one of the two branch unit values
 * (ALU_ENAB_BRANCH or ALU_ENAB_BR_COMPACT) */
static bool
midgard_is_branch_unit(unsigned unit)
{
        if (unit == ALU_ENAB_BRANCH)
                return true;

        return unit == ALU_ENAB_BR_COMPACT;
}
/* Generic in-memory data type representing a single logical instruction,
 * rather than a single instruction group. This is the preferred form for code
 * gen. Multiple midgard_instructions will later be combined during
 * scheduling, though this is not represented in this structure. Its format
 * bridges the low-level binary representation with the higher level semantic
 * meaning.
 *
 * Notably, it allows registers to be specified as block local SSA, for code
 * emitted before the register allocation pass.
 */
typedef struct midgard_instruction {
/* Must be first for casting */
struct list_head link;
unsigned type; /* ALU, load/store, texture */
/* If the register allocator has not run yet... */
ssa_args ssa_args;
/* Special fields for an ALU instruction */
midgard_reg_info registers;
/* I.e. (1 << alu_bit) */
int unit;
/* When emitting bundle, should this instruction have a break forced
 * before it? Used for r31 writes which are valid only within a single
 * bundle and *need* to happen as early as possible... this is a hack,
 * TODO remove when we have a scheduler */
bool precede_break;
/* Whether `constants` below holds values for this instruction */
bool has_constants;
float constants[4];
/* Value used in place of src1 when ssa_args.inline_constant is set */
uint16_t inline_constant;
bool has_blend_constant;
bool compact_branch;
bool writeout;
bool prepacked_branch;
/* Payload of the instruction; the members share storage.
 * NOTE(review): which member is live presumably follows from `type` and
 * the branch flags above -- confirm against the emitters. */
union {
midgard_load_store_word load_store;
midgard_vector_alu alu;
midgard_texture_word texture;
midgard_branch_extended branch_extended;
uint16_t br_compact;
/* General branch, rather than packed br_compact. Higher level
 * than the other components */
midgard_branch branch;
};
} midgard_instruction;
/* A block of MIR: a list of instructions plus its edges to successor
 * blocks. */
typedef struct midgard_block {
/* Link to next block. Must be first for mir_get_block */
struct list_head link;
/* List of midgard_instructions emitted for the current block */
struct list_head instructions;
bool is_scheduled;
/* List of midgard_bundles emitted (after the scheduler has run) */
struct util_dynarray bundles;
/* Number of quadwords _actually_ emitted, as determined after scheduling */
unsigned quadword_count;
/* Successors: always one forward (the block after us), maybe
 * one backwards (for a backward branch). No need for a second
 * forward, since graph traversal would get there eventually
 * anyway */
struct midgard_block *successors[2];
unsigned nr_successors;
/* The successor pointers form a graph, and in the case of
 * complex control flow, this graph has cycles. To aid
 * traversal during liveness analysis, we have a visited?
 * boolean for passes to use as they see fit, provided they
 * clean up later */
bool visited;
} midgard_block;
static void
midgard_block_add_successor(midgard_block *block, midgard_block *successor)
{
@ -404,267 +279,6 @@ midgard_create_branch_extended( midgard_condition cond,
return branch;
}
/* A group of instructions emitted together. Bundles are what
 * midgard_block.bundles holds once the scheduler has run. */
typedef struct midgard_bundle {
/* Tag for the overall bundle */
int tag;
/* Instructions contained by the bundle (at most 5 fit in the array) */
int instruction_count;
midgard_instruction instructions[5];
/* Bundle-wide ALU configuration */
int padding;
int control;
bool has_embedded_constants;
float constants[4];
bool has_blend_constant;
/* Register words and body words, each with a count of used entries;
 * body_size[i] is the size recorded for body_words[i] */
uint16_t register_words[8];
int register_words_count;
uint64_t body_words[8];
size_t body_size[8];
int body_words_count;
} midgard_bundle;
/* State for compiling one shader; a pointer to it is passed to the MIR
 * helpers, printers and the register allocator. */
typedef struct compiler_context {
nir_shader *nir;
gl_shader_stage stage;
/* Is internally a blend shader? Depends on stage == FRAGMENT */
bool is_blend;
/* Tracking for blend constant patching */
int blend_constant_offset;
/* Current NIR function */
nir_function *func;
/* Unordered list of midgard_blocks */
int block_count;
struct list_head blocks;
midgard_block *initial_block;
midgard_block *previous_source_block;
midgard_block *final_block;
/* Block whose instruction list emit_mir_instruction appends to */
midgard_block *current_block;
/* The current "depth" of the loop, for disambiguating breaks/continues
 * when using nested loops */
int current_loop_depth;
/* Constants which have been loaded, for later inlining */
struct hash_table_u64 *ssa_constants;
/* SSA indices to be outputted to corresponding varying offset */
struct hash_table_u64 *ssa_varyings;
/* SSA values / registers which have been aliased. Naively, these
 * demand a fmov output; instead, we alias them in a later pass to
 * avoid the wasted op.
 *
 * A note on encoding: to avoid dynamic memory management here, rather
 * than mapping to a pointer, we map to the source index; the key
 * itself is just the destination index. */
struct hash_table_u64 *ssa_to_alias;
struct set *leftover_ssa_to_alias;
/* Actual SSA-to-register for RA */
struct hash_table_u64 *ssa_to_register;
/* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
struct hash_table_u64 *hash_to_temp;
int temp_count;
int max_hash;
/* Just the count of the max register used. Higher count => higher
 * register pressure */
int work_registers;
/* Used for cont/last hinting. Increase when a tex op is added.
 * Decrease when a tex op is removed. */
int texture_op_count;
/* Mapping of texture register -> SSA index for unaliasing */
int texture_index[2];
/* If any path hits a discard instruction */
bool can_discard;
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
/* Count of instructions emitted from NIR overall, across all blocks */
int instruction_count;
/* Alpha ref value passed in */
float alpha_ref;
/* The index corresponding to the fragment output */
unsigned fragment_output;
/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
unsigned sysvals[MAX_SYSVAL_COUNT];
unsigned sysval_count;
struct hash_table_u64 *sysval_to_id;
} compiler_context;
/* Copy an instruction (passed by value) into freshly malloc'd storage and
 * return the heap copy. The malloc result is not checked. */
static midgard_instruction *
mir_upload_ins(struct midgard_instruction ins)
{
        midgard_instruction *copy = malloc(sizeof(*copy));

        memcpy(copy, &ins, sizeof(*copy));

        return copy;
}
static void
emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
{
list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
}
static void
mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
{
list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
}
/* Unlink the instruction from the list it is on. The instruction's storage
 * is not freed here. */
static void
mir_remove_instruction(struct midgard_instruction *ins)
{
        struct list_head *node = &ins->link;

        list_del(node);
}
/* Look up the neighbour of `ins` by using its own link as a list head and
 * taking that list's last entry.
 * NOTE(review): presumably this is the instruction just before `ins`, and no
 * end-of-block check is made -- confirm against util/list.h. */
static midgard_instruction*
mir_prev_op(struct midgard_instruction *ins)
{
        struct list_head *self = &ins->link;

        return list_last_entry(self, midgard_instruction, link);
}
/* Look up the neighbour of `ins` by using its own link as a list head and
 * taking that list's first entry.
 * NOTE(review): presumably this is the instruction just after `ins`, and no
 * end-of-block check is made -- confirm against util/list.h. */
static midgard_instruction*
mir_next_op(struct midgard_instruction *ins)
{
        struct list_head *self = &ins->link;

        return list_first_entry(self, midgard_instruction, link);
}
/* Iteration helpers over the MIR lists. `v` names the loop variable that the
 * underlying list macro declares. The `ctx` and `block` parameters are
 * parenthesised so that any pointer-valued expression can be passed, not
 * just a bare identifier. */
#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &(ctx)->blocks, link)
#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &(ctx)->blocks, link)
#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &(block)->instructions, link)
/* Return the last entry of the block's instruction list */
static midgard_instruction *
mir_last_in_block(struct midgard_block *block)
{
        struct list_head *instrs = &block->instructions;

        return list_last_entry(instrs, struct midgard_instruction, link);
}
/* Return the block reached by walking idx + 1 links from the head of
 * ctx->blocks, so idx 0 is the first block on the list. The final cast
 * relies on `link` being the first member of midgard_block. */
static midgard_block *
mir_get_block(compiler_context *ctx, int idx)
{
        struct list_head *node = &ctx->blocks;

        /* One hop to step off the list head, then idx more */
        for (int hops = idx + 1; hops != 0; --hops)
                node = node->next;

        return (struct midgard_block *) node;
}
/* Pretty printer for internal Midgard IR */

/* Print one source operand to stdout. Indices below SSA_FIXED_MINIMUM are
 * printed as plain numbers; anything else is decoded to a fixed register and
 * printed as "u<N>" for registers 17..23 (N = 23 - reg) or "r<N>"
 * otherwise. */
static void
print_mir_source(int source)
{
        if (source < SSA_FIXED_MINIMUM) {
                printf("%d", source);
                return;
        }

        /* Specific register */
        int reg = SSA_REG_FROM_FIXED(source);

        /* TODO: Moving threshold */
        if (reg >= 17 && reg <= 23)
                printf("u%d", 23 - reg);
        else
                printf("r%d", reg);
}
/* Print one instruction to stdout on a tab-indented line: the op name (with
 * a "<unit>." prefix for ALU ops that have a unit set), then the
 * destination, src0, and either src1 or the inline constant, followed by the
 * embedded constants if present. */
static void
print_mir_instruction(midgard_instruction *ins)
{
        printf("\t");

        if (ins->type == TAG_ALU_4) {
                midgard_alu_op op = ins->alu.op;
                const char *mnemonic = alu_opcode_props[op].name;

                if (ins->unit)
                        printf("%d.", ins->unit);

                /* ALU ops missing from the table are printed as "??" */
                printf("%s", mnemonic ? mnemonic : "??");
        } else if (ins->type == TAG_LOAD_STORE_4) {
                midgard_load_store_op op = ins->load_store.op;
                const char *mnemonic = load_store_opcode_names[op];

                assert(mnemonic);
                printf("%s", mnemonic);
        } else if (ins->type == TAG_TEXTURE_4) {
                printf("texture");
        } else {
                assert(0);
        }

        ssa_args *operands = &ins->ssa_args;

        printf(" %d, ", operands->dest);

        print_mir_source(operands->src0);
        printf(", ");

        if (operands->inline_constant)
                printf("#%d", ins->inline_constant);
        else
                print_mir_source(operands->src1);

        if (ins->has_constants) {
                const float *c = ins->constants;

                printf(" <%f, %f, %f, %f>", c[0], c[1], c[2], c[3]);
        }

        printf("\n");
}
/* Prints every instruction of a block, wrapped in a pair of braces */
static void
print_mir_block(midgard_block *block)
{
        printf("{\n");

        mir_foreach_instr_in_block(block, instr)
                print_mir_instruction(instr);

        printf("}\n");
}
static void
attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name)
{
@ -975,26 +589,6 @@ effective_writemask(midgard_vector_alu *alu)
return squeeze_writemask(alu->mask);
}
/* Maps a sparse SSA index ("hash") onto a dense temporary index, handing out
 * the next free temporary the first time a given index is seen. Negative
 * sentinel values and indices at or above SSA_FIXED_MINIMUM are returned
 * untouched. Both keys and values are stored in ctx->hash_to_temp biased by
 * one, so that a zero search result means "no entry". ctx->max_hash tracks
 * the largest index that has been given a temporary. */
static unsigned
find_or_allocate_temp(compiler_context *ctx, unsigned hash)
{
        /* hash is unsigned, so a plain `hash < 0` can never be true; test the
         * signed reinterpretation so negative sentinels coming from the
         * (signed) ssa_args fields are explicitly passed through */
        if (((int) hash < 0) || (hash >= SSA_FIXED_MINIMUM))
                return hash;

        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);

        if (temp)
                return temp - 1;

        /* If no temp is found, allocate one */
        temp = ctx->temp_count++;
        ctx->max_hash = MAX2(ctx->max_hash, hash);

        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));

        return temp;
}
static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
@ -1983,338 +1577,6 @@ emit_instr(compiler_context *ctx, struct nir_instr *instr)
}
}
/* Resolves an MIR index to a hardware register number. Fixed indices decode
 * directly, non-negative indices are looked up in the solved RA graph (also
 * raising ctx->work_registers to the highest register seen), and the unused
 * sentinels resolve to REGISTER_UNUSED */
static int
dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
{
        if (reg >= SSA_FIXED_MINIMUM)
                return SSA_REG_FROM_FIXED(reg);

        if (reg < 0) {
                /* The fmov style and lut style unused markers resolve
                 * identically */
                if (reg == SSA_UNUSED_0 || reg == SSA_UNUSED_1)
                        return REGISTER_UNUSED;

                DBG("Unknown SSA register alias %d\n", reg);
                assert(0);
                return 31;
        }

        assert(reg < maxreg);
        assert(g);

        int assigned = ra_get_node_reg(g, reg);

        ctx->work_registers = MAX2(ctx->work_registers, assigned);

        return assigned;
}
/* Register selection callback: returns the lowest-numbered of the first 16
 * registers whose bit is set in `regs` */
static unsigned int
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
{
        /* Choose the first available register to minimise reported register pressure */
        int candidate = 0;

        while (candidate < 16 && !BITSET_TEST(regs, candidate))
                ++candidate;

        if (candidate < 16)
                return candidate;

        /* Nothing was available among the candidates */
        assert(0);
        return 0;
}
/* True when the instruction reads `src` through either of its two sources */
static bool
midgard_is_live_in_instr(midgard_instruction *ins, int src)
{
        return (ins->ssa_args.src0 == src) || (ins->ssa_args.src1 == src);
}
/* Depth-first search through the successors of a block, reporting whether
 * any instruction reachable from it reads `src`. Blocks are flagged as
 * visited along the way; this function never clears those flags itself. */
static bool
is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
{
        for (unsigned s = 0; s < bl->nr_successors; ++s) {
                midgard_block *next = bl->successors[s];

                /* A block that was already visited cannot contain the value
                 * we're seeking, or the search would have short circuited */
                if (!next->visited) {
                        next->visited = true;

                        mir_foreach_instr_in_block(next, use) {
                                if (midgard_is_live_in_instr(use, src))
                                        return true;
                        }

                        /* Nothing in the block itself, so keep descending */
                        if (is_live_after_successors(ctx, next, src))
                                return true;
                }
        }

        /* No reachable read was found */
        return false;
}
/* Reports whether `src` is read anywhere after `start`: first in the
 * remainder of `block`, then in everything reachable through its successors.
 * The visited flags of all blocks are cleared again after the successor
 * search. */
static bool
is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
{
        /* Rest of the current block first */
        mir_foreach_instr_in_block_from(block, later, mir_next_op(start)) {
                if (midgard_is_live_in_instr(later, src))
                        return true;
        }

        /* Then the remaining blocks, recursively */
        bool live_downstream = is_live_after_successors(ctx, block, src);

        mir_foreach_block(ctx, blk)
                blk->visited = false;

        return live_downstream;
}
/* After allocate_registers has produced a solved graph, rewrite every
 * non-branch instruction so that it carries hardware registers rather than
 * SSA indices. ALU instructions get both sources and the destination filled
 * in (or an encoded inline constant in place of the second source);
 * load/store instructions get their single register. */
static void
install_registers(compiler_context *ctx, struct ra_graph *g)
{
        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch)
                                continue;

                        ssa_args args = ins->ssa_args;

                        if (ins->type == TAG_ALU_4) {
                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
                                ins->registers.src2_imm = args.inline_constant;

                                if (!args.inline_constant) {
                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
                                } else {
                                        /* Encode inline 16-bit constant as a vector by default:
                                         * the bits from 11 upwards go in the register field, the
                                         * low 11 bits are shuffled into src2 */
                                        ins->registers.src2_reg = ins->inline_constant >> 11;

                                        int low_bits = ins->inline_constant & ((1 << 12) - 1);
                                        int hi3 = (low_bits >> 8) & 0x7;
                                        int lo8 = low_bits & 0xFF;
                                        uint16_t imm = hi3 | (lo8 << 3);

                                        ins->alu.src2 = imm << 2;
                                }

                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
                        } else if (ins->type == TAG_LOAD_STORE_4) {
                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
                                        /* TODO: use ssa_args for st_vary */
                                        ins->load_store.reg = 0;
                                } else {
                                        /* Use the destination when there is one, else the source */
                                        int ssa_arg = (args.dest >= 0) ? args.dest : args.src0;

                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
                                }
                        }
                }
        }
}
/* This routine performs the actual register allocation for the whole shader
 * and returns the solved interference graph, or NULL when the shader has no
 * temporaries. It should be succeeded by install_registers, which consumes
 * the returned graph.
 *
 * Steps, in order: build the register set and classes, rewrite every SSA
 * index in the MIR into a dense temporary index, pin the nodes listed in
 * ctx->ssa_to_register, compute a live range per node, add interference
 * between nodes whose ranges overlap, then solve. */
static struct ra_graph *
allocate_registers(compiler_context *ctx)
{
/* First, initialize the RA */
struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
/* Create a primary (general purpose) class, as well as special purpose
 * pipeline register classes */
int primary_class = ra_alloc_reg_class(regs);
int varying_class = ra_alloc_reg_class(regs);
/* Add the full set of work registers: sixteen, less one for every unit by
 * which uniform_cutoff exceeds 8 */
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
for (int i = 0; i < work_count; ++i)
ra_class_add_reg(regs, primary_class, i);
/* Add special registers */
/* NOTE(review): varying_class is populated here but no node is assigned to
 * it further down; every destination gets primary_class */
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
/* We're done setting up */
ra_set_finalize(regs, NULL);
/* Transform the MIR into squeezed index form. This rewrites the ssa_args of
 * every non-branch instruction in place */
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
}
/* Dump the squeezed block when shader debugging is enabled */
if (midgard_debug & MIDGARD_DBG_SHADERS)
print_mir_block(block);
}
/* No register allocation to do with no SSA */
/* NOTE(review): regs is not released on this path -- confirm ownership */
if (!ctx->temp_count)
return NULL;
/* Let's actually do register allocation */
int nodes = ctx->temp_count;
struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
/* Set everything to the work register class, unless it has somewhere
 * special to go */
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
int class = primary_class;
ra_set_node_class(g, ins->ssa_args.dest, class);
}
}
/* Pin every index that has an entry in ctx->ssa_to_register to that
 * register. Lookups use index + 1 and the stored value is register + 1, so a
 * zero result means there is no entry.
 * NOTE(review): find_or_allocate_temp would hand out a fresh temp here if
 * `index` was never squeezed above, which would lie beyond `nodes` -- confirm
 * this cannot happen */
for (int index = 0; index <= ctx->max_hash; ++index) {
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
if (temp) {
unsigned reg = temp - 1;
int t = find_or_allocate_temp(ctx, index);
ra_set_node_reg(g, t, reg);
}
}
/* Determine liveness */
/* NOTE(review): the malloc results are used without a NULL check */
int *live_start = malloc(nodes * sizeof(int));
int *live_end = malloc(nodes * sizeof(int));
/* Initialize as non-existent */
for (int i = 0; i < nodes; ++i) {
live_start[i] = live_end[i] = -1;
}
/* d is a running instruction counter used as the time axis for live ranges;
 * it is not advanced for the instructions skipped below */
int d = 0;
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
/* Dest is < 0 for st_vary instructions, which break
 * the usual SSA conventions. Liveness analysis doesn't
 * make sense on these instructions, so skip them to
 * avoid memory corruption */
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
/* If this destination is not yet live, it is now since we just wrote it */
int dest = ins->ssa_args.dest;
if (live_start[dest] == -1)
live_start[dest] = d;
}
/* Since we just used a source, the source might be
 * dead now. Scan the rest of the block for
 * invocations, and if there are none, the source dies
 * */
int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
for (int src = 0; src < 2; ++src) {
int s = sources[src];
/* Only squeezed temporaries have a slot in the liveness arrays */
if (s < 0) continue;
if (s >= SSA_FIXED_MINIMUM) continue;
if (!is_live_after(ctx, block, ins, s)) {
live_end[s] = d;
}
}
++d;
}
}
/* If a node still hasn't been killed, kill it now */
for (int i = 0; i < nodes; ++i) {
/* live_start == -1 most likely indicates a pinned output */
if (live_end[i] == -1)
live_end[i] = d;
}
/* Setup interference between nodes that are live at the same time: two
 * ranges are disjoint only when one starts at or after the other ends */
for (int i = 0; i < nodes; ++i) {
for (int j = i + 1; j < nodes; ++j) {
if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
ra_add_node_interference(g, i, j);
}
}
ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
if (!ra_allocate(g)) {
DBG("Error allocating registers\n");
assert(0);
}
/* Cleanup */
free(live_start);
free(live_end);
return g;
}
/* Midgard IR only knows vector ALU types, but we sometimes need to actually
* use scalar ALU instructions, for functional or performance reasons. To do
* this, we just demote vector ALU payloads to scalar. */
@ -3247,7 +2509,7 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
if (midgard_is_pinned(ctx, ins->ssa_args.dest)) continue;
if (is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
mir_remove_instruction(ins);
progress = true;

View file

@ -0,0 +1,92 @@
/*
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* mir_is_live_after performs liveness analysis on the MIR, used primarily
* as part of register allocation. TODO: Algorithmic improvements for
* compiler performance (this is the worst algorithm possible -- see
* backlog with Connor on IRC) */
#include "compiler.h"
/* True when the instruction reads `src` through either of its two sources */
static bool
midgard_is_live_in_instr(midgard_instruction *ins, int src)
{
        return (ins->ssa_args.src0 == src) || (ins->ssa_args.src1 == src);
}
/* Depth-first search through the successors of a block, reporting whether
 * any instruction reachable from it reads `src`. Blocks are flagged as
 * visited along the way; this function never clears those flags itself. */
static bool
is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
{
        for (unsigned s = 0; s < bl->nr_successors; ++s) {
                midgard_block *next = bl->successors[s];

                /* A block that was already visited cannot contain the value
                 * we're seeking, or the search would have short circuited */
                if (!next->visited) {
                        next->visited = true;

                        mir_foreach_instr_in_block(next, use) {
                                if (midgard_is_live_in_instr(use, src))
                                        return true;
                        }

                        /* Nothing in the block itself, so keep descending */
                        if (is_live_after_successors(ctx, next, src))
                                return true;
                }
        }

        /* No reachable read was found */
        return false;
}
/* Reports whether `src` is read anywhere after `start`: first in the
 * remainder of `block`, then in everything reachable through its successors.
 * The visited flags of all blocks are cleared again after the successor
 * search. */
bool
mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
{
        /* Rest of the current block first */
        mir_foreach_instr_in_block_from(block, later, mir_next_op(start)) {
                if (midgard_is_live_in_instr(later, src))
                        return true;
        }

        /* Then the remaining blocks, recursively */
        bool live_downstream = is_live_after_successors(ctx, block, src);

        mir_foreach_block(ctx, blk)
                blk->visited = false;

        return live_downstream;
}

View file

@ -0,0 +1,188 @@
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "midgard.h"
/* Include the definitions of the macros and such */
#define MIDGARD_OPS_TABLE
#include "helpers.h"
#undef MIDGARD_OPS_TABLE
/* Table mapping ALU opcodes to their accompanying properties: a mnemonic
 * plus a bitmask of unit/behaviour flags. This is used for both the
 * disassembler and the compiler. It is placed in a .c file like this to
 * avoid duplications in the binary. Opcodes without an entry are left
 * zero-initialised, i.e. with a NULL name. */

struct mir_op_props alu_opcode_props[256] = {
        [midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
        [midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
        [midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
        [midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
        [midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
        [midgard_alu_op_fround] = {"fround", UNITS_ADD},
        [midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
        [midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
        [midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
        [midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
        [midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},

        /* Though they output a scalar, they need to run on a vector unit
         * since they process vectors */
        [midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
        [midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
        [midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},

        /* Incredibly, iadd can run on vmul, etc */
        [midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
        [midgard_alu_op_isub] = {"isub", UNITS_MOST},
        [midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
        [midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},

        /* For vector comparisons, use ball etc */
        [midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_fle] = {"fle", UNITS_MOST},
        [midgard_alu_op_flt] = {"flt", UNITS_MOST},
        [midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
        [midgard_alu_op_ile] = {"ile", UNITS_MOST},
        [midgard_alu_op_ult] = {"ult", UNITS_MOST},
        [midgard_alu_op_ule] = {"ule", UNITS_MOST},

        [midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
        [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
        [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
        [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},

        [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
        [midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
        [midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
        [midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
        [midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
        [midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},

        [midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
        [midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
        [midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
        [midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
        [midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},

        [midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
        [midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},

        /* XXX: Test case where it's right on smul but not sadd */
        [midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},

        /* NOTE(review): iornot carries OP_COMMUTES while iandnot and inand
         * do not -- confirm against the hardware semantics */
        [midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
        [midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
        [midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
        [midgard_alu_op_inand] = {"inand", UNITS_MOST},
        [midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
        [midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
        [midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},

        [midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
        [midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
        [midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
        [midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
        [midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
        [midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},

        /* These instructions are not yet emitted by the compiler, so
         * don't speculate about units yet */
        [midgard_alu_op_ishladd] = {"ishladd", 0},

        [midgard_alu_op_uball_lt] = {"uball_lt", 0},
        [midgard_alu_op_uball_lte] = {"uball_lte", 0},
        [midgard_alu_op_iball_lt] = {"iball_lt", 0},
        [midgard_alu_op_iball_lte] = {"iball_lte", 0},
        [midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
        [midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
        [midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
        [midgard_alu_op_ibany_lte] = {"ibany_lte", 0},

        [midgard_alu_op_freduce] = {"freduce", 0},
        [midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
        /* The mnemonic must match the opcode; this entry previously
         * repeated "bball_eq" */
        [midgard_alu_op_bbany_neq] = {"bbany_neq", 0 | OP_COMMUTES},
        [midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
        [midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
};
/* Table mapping load/store opcodes to their mnemonics, indexed directly by
 * opcode value. Opcodes without an entry are left as NULL. */
const char *load_store_opcode_names[256] = {
[midgard_op_st_cubemap_coords] = "st_cubemap_coords",
[midgard_op_ld_global_id] = "ld_global_id",
/* Atomics */
[midgard_op_atomic_add] = "atomic_add",
[midgard_op_atomic_and] = "atomic_and",
[midgard_op_atomic_or] = "atomic_or",
[midgard_op_atomic_xor] = "atomic_xor",
[midgard_op_atomic_imin] = "atomic_imin",
[midgard_op_atomic_umin] = "atomic_umin",
[midgard_op_atomic_imax] = "atomic_imax",
[midgard_op_atomic_umax] = "atomic_umax",
[midgard_op_atomic_xchg] = "atomic_xchg",
/* Loads */
[midgard_op_ld_char] = "ld_char",
[midgard_op_ld_char2] = "ld_char2",
[midgard_op_ld_short] = "ld_short",
[midgard_op_ld_char4] = "ld_char4",
[midgard_op_ld_short4] = "ld_short4",
[midgard_op_ld_int4] = "ld_int4",
[midgard_op_ld_attr_32] = "ld_attr_32",
[midgard_op_ld_attr_16] = "ld_attr_16",
[midgard_op_ld_attr_32i] = "ld_attr_32i",
[midgard_op_ld_vary_32] = "ld_vary_32",
[midgard_op_ld_vary_16] = "ld_vary_16",
[midgard_op_ld_vary_32i] = "ld_vary_32i",
[midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
[midgard_op_ld_uniform_16] = "ld_uniform_16",
[midgard_op_ld_uniform_32] = "ld_uniform_32",
[midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
/* Stores */
[midgard_op_st_char] = "st_char",
[midgard_op_st_char2] = "st_char2",
[midgard_op_st_char4] = "st_char4",
[midgard_op_st_short4] = "st_short4",
[midgard_op_st_int4] = "st_int4",
[midgard_op_st_vary_32] = "st_vary_32",
[midgard_op_st_vary_16] = "st_vary_16",
[midgard_op_st_vary_32i] = "st_vary_32i",
[midgard_op_st_image_f] = "st_image_f",
[midgard_op_st_image_ui] = "st_image_ui",
[midgard_op_st_image_i] = "st_image_i",
};

View file

@ -0,0 +1,53 @@
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "helpers.h"
/* Forward declare */
extern struct mir_op_props alu_opcode_props[256];
extern const char *load_store_opcode_names[256];
/* Classifies an ALU opcode as integer (of either signedness) purely from its
 * mnemonic: names beginning with 'i' or 'u' are integer ops. Opcodes with no
 * name in alu_opcode_props are reported as non-integer. */
static inline bool
midgard_is_integer_op(int op)
{
        const char *mnemonic = alu_opcode_props[op].name;

        if (!mnemonic)
                return false;

        switch (mnemonic[0]) {
        case 'i':
        case 'u':
                return true;
        default:
                return false;
        }
}
/* Reports whether an opcode *produces* an integer. This matches
 * midgard_is_integer_op except for opcodes flagged OP_TYPE_CONVERT, where the
 * answer is inverted */
static inline bool
midgard_is_integer_out_op(int op)
{
        bool converts = alu_opcode_props[op].props & OP_TYPE_CONVERT;
        bool integer_in = midgard_is_integer_op(op);

        /* Exactly one of the two must hold */
        return integer_in != converts;
}

View file

@ -0,0 +1,124 @@
/*
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler.h"
#include "helpers.h"
#include "midgard_ops.h"
/* Pretty printer for Midgard IR, for use debugging compiler-internal
* passes like register allocation. The output superficially resembles
* Midgard assembly, with the exception that unit information and such is
* (normally) omitted, and generic indices are usually used instead of
* registers */
/* Prints one source operand: indices below SSA_FIXED_MINIMUM are printed as
 * bare numbers, anything else is decoded to a register and printed with an
 * "r" prefix, or a "u" prefix for registers strictly between 16 and 24 */
static void
mir_print_source(int source)
{
        /* Not a fixed register, so just dump the index */
        if (source < SSA_FIXED_MINIMUM) {
                printf("%d", source);
                return;
        }

        /* Specific register */
        int fixed = SSA_REG_FROM_FIXED(source);

        /* TODO: Moving threshold */
        bool prints_as_u = (fixed > 16) && (fixed < 24);

        if (prints_as_u)
                printf("u%d", 23 - fixed);
        else
                printf("r%d", fixed);
}
/* Prints a single MIR instruction on its own tab-indented line: the opcode
 * mnemonic (prefixed by the unit number when one is set), the destination
 * index, both sources (or an inline constant in place of the second) and,
 * when present, the four attached constants */
void
mir_print_instruction(midgard_instruction *ins)
{
        printf("\t");

        if (ins->type == TAG_ALU_4) {
                const char *mnemonic = alu_opcode_props[ins->alu.op].name;

                if (ins->unit)
                        printf("%d.", ins->unit);

                /* Opcodes missing from the table print as a placeholder */
                printf("%s", mnemonic ? mnemonic : "??");
        } else if (ins->type == TAG_LOAD_STORE_4) {
                const char *mnemonic = load_store_opcode_names[ins->load_store.op];

                assert(mnemonic);
                printf("%s", mnemonic);
        } else if (ins->type == TAG_TEXTURE_4) {
                printf("texture");
        } else {
                assert(0);
        }

        ssa_args *ssa = &ins->ssa_args;

        printf(" %d, ", ssa->dest);
        mir_print_source(ssa->src0);
        printf(", ");

        /* The second operand is either a real source or an inline constant */
        if (!ssa->inline_constant)
                mir_print_source(ssa->src1);
        else
                printf("#%d", ins->inline_constant);

        if (ins->has_constants) {
                printf(" <%f, %f, %f, %f>",
                       ins->constants[0], ins->constants[1],
                       ins->constants[2], ins->constants[3]);
        }

        printf("\n");
}
/* Dumps the MIR of a single block: every instruction in order, wrapped in a
 * pair of braces */
void
mir_print_block(midgard_block *block)
{
        printf("{\n");

        mir_foreach_instr_in_block(block, instr)
                mir_print_instruction(instr);

        printf("}\n");
}
/* Dumps the MIR of an entire shader, one block after another */
void
mir_print_shader(compiler_context *ctx)
{
        mir_foreach_block(ctx, blk)
                mir_print_block(blk);
}

View file

@ -0,0 +1,310 @@
/*
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler.h"
#include "util/register_allocate.h"
/* When we're 'squeezing down' the values in the IR, we maintain a hash as
 * such: this maps a sparse SSA index ("hash") onto a dense temporary index,
 * handing out the next free temporary the first time a given index is seen.
 * Negative sentinel values and indices at or above SSA_FIXED_MINIMUM are
 * returned untouched. Both keys and values are stored in ctx->hash_to_temp
 * biased by one, so that a zero search result means "no entry".
 * ctx->max_hash tracks the largest index that has been given a temporary. */

static unsigned
find_or_allocate_temp(compiler_context *ctx, unsigned hash)
{
        /* hash is unsigned, so a plain `hash < 0` can never be true; test the
         * signed reinterpretation so negative sentinels coming from the
         * (signed) ssa_args fields are explicitly passed through */
        if (((int) hash < 0) || (hash >= SSA_FIXED_MINIMUM))
                return hash;

        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);

        if (temp)
                return temp - 1;

        /* If no temp is found, allocate one */
        temp = ctx->temp_count++;
        ctx->max_hash = MAX2(ctx->max_hash, hash);

        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));

        return temp;
}
/* Callback for register allocation selection, trivial default for now:
 * returns the lowest-numbered of the first 16 registers whose bit is set in
 * `regs` */
static unsigned int
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
{
        /* Choose the first available register to minimise reported register pressure */
        int candidate = 0;

        while (candidate < 16 && !BITSET_TEST(regs, candidate))
                ++candidate;

        if (candidate < 16)
                return candidate;

        /* Nothing was available among the candidates */
        assert(0);
        return 0;
}
/* Resolves an MIR index to a hardware register number. Fixed indices decode
 * directly, non-negative indices are looked up in the solved RA graph (also
 * raising ctx->work_registers to the highest register seen), and the unused
 * sentinels resolve to REGISTER_UNUSED */
static int
dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
{
        if (reg >= SSA_FIXED_MINIMUM)
                return SSA_REG_FROM_FIXED(reg);

        if (reg < 0) {
                /* Only the two unused markers are valid negative values */
                if (reg == SSA_UNUSED_0 || reg == SSA_UNUSED_1)
                        return REGISTER_UNUSED;

                unreachable("Unknown SSA register alias");
        }

        assert(reg < maxreg);
        assert(g);

        int assigned = ra_get_node_reg(g, reg);

        ctx->work_registers = MAX2(ctx->work_registers, assigned);

        return assigned;
}
/* This routine performs the actual register allocation for the whole shader
 * and returns the solved interference graph, or NULL when the shader has no
 * temporaries. It should be succeeded by install_registers, which consumes
 * the returned graph.
 *
 * Steps, in order: build the register set and classes, rewrite every SSA
 * index in the MIR into a dense temporary index, pin the nodes listed in
 * ctx->ssa_to_register, compute a live range per node, add interference
 * between nodes whose ranges overlap, then solve. */
struct ra_graph *
allocate_registers(compiler_context *ctx)
{
/* First, initialize the RA */
struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
/* Create a primary (general purpose) class, as well as special purpose
 * pipeline register classes */
int primary_class = ra_alloc_reg_class(regs);
int varying_class = ra_alloc_reg_class(regs);
/* Add the full set of work registers: sixteen, less one for every unit by
 * which uniform_cutoff exceeds 8 */
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
for (int i = 0; i < work_count; ++i)
ra_class_add_reg(regs, primary_class, i);
/* Add special registers */
/* NOTE(review): varying_class is populated here but no node is assigned to
 * it further down; every destination gets primary_class */
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
/* We're done setting up */
ra_set_finalize(regs, NULL);
/* Transform the MIR into squeezed index form. This rewrites the ssa_args of
 * every non-branch instruction in place */
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
}
}
/* No register allocation to do with no SSA */
/* NOTE(review): regs is not released on this path -- confirm ownership */
if (!ctx->temp_count)
return NULL;
/* Let's actually do register allocation */
int nodes = ctx->temp_count;
struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
/* Set everything to the work register class, unless it has somewhere
 * special to go */
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
int class = primary_class;
ra_set_node_class(g, ins->ssa_args.dest, class);
}
}
/* Pin every index that has an entry in ctx->ssa_to_register to that
 * register. Lookups use index + 1 and the stored value is register + 1, so a
 * zero result means there is no entry.
 * NOTE(review): find_or_allocate_temp would hand out a fresh temp here if
 * `index` was never squeezed above, which would lie beyond `nodes` -- confirm
 * this cannot happen */
for (int index = 0; index <= ctx->max_hash; ++index) {
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
if (temp) {
unsigned reg = temp - 1;
int t = find_or_allocate_temp(ctx, index);
ra_set_node_reg(g, t, reg);
}
}
/* Determine liveness */
/* NOTE(review): the malloc results are used without a NULL check */
int *live_start = malloc(nodes * sizeof(int));
int *live_end = malloc(nodes * sizeof(int));
/* Initialize as non-existent */
for (int i = 0; i < nodes; ++i) {
live_start[i] = live_end[i] = -1;
}
/* d is a running instruction counter used as the time axis for live ranges;
 * it is not advanced for the instructions skipped below */
int d = 0;
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
/* Dest is < 0 for st_vary instructions, which break
 * the usual SSA conventions. Liveness analysis doesn't
 * make sense on these instructions, so skip them to
 * avoid memory corruption */
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
/* If this destination is not yet live, it is now since we just wrote it */
int dest = ins->ssa_args.dest;
if (live_start[dest] == -1)
live_start[dest] = d;
}
/* Since we just used a source, the source might be
 * dead now. Scan the rest of the block for
 * invocations, and if there are none, the source dies
 * */
int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
for (int src = 0; src < 2; ++src) {
int s = sources[src];
/* Only squeezed temporaries have a slot in the liveness arrays */
if (s < 0) continue;
if (s >= SSA_FIXED_MINIMUM) continue;
if (!mir_is_live_after(ctx, block, ins, s)) {
live_end[s] = d;
}
}
++d;
}
}
/* If a node still hasn't been killed, kill it now */
for (int i = 0; i < nodes; ++i) {
/* live_start == -1 most likely indicates a pinned output */
if (live_end[i] == -1)
live_end[i] = d;
}
/* Setup interference between nodes that are live at the same time: two
 * ranges are disjoint only when one starts at or after the other ends */
for (int i = 0; i < nodes; ++i) {
for (int j = i + 1; j < nodes; ++j) {
if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
ra_add_node_interference(g, i, j);
}
}
ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
if (!ra_allocate(g)) {
unreachable("Error allocating registers\n");
}
/* Cleanup */
free(live_start);
free(live_end);
return g;
}
/* After allocate_registers has produced a solved graph, rewrite every
 * non-branch instruction so that it carries hardware registers rather than
 * SSA indices. ALU instructions get both sources and the destination filled
 * in (or an encoded inline constant in place of the second source);
 * load/store instructions get their single register. */
void
install_registers(compiler_context *ctx, struct ra_graph *g)
{
        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch)
                                continue;

                        ssa_args args = ins->ssa_args;

                        if (ins->type == TAG_ALU_4) {
                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
                                ins->registers.src2_imm = args.inline_constant;

                                if (!args.inline_constant) {
                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
                                } else {
                                        /* Encode inline 16-bit constant as a vector by default:
                                         * the bits from 11 upwards go in the register field, the
                                         * low 11 bits are shuffled into src2 */
                                        ins->registers.src2_reg = ins->inline_constant >> 11;

                                        int low_bits = ins->inline_constant & ((1 << 12) - 1);
                                        int hi3 = (low_bits >> 8) & 0x7;
                                        int lo8 = low_bits & 0xFF;
                                        uint16_t imm = hi3 | (lo8 << 3);

                                        ins->alu.src2 = imm << 2;
                                }

                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
                        } else if (ins->type == TAG_LOAD_STORE_4) {
                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
                                        /* TODO: use ssa_args for st_vary */
                                        ins->load_store.reg = 0;
                                } else {
                                        /* Use the destination when there is one, else the source */
                                        int ssa_arg = (args.dest >= 0) ? args.dest : args.src0;

                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
                                }
                        }
                }
        }
}