mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 00:49:04 +02:00
panfrost/midgard: Split up midgard_compile.c (RA)
This commit moves the register allocator out of midgard_compile.c and into its own midgard_ra.c file. In doing so, a number of dependencies are identified and moved into their own files in turn. midgard_compile.c is still fairly monolithic, but this should help. Code churn, but no functional changes should be introduced by this commit. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
9cd8cd26de
commit
1155446c19
11 changed files with 1149 additions and 928 deletions
|
|
@ -27,6 +27,11 @@ files_panfrost = files(
|
|||
'pan_resource.h',
|
||||
|
||||
'midgard/midgard_compile.c',
|
||||
'midgard/midgard_print.c',
|
||||
'midgard/midgard_ra.c',
|
||||
'midgard/midgard_liveness.c',
|
||||
'midgard/midgard_ops.c',
|
||||
|
||||
'midgard/nir_lower_blend.c',
|
||||
'midgard/cppwrap.cpp',
|
||||
'midgard/disassemble.c',
|
||||
|
|
@ -97,6 +102,10 @@ driver_panfrost = declare_dependency(
|
|||
|
||||
files_midgard = files(
|
||||
'midgard/midgard_compile.c',
|
||||
'midgard/midgard_print.c',
|
||||
'midgard/midgard_ra.c',
|
||||
'midgard/midgard_liveness.c',
|
||||
'midgard/midgard_ops.c',
|
||||
'midgard/cppwrap.cpp',
|
||||
'midgard/disassemble.c',
|
||||
'midgard/cmdline.c',
|
||||
|
|
@ -153,6 +162,7 @@ files_pandecode = files(
|
|||
'pan_pretty_print.c',
|
||||
|
||||
'midgard/disassemble.c',
|
||||
'midgard/midgard_ops.c',
|
||||
'bifrost/disassemble.c',
|
||||
)
|
||||
|
||||
|
|
|
|||
359
src/gallium/drivers/panfrost/midgard/compiler.h
Normal file
359
src/gallium/drivers/panfrost/midgard/compiler.h
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _MDG_COMPILER_H
|
||||
#define _MDG_COMPILER_H
|
||||
|
||||
#include "midgard.h"
|
||||
#include "helpers.h"
|
||||
#include "midgard_compile.h"
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/set.h"
|
||||
#include "util/list.h"
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "compiler/nir_types.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
/* Forward declare */
|
||||
struct midgard_block;
|
||||
|
||||
/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
|
||||
* the hardware), hence why that must be zero. TARGET_DISCARD signals this
|
||||
* instruction is actually a discard op. */
|
||||
|
||||
#define TARGET_GOTO 0
|
||||
#define TARGET_BREAK 1
|
||||
#define TARGET_CONTINUE 2
|
||||
#define TARGET_DISCARD 3
|
||||
|
||||
typedef struct midgard_branch {
|
||||
/* If conditional, the condition is specified in r31.w */
|
||||
bool conditional;
|
||||
|
||||
/* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
|
||||
bool invert_conditional;
|
||||
|
||||
/* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */
|
||||
unsigned target_type;
|
||||
|
||||
/* The actual target */
|
||||
union {
|
||||
int target_block;
|
||||
int target_break;
|
||||
int target_continue;
|
||||
};
|
||||
} midgard_branch;
|
||||
|
||||
/* Instruction arguments represented as block-local SSA indices, rather than
|
||||
* registers. Negative values mean unused. */
|
||||
|
||||
typedef struct {
|
||||
int src0;
|
||||
int src1;
|
||||
int dest;
|
||||
|
||||
/* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
|
||||
* in. Only valid for ALU ops. */
|
||||
bool inline_constant;
|
||||
} ssa_args;
|
||||
|
||||
/* Generic in-memory data type representing a single logical instruction, rather
|
||||
* than a single instruction group. This is the preferred form for code gen.
|
||||
* Multiple midgard_instructions will later be combined during scheduling,
|
||||
* though this is not represented in this structure. Its format bridges
|
||||
* the low-level binary representation with the higher level semantic meaning.
|
||||
*
|
||||
* Notably, it allows registers to be specified as block local SSA, for code
|
||||
* emitted before the register allocation pass.
|
||||
*/
|
||||
|
||||
typedef struct midgard_instruction {
|
||||
/* Must be first for casting */
|
||||
struct list_head link;
|
||||
|
||||
unsigned type; /* ALU, load/store, texture */
|
||||
|
||||
/* If the register allocator has not run yet... */
|
||||
ssa_args ssa_args;
|
||||
|
||||
/* Special fields for an ALU instruction */
|
||||
midgard_reg_info registers;
|
||||
|
||||
/* I.e. (1 << alu_bit) */
|
||||
int unit;
|
||||
|
||||
/* When emitting bundle, should this instruction have a break forced
|
||||
* before it? Used for r31 writes which are valid only within a single
|
||||
* bundle and *need* to happen as early as possible... this is a hack,
|
||||
* TODO remove when we have a scheduler */
|
||||
bool precede_break;
|
||||
|
||||
bool has_constants;
|
||||
float constants[4];
|
||||
uint16_t inline_constant;
|
||||
bool has_blend_constant;
|
||||
|
||||
bool compact_branch;
|
||||
bool writeout;
|
||||
bool prepacked_branch;
|
||||
|
||||
union {
|
||||
midgard_load_store_word load_store;
|
||||
midgard_vector_alu alu;
|
||||
midgard_texture_word texture;
|
||||
midgard_branch_extended branch_extended;
|
||||
uint16_t br_compact;
|
||||
|
||||
/* General branch, rather than packed br_compact. Higher level
|
||||
* than the other components */
|
||||
midgard_branch branch;
|
||||
};
|
||||
} midgard_instruction;
|
||||
|
||||
typedef struct midgard_block {
|
||||
/* Link to next block. Must be first for mir_get_block */
|
||||
struct list_head link;
|
||||
|
||||
/* List of midgard_instructions emitted for the current block */
|
||||
struct list_head instructions;
|
||||
|
||||
bool is_scheduled;
|
||||
|
||||
/* List of midgard_bundles emitted (after the scheduler has run) */
|
||||
struct util_dynarray bundles;
|
||||
|
||||
/* Number of quadwords _actually_ emitted, as determined after scheduling */
|
||||
unsigned quadword_count;
|
||||
|
||||
/* Successors: always one forward (the block after us), maybe
|
||||
* one backwards (for a backward branch). No need for a second
|
||||
* forward, since graph traversal would get there eventually
|
||||
* anyway */
|
||||
struct midgard_block *successors[2];
|
||||
unsigned nr_successors;
|
||||
|
||||
/* The successor pointers form a graph, and in the case of
|
||||
* complex control flow, this graph has cycles. To aid
|
||||
* traversal during liveness analysis, we have a visited?
|
||||
* boolean for passes to use as they see fit, provided they
|
||||
* clean up later */
|
||||
bool visited;
|
||||
} midgard_block;
|
||||
|
||||
typedef struct midgard_bundle {
|
||||
/* Tag for the overall bundle */
|
||||
int tag;
|
||||
|
||||
/* Instructions contained by the bundle */
|
||||
int instruction_count;
|
||||
midgard_instruction instructions[5];
|
||||
|
||||
/* Bundle-wide ALU configuration */
|
||||
int padding;
|
||||
int control;
|
||||
bool has_embedded_constants;
|
||||
float constants[4];
|
||||
bool has_blend_constant;
|
||||
|
||||
uint16_t register_words[8];
|
||||
int register_words_count;
|
||||
|
||||
uint64_t body_words[8];
|
||||
size_t body_size[8];
|
||||
int body_words_count;
|
||||
} midgard_bundle;
|
||||
|
||||
typedef struct compiler_context {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage stage;
|
||||
|
||||
/* Is internally a blend shader? Depends on stage == FRAGMENT */
|
||||
bool is_blend;
|
||||
|
||||
/* Tracking for blend constant patching */
|
||||
int blend_constant_offset;
|
||||
|
||||
/* Current NIR function */
|
||||
nir_function *func;
|
||||
|
||||
/* Unordered list of midgard_blocks */
|
||||
int block_count;
|
||||
struct list_head blocks;
|
||||
|
||||
midgard_block *initial_block;
|
||||
midgard_block *previous_source_block;
|
||||
midgard_block *final_block;
|
||||
|
||||
/* Block that newly emitted instructions are appended to */
|
||||
midgard_block *current_block;
|
||||
|
||||
/* The current "depth" of the loop, for disambiguating breaks/continues
|
||||
* when using nested loops */
|
||||
int current_loop_depth;
|
||||
|
||||
/* Constants which have been loaded, for later inlining */
|
||||
struct hash_table_u64 *ssa_constants;
|
||||
|
||||
/* SSA indices to be outputted to corresponding varying offset */
|
||||
struct hash_table_u64 *ssa_varyings;
|
||||
|
||||
/* SSA values / registers which have been aliased. Naively, these
|
||||
* demand a fmov output; instead, we alias them in a later pass to
|
||||
* avoid the wasted op.
|
||||
*
|
||||
* A note on encoding: to avoid dynamic memory management here, rather
|
||||
* than mapping to a pointer, we map to the source index; the key
|
||||
* itself is just the destination index. */
|
||||
|
||||
struct hash_table_u64 *ssa_to_alias;
|
||||
struct set *leftover_ssa_to_alias;
|
||||
|
||||
/* Actual SSA-to-register for RA */
|
||||
struct hash_table_u64 *ssa_to_register;
|
||||
|
||||
/* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
|
||||
struct hash_table_u64 *hash_to_temp;
|
||||
int temp_count;
|
||||
int max_hash;
|
||||
|
||||
/* Just the count of the max register used. Higher count => higher
|
||||
* register pressure */
|
||||
int work_registers;
|
||||
|
||||
/* Used for cont/last hinting. Increase when a tex op is added.
|
||||
* Decrease when a tex op is removed. */
|
||||
int texture_op_count;
|
||||
|
||||
/* Mapping of texture register -> SSA index for unaliasing */
|
||||
int texture_index[2];
|
||||
|
||||
/* If any path hits a discard instruction */
|
||||
bool can_discard;
|
||||
|
||||
/* The number of uniforms allowable for the fast path */
|
||||
int uniform_cutoff;
|
||||
|
||||
/* Count of instructions emitted from NIR overall, across all blocks */
|
||||
int instruction_count;
|
||||
|
||||
/* Alpha ref value passed in */
|
||||
float alpha_ref;
|
||||
|
||||
/* The index corresponding to the fragment output */
|
||||
unsigned fragment_output;
|
||||
|
||||
/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
|
||||
unsigned sysvals[MAX_SYSVAL_COUNT];
|
||||
unsigned sysval_count;
|
||||
struct hash_table_u64 *sysval_to_id;
|
||||
} compiler_context;
|
||||
|
||||
/* Helpers for manipulating the above structures (forming the driver IR) */
|
||||
|
||||
/* Append instruction to end of current block */
|
||||
|
||||
static inline midgard_instruction *
|
||||
mir_upload_ins(struct midgard_instruction ins)
|
||||
{
|
||||
midgard_instruction *heap = malloc(sizeof(ins));
|
||||
memcpy(heap, &ins, sizeof(ins));
|
||||
return heap;
|
||||
}
|
||||
|
||||
static inline void
|
||||
emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
|
||||
{
|
||||
list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
|
||||
}
|
||||
|
||||
static inline void
|
||||
mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
|
||||
{
|
||||
list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
|
||||
}
|
||||
|
||||
static inline void
|
||||
mir_remove_instruction(struct midgard_instruction *ins)
|
||||
{
|
||||
list_del(&ins->link);
|
||||
}
|
||||
|
||||
static inline midgard_instruction*
|
||||
mir_prev_op(struct midgard_instruction *ins)
|
||||
{
|
||||
return list_last_entry(&(ins->link), midgard_instruction, link);
|
||||
}
|
||||
|
||||
static inline midgard_instruction*
|
||||
mir_next_op(struct midgard_instruction *ins)
|
||||
{
|
||||
return list_first_entry(&(ins->link), midgard_instruction, link);
|
||||
}
|
||||
|
||||
/* Iteration helpers over the MIR lists. `v` names the loop variable that the
 * underlying list macro declares; `from` is the entry to start at for the
 * _from variants. The `ctx` and `block` arguments are parenthesized so that
 * any pointer-valued expression (not just a bare identifier) expands
 * correctly. */

/* Blocks of a compiler_context */
#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &(ctx)->blocks, link)
#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &(ctx)->blocks, link)

/* Instructions of the context's current block; the _safe variant is the one
 * to use when the loop body unlinks the current entry */
#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)

/* Instructions of an explicitly given block */
#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &(block)->instructions, link)
|
||||
|
||||
|
||||
static inline midgard_instruction *
|
||||
mir_last_in_block(struct midgard_block *block)
|
||||
{
|
||||
return list_last_entry(&block->instructions, struct midgard_instruction, link);
|
||||
}
|
||||
|
||||
static inline midgard_block *
|
||||
mir_get_block(compiler_context *ctx, int idx)
|
||||
{
|
||||
struct list_head *lst = &ctx->blocks;
|
||||
|
||||
while ((idx--) + 1)
|
||||
lst = lst->next;
|
||||
|
||||
return (struct midgard_block *) lst;
|
||||
}
|
||||
|
||||
/* MIR printing */
|
||||
|
||||
void mir_print_instruction(midgard_instruction *ins);
|
||||
void mir_print_block(midgard_block *block);
|
||||
void mir_print_shader(compiler_context *ctx);
|
||||
|
||||
/* Register allocation */
|
||||
|
||||
struct ra_graph;
|
||||
|
||||
struct ra_graph* allocate_registers(compiler_context *ctx);
|
||||
void install_registers(compiler_context *ctx, struct ra_graph *g);
|
||||
bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);
|
||||
|
||||
#endif
|
||||
|
|
@ -31,6 +31,7 @@
|
|||
#include <string.h>
|
||||
#include "midgard.h"
|
||||
#include "midgard-parse.h"
|
||||
#include "midgard_ops.h"
|
||||
#include "disassemble.h"
|
||||
#include "helpers.h"
|
||||
#include "util/half_float.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
/* Author(s):
|
||||
* Alyssa Rosenzweig
|
||||
*
|
||||
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
|
||||
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
@ -22,6 +19,9 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __MDG_HELPERS_H
|
||||
#define __MDG_HELPERS_H
|
||||
|
||||
#define OP_IS_STORE_VARY(op) (\
|
||||
op == midgard_op_st_vary_16 || \
|
||||
op == midgard_op_st_vary_32 \
|
||||
|
|
@ -150,140 +150,12 @@
|
|||
#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
|
||||
#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
|
||||
|
||||
/* Table of mapping opcodes to accompanying properties relevant to
|
||||
* scheduling/emission/etc */
|
||||
|
||||
static struct {
|
||||
struct mir_op_props {
|
||||
const char *name;
|
||||
unsigned props;
|
||||
} alu_opcode_props[256] = {
|
||||
[midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
|
||||
[midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
|
||||
[midgard_alu_op_fround] = {"fround", UNITS_ADD},
|
||||
[midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
|
||||
[midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
|
||||
[midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
|
||||
[midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
|
||||
[midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},
|
||||
|
||||
/* Though they output a scalar, they need to run on a vector unit
|
||||
* since they process vectors */
|
||||
[midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
|
||||
[midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
|
||||
[midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
|
||||
|
||||
/* Incredibly, iadd can run on vmul, etc */
|
||||
[midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
|
||||
[midgard_alu_op_isub] = {"isub", UNITS_MOST},
|
||||
[midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
|
||||
[midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
|
||||
|
||||
/* For vector comparisons, use ball etc */
|
||||
[midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fle] = {"fle", UNITS_MOST},
|
||||
[midgard_alu_op_flt] = {"flt", UNITS_MOST},
|
||||
[midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
|
||||
[midgard_alu_op_ile] = {"ile", UNITS_MOST},
|
||||
[midgard_alu_op_ult] = {"ult", UNITS_MOST},
|
||||
[midgard_alu_op_ule] = {"ule", UNITS_MOST},
|
||||
|
||||
[midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
|
||||
[midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
|
||||
[midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
|
||||
[midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},
|
||||
|
||||
[midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
|
||||
[midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
|
||||
[midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
|
||||
[midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
|
||||
[midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
|
||||
[midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},
|
||||
|
||||
[midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
|
||||
[midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
|
||||
[midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},
|
||||
|
||||
/* XXX: Test case where it's right on smul but not sadd */
|
||||
[midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},
|
||||
|
||||
[midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
|
||||
[midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
|
||||
[midgard_alu_op_inand] = {"inand", UNITS_MOST},
|
||||
[midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
|
||||
[midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
|
||||
[midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},
|
||||
|
||||
[midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
|
||||
/* These instructions are not yet emitted by the compiler, so
|
||||
* don't speculate about units yet */
|
||||
[midgard_alu_op_ishladd] = {"ishladd", 0},
|
||||
|
||||
[midgard_alu_op_uball_lt] = {"uball_lt", 0},
|
||||
[midgard_alu_op_uball_lte] = {"uball_lte", 0},
|
||||
[midgard_alu_op_iball_lt] = {"iball_lt", 0},
|
||||
[midgard_alu_op_iball_lte] = {"iball_lte", 0},
|
||||
[midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
|
||||
[midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
|
||||
[midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
|
||||
[midgard_alu_op_ibany_lte] = {"ibany_lte", 0},
|
||||
|
||||
[midgard_alu_op_freduce] = {"freduce", 0},
|
||||
[midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
|
||||
[midgard_alu_op_bbany_neq] = {"bbany_neq", 0 | OP_COMMUTES},
|
||||
[midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
|
||||
[midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
|
||||
};
|
||||
|
||||
/* Is this opcode that of an integer (regardless of signedness)? Instruction
|
||||
* names authoritatively determine types */
|
||||
/* This file is common, so don't define the tables themselves. #include
|
||||
* midgard_ops.h if you need that, or edit midgard_ops.c directly */
|
||||
|
||||
static inline bool
|
||||
midgard_is_integer_op(int op)
|
||||
{
|
||||
const char *name = alu_opcode_props[op].name;
|
||||
|
||||
if (!name)
|
||||
return false;
|
||||
|
||||
return (name[0] == 'i') || (name[0] == 'u');
|
||||
}
|
||||
|
||||
/* Does this opcode *write* an integer? Same as is_integer_op, unless it's a
|
||||
* conversion between int<->float in which case we do the opposite */
|
||||
|
||||
static inline bool
|
||||
midgard_is_integer_out_op(int op)
|
||||
{
|
||||
bool is_int = midgard_is_integer_op(op);
|
||||
bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT;
|
||||
|
||||
return is_int ^ is_conversion;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -536,54 +536,4 @@ __attribute__((__packed__))
|
|||
}
|
||||
midgard_texture_word;
|
||||
|
||||
static char *load_store_opcode_names[256] = {
|
||||
[midgard_op_st_cubemap_coords] = "st_cubemap_coords",
|
||||
[midgard_op_ld_global_id] = "ld_global_id",
|
||||
|
||||
[midgard_op_atomic_add] = "atomic_add",
|
||||
[midgard_op_atomic_and] = "atomic_and",
|
||||
[midgard_op_atomic_or] = "atomic_or",
|
||||
[midgard_op_atomic_xor] = "atomic_xor",
|
||||
[midgard_op_atomic_imin] = "atomic_imin",
|
||||
[midgard_op_atomic_umin] = "atomic_umin",
|
||||
[midgard_op_atomic_imax] = "atomic_imax",
|
||||
[midgard_op_atomic_umax] = "atomic_umax",
|
||||
[midgard_op_atomic_xchg] = "atomic_xchg",
|
||||
|
||||
[midgard_op_ld_char] = "ld_char",
|
||||
[midgard_op_ld_char2] = "ld_char2",
|
||||
[midgard_op_ld_short] = "ld_short",
|
||||
[midgard_op_ld_char4] = "ld_char4",
|
||||
[midgard_op_ld_short4] = "ld_short4",
|
||||
[midgard_op_ld_int4] = "ld_int4",
|
||||
|
||||
[midgard_op_ld_attr_32] = "ld_attr_32",
|
||||
[midgard_op_ld_attr_16] = "ld_attr_16",
|
||||
[midgard_op_ld_attr_32i] = "ld_attr_32i",
|
||||
|
||||
[midgard_op_ld_vary_32] = "ld_vary_32",
|
||||
[midgard_op_ld_vary_16] = "ld_vary_16",
|
||||
[midgard_op_ld_vary_32i] = "ld_vary_32i",
|
||||
|
||||
[midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
|
||||
|
||||
[midgard_op_ld_uniform_16] = "ld_uniform_16",
|
||||
[midgard_op_ld_uniform_32] = "ld_uniform_32",
|
||||
[midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
|
||||
|
||||
[midgard_op_st_char] = "st_char",
|
||||
[midgard_op_st_char2] = "st_char2",
|
||||
[midgard_op_st_char4] = "st_char4",
|
||||
[midgard_op_st_short4] = "st_short4",
|
||||
[midgard_op_st_int4] = "st_int4",
|
||||
|
||||
[midgard_op_st_vary_32] = "st_vary_32",
|
||||
[midgard_op_st_vary_16] = "st_vary_16",
|
||||
[midgard_op_st_vary_32i] = "st_vary_32i",
|
||||
|
||||
[midgard_op_st_image_f] = "st_image_f",
|
||||
[midgard_op_st_image_ui] = "st_image_ui",
|
||||
[midgard_op_st_image_i] = "st_image_i",
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
|
@ -36,7 +36,6 @@
|
|||
#include "main/imports.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "util/half_float.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/list.h"
|
||||
|
|
@ -45,7 +44,9 @@
|
|||
#include "midgard.h"
|
||||
#include "midgard_nir.h"
|
||||
#include "midgard_compile.h"
|
||||
#include "midgard_ops.h"
|
||||
#include "helpers.h"
|
||||
#include "compiler.h"
|
||||
|
||||
#include "disassemble.h"
|
||||
|
||||
|
|
@ -64,138 +65,12 @@ int midgard_debug = 0;
|
|||
fprintf(stderr, "%s:%d: "fmt, \
|
||||
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
|
||||
|
||||
/* Instruction arguments represented as block-local SSA indices, rather than
|
||||
* registers. Negative values mean unused. */
|
||||
|
||||
typedef struct {
|
||||
int src0;
|
||||
int src1;
|
||||
int dest;
|
||||
|
||||
/* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
|
||||
* in. Only valid for ALU ops. */
|
||||
bool inline_constant;
|
||||
} ssa_args;
|
||||
|
||||
/* Forward declare so midgard_branch can reference */
|
||||
struct midgard_block;
|
||||
|
||||
/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
|
||||
* the hardware), hence why that must be zero. TARGET_DISCARD signals this
|
||||
* instruction is actually a discard op. */
|
||||
|
||||
#define TARGET_GOTO 0
|
||||
#define TARGET_BREAK 1
|
||||
#define TARGET_CONTINUE 2
|
||||
#define TARGET_DISCARD 3
|
||||
|
||||
typedef struct midgard_branch {
|
||||
/* If conditional, the condition is specified in r31.w */
|
||||
bool conditional;
|
||||
|
||||
/* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
|
||||
bool invert_conditional;
|
||||
|
||||
/* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */
|
||||
unsigned target_type;
|
||||
|
||||
/* The actual target */
|
||||
union {
|
||||
int target_block;
|
||||
int target_break;
|
||||
int target_continue;
|
||||
};
|
||||
} midgard_branch;
|
||||
|
||||
static bool
|
||||
midgard_is_branch_unit(unsigned unit)
|
||||
{
|
||||
return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT);
|
||||
}
|
||||
|
||||
/* Generic in-memory data type representing a single logical instruction, rather
|
||||
* than a single instruction group. This is the preferred form for code gen.
|
||||
* Multiple midgard_instructions will later be combined during scheduling,
|
||||
* though this is not represented in this structure. Its format bridges
|
||||
* the low-level binary representation with the higher level semantic meaning.
|
||||
*
|
||||
* Notably, it allows registers to be specified as block local SSA, for code
|
||||
* emitted before the register allocation pass.
|
||||
*/
|
||||
|
||||
typedef struct midgard_instruction {
|
||||
/* Must be first for casting */
|
||||
struct list_head link;
|
||||
|
||||
unsigned type; /* ALU, load/store, texture */
|
||||
|
||||
/* If the register allocator has not run yet... */
|
||||
ssa_args ssa_args;
|
||||
|
||||
/* Special fields for an ALU instruction */
|
||||
midgard_reg_info registers;
|
||||
|
||||
/* I.e. (1 << alu_bit) */
|
||||
int unit;
|
||||
|
||||
/* When emitting bundle, should this instruction have a break forced
|
||||
* before it? Used for r31 writes which are valid only within a single
|
||||
* bundle and *need* to happen as early as possible... this is a hack,
|
||||
* TODO remove when we have a scheduler */
|
||||
bool precede_break;
|
||||
|
||||
bool has_constants;
|
||||
float constants[4];
|
||||
uint16_t inline_constant;
|
||||
bool has_blend_constant;
|
||||
|
||||
bool compact_branch;
|
||||
bool writeout;
|
||||
bool prepacked_branch;
|
||||
|
||||
union {
|
||||
midgard_load_store_word load_store;
|
||||
midgard_vector_alu alu;
|
||||
midgard_texture_word texture;
|
||||
midgard_branch_extended branch_extended;
|
||||
uint16_t br_compact;
|
||||
|
||||
/* General branch, rather than packed br_compact. Higher level
|
||||
* than the other components */
|
||||
midgard_branch branch;
|
||||
};
|
||||
} midgard_instruction;
|
||||
|
||||
typedef struct midgard_block {
|
||||
/* Link to next block. Must be first for mir_get_block */
|
||||
struct list_head link;
|
||||
|
||||
/* List of midgard_instructions emitted for the current block */
|
||||
struct list_head instructions;
|
||||
|
||||
bool is_scheduled;
|
||||
|
||||
/* List of midgard_bundles emitted (after the scheduler has run) */
|
||||
struct util_dynarray bundles;
|
||||
|
||||
/* Number of quadwords _actually_ emitted, as determined after scheduling */
|
||||
unsigned quadword_count;
|
||||
|
||||
/* Successors: always one forward (the block after us), maybe
|
||||
* one backwards (for a backward branch). No need for a second
|
||||
* forward, since graph traversal would get there eventually
|
||||
* anyway */
|
||||
struct midgard_block *successors[2];
|
||||
unsigned nr_successors;
|
||||
|
||||
/* The successor pointers form a graph, and in the case of
|
||||
* complex control flow, this graph has cycles. To aid
|
||||
* traversal during liveness analysis, we have a visited?
|
||||
* boolean for passes to use as they see fit, provided they
|
||||
* clean up later */
|
||||
bool visited;
|
||||
} midgard_block;
|
||||
|
||||
static void
|
||||
midgard_block_add_successor(midgard_block *block, midgard_block *successor)
|
||||
{
|
||||
|
|
@ -404,267 +279,6 @@ midgard_create_branch_extended( midgard_condition cond,
|
|||
return branch;
|
||||
}
|
||||
|
||||
typedef struct midgard_bundle {
|
||||
/* Tag for the overall bundle */
|
||||
int tag;
|
||||
|
||||
/* Instructions contained by the bundle */
|
||||
int instruction_count;
|
||||
midgard_instruction instructions[5];
|
||||
|
||||
/* Bundle-wide ALU configuration */
|
||||
int padding;
|
||||
int control;
|
||||
bool has_embedded_constants;
|
||||
float constants[4];
|
||||
bool has_blend_constant;
|
||||
|
||||
uint16_t register_words[8];
|
||||
int register_words_count;
|
||||
|
||||
uint64_t body_words[8];
|
||||
size_t body_size[8];
|
||||
int body_words_count;
|
||||
} midgard_bundle;
|
||||
|
||||
typedef struct compiler_context {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage stage;
|
||||
|
||||
/* Is internally a blend shader? Depends on stage == FRAGMENT */
|
||||
bool is_blend;
|
||||
|
||||
/* Tracking for blend constant patching */
|
||||
int blend_constant_offset;
|
||||
|
||||
/* Current NIR function */
|
||||
nir_function *func;
|
||||
|
||||
/* Unordered list of midgard_blocks */
|
||||
int block_count;
|
||||
struct list_head blocks;
|
||||
|
||||
midgard_block *initial_block;
|
||||
midgard_block *previous_source_block;
|
||||
midgard_block *final_block;
|
||||
|
||||
/* List of midgard_instructions emitted for the current block */
|
||||
midgard_block *current_block;
|
||||
|
||||
/* The current "depth" of the loop, for disambiguating breaks/continues
|
||||
* when using nested loops */
|
||||
int current_loop_depth;
|
||||
|
||||
/* Constants which have been loaded, for later inlining */
|
||||
struct hash_table_u64 *ssa_constants;
|
||||
|
||||
/* SSA indices to be outputted to corresponding varying offset */
|
||||
struct hash_table_u64 *ssa_varyings;
|
||||
|
||||
/* SSA values / registers which have been aliased. Naively, these
|
||||
* demand a fmov output; instead, we alias them in a later pass to
|
||||
* avoid the wasted op.
|
||||
*
|
||||
* A note on encoding: to avoid dynamic memory management here, rather
|
||||
* than ampping to a pointer, we map to the source index; the key
|
||||
* itself is just the destination index. */
|
||||
|
||||
struct hash_table_u64 *ssa_to_alias;
|
||||
struct set *leftover_ssa_to_alias;
|
||||
|
||||
/* Actual SSA-to-register for RA */
|
||||
struct hash_table_u64 *ssa_to_register;
|
||||
|
||||
/* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
|
||||
struct hash_table_u64 *hash_to_temp;
|
||||
int temp_count;
|
||||
int max_hash;
|
||||
|
||||
/* Just the count of the max register used. Higher count => higher
|
||||
* register pressure */
|
||||
int work_registers;
|
||||
|
||||
/* Used for cont/last hinting. Increase when a tex op is added.
|
||||
* Decrease when a tex op is removed. */
|
||||
int texture_op_count;
|
||||
|
||||
/* Mapping of texture register -> SSA index for unaliasing */
|
||||
int texture_index[2];
|
||||
|
||||
/* If any path hits a discard instruction */
|
||||
bool can_discard;
|
||||
|
||||
/* The number of uniforms allowable for the fast path */
|
||||
int uniform_cutoff;
|
||||
|
||||
/* Count of instructions emitted from NIR overall, across all blocks */
|
||||
int instruction_count;
|
||||
|
||||
/* Alpha ref value passed in */
|
||||
float alpha_ref;
|
||||
|
||||
/* The index corresponding to the fragment output */
|
||||
unsigned fragment_output;
|
||||
|
||||
/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
|
||||
unsigned sysvals[MAX_SYSVAL_COUNT];
|
||||
unsigned sysval_count;
|
||||
struct hash_table_u64 *sysval_to_id;
|
||||
} compiler_context;
|
||||
|
||||
/* Append instruction to end of current block */
|
||||
|
||||
static midgard_instruction *
|
||||
mir_upload_ins(struct midgard_instruction ins)
|
||||
{
|
||||
midgard_instruction *heap = malloc(sizeof(ins));
|
||||
memcpy(heap, &ins, sizeof(ins));
|
||||
return heap;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
|
||||
{
|
||||
list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
|
||||
}
|
||||
|
||||
static void
|
||||
mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
|
||||
{
|
||||
list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
|
||||
}
|
||||
|
||||
static void
|
||||
mir_remove_instruction(struct midgard_instruction *ins)
|
||||
{
|
||||
list_del(&ins->link);
|
||||
}
|
||||
|
||||
static midgard_instruction*
|
||||
mir_prev_op(struct midgard_instruction *ins)
|
||||
{
|
||||
return list_last_entry(&(ins->link), midgard_instruction, link);
|
||||
}
|
||||
|
||||
static midgard_instruction*
|
||||
mir_next_op(struct midgard_instruction *ins)
|
||||
{
|
||||
return list_first_entry(&(ins->link), midgard_instruction, link);
|
||||
}
|
||||
|
||||
/* Iteration helpers over the block list of a context and the instruction
 * list of a block. `v` names the loop variable that the underlying list
 * macro declares. The `ctx` and `block` arguments are parenthesized so that
 * any pointer-valued expression may be passed, not just a plain identifier. */
#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &(ctx)->blocks, link)
#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &(ctx)->blocks, link)

/* These two walk the instructions of ctx->current_block */
#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)
#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &(ctx)->current_block->instructions, link)

/* These walk the instructions of an explicitly given block */
#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &(block)->instructions, link)
#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &(block)->instructions, link)
|
||||
|
||||
|
||||
static midgard_instruction *
|
||||
mir_last_in_block(struct midgard_block *block)
|
||||
{
|
||||
return list_last_entry(&block->instructions, struct midgard_instruction, link);
|
||||
}
|
||||
|
||||
static midgard_block *
|
||||
mir_get_block(compiler_context *ctx, int idx)
|
||||
{
|
||||
struct list_head *lst = &ctx->blocks;
|
||||
|
||||
while ((idx--) + 1)
|
||||
lst = lst->next;
|
||||
|
||||
return (struct midgard_block *) lst;
|
||||
}
|
||||
|
||||
/* Pretty printer for internal Midgard IR */
|
||||
|
||||
static void
|
||||
print_mir_source(int source)
|
||||
{
|
||||
if (source >= SSA_FIXED_MINIMUM) {
|
||||
/* Specific register */
|
||||
int reg = SSA_REG_FROM_FIXED(source);
|
||||
|
||||
/* TODO: Moving threshold */
|
||||
if (reg > 16 && reg < 24)
|
||||
printf("u%d", 23 - reg);
|
||||
else
|
||||
printf("r%d", reg);
|
||||
} else {
|
||||
printf("%d", source);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_mir_instruction(midgard_instruction *ins)
|
||||
{
|
||||
printf("\t");
|
||||
|
||||
switch (ins->type) {
|
||||
case TAG_ALU_4: {
|
||||
midgard_alu_op op = ins->alu.op;
|
||||
const char *name = alu_opcode_props[op].name;
|
||||
|
||||
if (ins->unit)
|
||||
printf("%d.", ins->unit);
|
||||
|
||||
printf("%s", name ? name : "??");
|
||||
break;
|
||||
}
|
||||
|
||||
case TAG_LOAD_STORE_4: {
|
||||
midgard_load_store_op op = ins->load_store.op;
|
||||
const char *name = load_store_opcode_names[op];
|
||||
|
||||
assert(name);
|
||||
printf("%s", name);
|
||||
break;
|
||||
}
|
||||
|
||||
case TAG_TEXTURE_4: {
|
||||
printf("texture");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
ssa_args *args = &ins->ssa_args;
|
||||
|
||||
printf(" %d, ", args->dest);
|
||||
|
||||
print_mir_source(args->src0);
|
||||
printf(", ");
|
||||
|
||||
if (args->inline_constant)
|
||||
printf("#%d", ins->inline_constant);
|
||||
else
|
||||
print_mir_source(args->src1);
|
||||
|
||||
if (ins->has_constants)
|
||||
printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
print_mir_block(midgard_block *block)
|
||||
{
|
||||
printf("{\n");
|
||||
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
print_mir_instruction(ins);
|
||||
}
|
||||
|
||||
printf("}\n");
|
||||
}
|
||||
|
||||
static void
|
||||
attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name)
|
||||
{
|
||||
|
|
@ -975,26 +589,6 @@ effective_writemask(midgard_vector_alu *alu)
|
|||
return squeeze_writemask(alu->mask);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
find_or_allocate_temp(compiler_context *ctx, unsigned hash)
|
||||
{
|
||||
if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
|
||||
return hash;
|
||||
|
||||
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
|
||||
|
||||
if (temp)
|
||||
return temp - 1;
|
||||
|
||||
/* If no temp is find, allocate one */
|
||||
temp = ctx->temp_count++;
|
||||
ctx->max_hash = MAX2(ctx->max_hash, hash);
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
nir_src_index(compiler_context *ctx, nir_src *src)
|
||||
{
|
||||
|
|
@ -1983,338 +1577,6 @@ emit_instr(compiler_context *ctx, struct nir_instr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Determine the actual hardware from the index based on the RA results or special values */
|
||||
|
||||
static int
|
||||
dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
|
||||
{
|
||||
if (reg >= SSA_FIXED_MINIMUM)
|
||||
return SSA_REG_FROM_FIXED(reg);
|
||||
|
||||
if (reg >= 0) {
|
||||
assert(reg < maxreg);
|
||||
assert(g);
|
||||
int r = ra_get_node_reg(g, reg);
|
||||
ctx->work_registers = MAX2(ctx->work_registers, r);
|
||||
return r;
|
||||
}
|
||||
|
||||
switch (reg) {
|
||||
/* fmov style unused */
|
||||
case SSA_UNUSED_0:
|
||||
return REGISTER_UNUSED;
|
||||
|
||||
/* lut style unused */
|
||||
case SSA_UNUSED_1:
|
||||
return REGISTER_UNUSED;
|
||||
|
||||
default:
|
||||
DBG("Unknown SSA register alias %d\n", reg);
|
||||
assert(0);
|
||||
return 31;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
|
||||
{
|
||||
/* Choose the first available register to minimise reported register pressure */
|
||||
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if (BITSET_TEST(regs, i)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
midgard_is_live_in_instr(midgard_instruction *ins, int src)
|
||||
{
|
||||
if (ins->ssa_args.src0 == src) return true;
|
||||
if (ins->ssa_args.src1 == src) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Determine if a variable is live in the successors of a block */
|
||||
static bool
|
||||
is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
|
||||
{
|
||||
for (unsigned i = 0; i < bl->nr_successors; ++i) {
|
||||
midgard_block *succ = bl->successors[i];
|
||||
|
||||
/* If we already visited, the value we're seeking
|
||||
* isn't down this path (or we would have short
|
||||
* circuited */
|
||||
|
||||
if (succ->visited) continue;
|
||||
|
||||
/* Otherwise (it's visited *now*), check the block */
|
||||
|
||||
succ->visited = true;
|
||||
|
||||
mir_foreach_instr_in_block(succ, ins) {
|
||||
if (midgard_is_live_in_instr(ins, src))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* ...and also, check *its* successors */
|
||||
if (is_live_after_successors(ctx, succ, src))
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
/* Welp. We're really not live. */
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
|
||||
{
|
||||
/* Check the rest of the block for liveness */
|
||||
|
||||
mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) {
|
||||
if (midgard_is_live_in_instr(ins, src))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check the rest of the blocks for liveness recursively */
|
||||
|
||||
bool succ = is_live_after_successors(ctx, block, src);
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
block->visited = false;
|
||||
}
|
||||
|
||||
return succ;
|
||||
}
|
||||
|
||||
/* Once registers have been decided via register allocation
|
||||
* (allocate_registers), we need to rewrite the MIR to use registers instead of
|
||||
* SSA */
|
||||
|
||||
static void
|
||||
install_registers(compiler_context *ctx, struct ra_graph *g)
|
||||
{
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
ssa_args args = ins->ssa_args;
|
||||
|
||||
switch (ins->type) {
|
||||
case TAG_ALU_4:
|
||||
ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
|
||||
|
||||
ins->registers.src2_imm = args.inline_constant;
|
||||
|
||||
if (args.inline_constant) {
|
||||
/* Encode inline 16-bit constant as a vector by default */
|
||||
|
||||
ins->registers.src2_reg = ins->inline_constant >> 11;
|
||||
|
||||
int lower_11 = ins->inline_constant & ((1 << 12) - 1);
|
||||
|
||||
uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
|
||||
ins->alu.src2 = imm << 2;
|
||||
} else {
|
||||
ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
|
||||
}
|
||||
|
||||
ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
|
||||
|
||||
break;
|
||||
|
||||
case TAG_LOAD_STORE_4: {
|
||||
if (OP_IS_STORE_VARY(ins->load_store.op)) {
|
||||
/* TODO: use ssa_args for st_vary */
|
||||
ins->load_store.reg = 0;
|
||||
} else {
|
||||
bool has_dest = args.dest >= 0;
|
||||
int ssa_arg = has_dest ? args.dest : args.src0;
|
||||
|
||||
ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* This routine performs the actual register allocation. It should be succeeded
|
||||
* by install_registers */
|
||||
|
||||
static struct ra_graph *
|
||||
allocate_registers(compiler_context *ctx)
|
||||
{
|
||||
/* First, initialize the RA */
|
||||
struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
|
||||
|
||||
/* Create a primary (general purpose) class, as well as special purpose
|
||||
* pipeline register classes */
|
||||
|
||||
int primary_class = ra_alloc_reg_class(regs);
|
||||
int varying_class = ra_alloc_reg_class(regs);
|
||||
|
||||
/* Add the full set of work registers */
|
||||
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
for (int i = 0; i < work_count; ++i)
|
||||
ra_class_add_reg(regs, primary_class, i);
|
||||
|
||||
/* Add special registers */
|
||||
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
|
||||
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
|
||||
|
||||
/* We're done setting up */
|
||||
ra_set_finalize(regs, NULL);
|
||||
|
||||
/* Transform the MIR into squeezed index form */
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
|
||||
ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
|
||||
ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
|
||||
}
|
||||
if (midgard_debug & MIDGARD_DBG_SHADERS)
|
||||
print_mir_block(block);
|
||||
}
|
||||
|
||||
/* No register allocation to do with no SSA */
|
||||
|
||||
if (!ctx->temp_count)
|
||||
return NULL;
|
||||
|
||||
/* Let's actually do register allocation */
|
||||
int nodes = ctx->temp_count;
|
||||
struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
|
||||
|
||||
/* Set everything to the work register class, unless it has somewhere
|
||||
* special to go */
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
if (ins->ssa_args.dest < 0) continue;
|
||||
|
||||
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
|
||||
|
||||
int class = primary_class;
|
||||
|
||||
ra_set_node_class(g, ins->ssa_args.dest, class);
|
||||
}
|
||||
}
|
||||
|
||||
for (int index = 0; index <= ctx->max_hash; ++index) {
|
||||
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
|
||||
|
||||
if (temp) {
|
||||
unsigned reg = temp - 1;
|
||||
int t = find_or_allocate_temp(ctx, index);
|
||||
ra_set_node_reg(g, t, reg);
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine liveness */
|
||||
|
||||
int *live_start = malloc(nodes * sizeof(int));
|
||||
int *live_end = malloc(nodes * sizeof(int));
|
||||
|
||||
/* Initialize as non-existent */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
live_start[i] = live_end[i] = -1;
|
||||
}
|
||||
|
||||
int d = 0;
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
/* Dest is < 0 for st_vary instructions, which break
|
||||
* the usual SSA conventions. Liveness analysis doesn't
|
||||
* make sense on these instructions, so skip them to
|
||||
* avoid memory corruption */
|
||||
|
||||
if (ins->ssa_args.dest < 0) continue;
|
||||
|
||||
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
|
||||
/* If this destination is not yet live, it is now since we just wrote it */
|
||||
|
||||
int dest = ins->ssa_args.dest;
|
||||
|
||||
if (live_start[dest] == -1)
|
||||
live_start[dest] = d;
|
||||
}
|
||||
|
||||
/* Since we just used a source, the source might be
|
||||
* dead now. Scan the rest of the block for
|
||||
* invocations, and if there are none, the source dies
|
||||
* */
|
||||
|
||||
int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
|
||||
|
||||
for (int src = 0; src < 2; ++src) {
|
||||
int s = sources[src];
|
||||
|
||||
if (s < 0) continue;
|
||||
|
||||
if (s >= SSA_FIXED_MINIMUM) continue;
|
||||
|
||||
if (!is_live_after(ctx, block, ins, s)) {
|
||||
live_end[s] = d;
|
||||
}
|
||||
}
|
||||
|
||||
++d;
|
||||
}
|
||||
}
|
||||
|
||||
/* If a node still hasn't been killed, kill it now */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
/* live_start == -1 most likely indicates a pinned output */
|
||||
|
||||
if (live_end[i] == -1)
|
||||
live_end[i] = d;
|
||||
}
|
||||
|
||||
/* Setup interference between nodes that are live at the same time */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
for (int j = i + 1; j < nodes; ++j) {
|
||||
if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
|
||||
ra_add_node_interference(g, i, j);
|
||||
}
|
||||
}
|
||||
|
||||
ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
|
||||
|
||||
if (!ra_allocate(g)) {
|
||||
DBG("Error allocating registers\n");
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
free(live_start);
|
||||
free(live_end);
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
/* Midgard IR only knows vector ALU types, but we sometimes need to actually
|
||||
* use scalar ALU instructions, for functional or performance reasons. To do
|
||||
* this, we just demote vector ALU payloads to scalar. */
|
||||
|
|
@ -3247,7 +2509,7 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
|
|||
|
||||
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
|
||||
if (midgard_is_pinned(ctx, ins->ssa_args.dest)) continue;
|
||||
if (is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
|
||||
if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
|
||||
|
||||
mir_remove_instruction(ins);
|
||||
progress = true;
|
||||
|
|
|
|||
92
src/gallium/drivers/panfrost/midgard/midgard_liveness.c
Normal file
92
src/gallium/drivers/panfrost/midgard/midgard_liveness.c
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
/* mir_is_live_after performs liveness analysis on the MIR, used primarily
|
||||
* as part of register allocation. TODO: Algorithmic improvements for
|
||||
* compiler performance (this is the worst algorithm possible -- see
|
||||
* backlog with Connor on IRC) */
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
static bool
|
||||
midgard_is_live_in_instr(midgard_instruction *ins, int src)
|
||||
{
|
||||
if (ins->ssa_args.src0 == src) return true;
|
||||
if (ins->ssa_args.src1 == src) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Determine if a variable is live in the successors of a block */
|
||||
static bool
|
||||
is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
|
||||
{
|
||||
for (unsigned i = 0; i < bl->nr_successors; ++i) {
|
||||
midgard_block *succ = bl->successors[i];
|
||||
|
||||
/* If we already visited, the value we're seeking
|
||||
* isn't down this path (or we would have short
|
||||
* circuited */
|
||||
|
||||
if (succ->visited) continue;
|
||||
|
||||
/* Otherwise (it's visited *now*), check the block */
|
||||
|
||||
succ->visited = true;
|
||||
|
||||
mir_foreach_instr_in_block(succ, ins) {
|
||||
if (midgard_is_live_in_instr(ins, src))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* ...and also, check *its* successors */
|
||||
if (is_live_after_successors(ctx, succ, src))
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
/* Welp. We're really not live. */
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
|
||||
{
|
||||
/* Check the rest of the block for liveness */
|
||||
|
||||
mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) {
|
||||
if (midgard_is_live_in_instr(ins, src))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check the rest of the blocks for liveness recursively */
|
||||
|
||||
bool succ = is_live_after_successors(ctx, block, src);
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
block->visited = false;
|
||||
}
|
||||
|
||||
return succ;
|
||||
}
|
||||
188
src/gallium/drivers/panfrost/midgard/midgard_ops.c
Normal file
188
src/gallium/drivers/panfrost/midgard/midgard_ops.c
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "midgard.h"
|
||||
|
||||
/* Include the definitions of the macros and such */
|
||||
|
||||
#define MIDGARD_OPS_TABLE
|
||||
#include "helpers.h"
|
||||
#undef MIDGARD_OPS_TABLE
|
||||
|
||||
/* Table of mapping opcodes to accompanying properties. This is used for both
|
||||
* the disassembler and the compiler. It is placed in a .c file like this to
|
||||
* avoid duplications in the binary */
|
||||
|
||||
struct mir_op_props alu_opcode_props[256] = {
|
||||
[midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
|
||||
[midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
|
||||
[midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
|
||||
[midgard_alu_op_fround] = {"fround", UNITS_ADD},
|
||||
[midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
|
||||
[midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
|
||||
[midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
|
||||
[midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
|
||||
[midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},
|
||||
|
||||
/* Though they output a scalar, they need to run on a vector unit
|
||||
* since they process vectors */
|
||||
[midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
|
||||
[midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
|
||||
[midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
|
||||
|
||||
/* Incredibly, iadd can run on vmul, etc */
|
||||
[midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
|
||||
[midgard_alu_op_isub] = {"isub", UNITS_MOST},
|
||||
[midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
|
||||
[midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
|
||||
|
||||
/* For vector comparisons, use ball etc */
|
||||
[midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_fle] = {"fle", UNITS_MOST},
|
||||
[midgard_alu_op_flt] = {"flt", UNITS_MOST},
|
||||
[midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
|
||||
[midgard_alu_op_ile] = {"ile", UNITS_MOST},
|
||||
[midgard_alu_op_ult] = {"ult", UNITS_MOST},
|
||||
[midgard_alu_op_ule] = {"ule", UNITS_MOST},
|
||||
|
||||
[midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
|
||||
[midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
|
||||
[midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
|
||||
[midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},
|
||||
|
||||
[midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
|
||||
[midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
|
||||
[midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
|
||||
[midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
|
||||
[midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
|
||||
[midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},
|
||||
|
||||
[midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
[midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
|
||||
|
||||
[midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
|
||||
[midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},
|
||||
|
||||
/* XXX: Test case where it's right on smul but not sadd */
|
||||
[midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},
|
||||
|
||||
[midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
|
||||
[midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
|
||||
[midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
|
||||
[midgard_alu_op_inand] = {"inand", UNITS_MOST},
|
||||
[midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
|
||||
[midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
|
||||
[midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},
|
||||
|
||||
[midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
|
||||
[midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
|
||||
|
||||
/* These instructions are not yet emitted by the compiler, so
|
||||
* don't speculate about units yet */
|
||||
[midgard_alu_op_ishladd] = {"ishladd", 0},
|
||||
|
||||
[midgard_alu_op_uball_lt] = {"uball_lt", 0},
|
||||
[midgard_alu_op_uball_lte] = {"uball_lte", 0},
|
||||
[midgard_alu_op_iball_lt] = {"iball_lt", 0},
|
||||
[midgard_alu_op_iball_lte] = {"iball_lte", 0},
|
||||
[midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
|
||||
[midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
|
||||
[midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
|
||||
[midgard_alu_op_ibany_lte] = {"ibany_lte", 0},
|
||||
|
||||
[midgard_alu_op_freduce] = {"freduce", 0},
|
||||
[midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
|
||||
[midgard_alu_op_bbany_neq] = {"bball_eq", 0 | OP_COMMUTES},
|
||||
[midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
|
||||
[midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
|
||||
};
|
||||
|
||||
const char *load_store_opcode_names[256] = {
|
||||
[midgard_op_st_cubemap_coords] = "st_cubemap_coords",
|
||||
[midgard_op_ld_global_id] = "ld_global_id",
|
||||
|
||||
[midgard_op_atomic_add] = "atomic_add",
|
||||
[midgard_op_atomic_and] = "atomic_and",
|
||||
[midgard_op_atomic_or] = "atomic_or",
|
||||
[midgard_op_atomic_xor] = "atomic_xor",
|
||||
[midgard_op_atomic_imin] = "atomic_imin",
|
||||
[midgard_op_atomic_umin] = "atomic_umin",
|
||||
[midgard_op_atomic_imax] = "atomic_imax",
|
||||
[midgard_op_atomic_umax] = "atomic_umax",
|
||||
[midgard_op_atomic_xchg] = "atomic_xchg",
|
||||
|
||||
[midgard_op_ld_char] = "ld_char",
|
||||
[midgard_op_ld_char2] = "ld_char2",
|
||||
[midgard_op_ld_short] = "ld_short",
|
||||
[midgard_op_ld_char4] = "ld_char4",
|
||||
[midgard_op_ld_short4] = "ld_short4",
|
||||
[midgard_op_ld_int4] = "ld_int4",
|
||||
|
||||
[midgard_op_ld_attr_32] = "ld_attr_32",
|
||||
[midgard_op_ld_attr_16] = "ld_attr_16",
|
||||
[midgard_op_ld_attr_32i] = "ld_attr_32i",
|
||||
|
||||
[midgard_op_ld_vary_32] = "ld_vary_32",
|
||||
[midgard_op_ld_vary_16] = "ld_vary_16",
|
||||
[midgard_op_ld_vary_32i] = "ld_vary_32i",
|
||||
|
||||
[midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
|
||||
|
||||
[midgard_op_ld_uniform_16] = "ld_uniform_16",
|
||||
[midgard_op_ld_uniform_32] = "ld_uniform_32",
|
||||
[midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
|
||||
|
||||
[midgard_op_st_char] = "st_char",
|
||||
[midgard_op_st_char2] = "st_char2",
|
||||
[midgard_op_st_char4] = "st_char4",
|
||||
[midgard_op_st_short4] = "st_short4",
|
||||
[midgard_op_st_int4] = "st_int4",
|
||||
|
||||
[midgard_op_st_vary_32] = "st_vary_32",
|
||||
[midgard_op_st_vary_16] = "st_vary_16",
|
||||
[midgard_op_st_vary_32i] = "st_vary_32i",
|
||||
|
||||
[midgard_op_st_image_f] = "st_image_f",
|
||||
[midgard_op_st_image_ui] = "st_image_ui",
|
||||
[midgard_op_st_image_i] = "st_image_i",
|
||||
};
|
||||
53
src/gallium/drivers/panfrost/midgard/midgard_ops.h
Normal file
53
src/gallium/drivers/panfrost/midgard/midgard_ops.h
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MIDGARD_OPS_H
#define MIDGARD_OPS_H

/* This header defines static inline helpers, so it must be guarded: without
 * the guard a second inclusion in one translation unit redefines them. */

#include "helpers.h"

/* Opcode tables, defined elsewhere and only declared here so the inline
 * helpers below (and other users of this header) can consult them. Each
 * table has 256 entries and is indexed directly by opcode. */

extern struct mir_op_props alu_opcode_props[256];
extern const char *load_store_opcode_names[256];

/* Is this opcode that of an integer operation (regardless of signedness)?
 * Instruction names authoritatively determine types: an op counts as integer
 * when its name in alu_opcode_props starts with 'i' or 'u'. Opcodes with no
 * name recorded in the table are reported as non-integer.
 *
 * op is used as a direct index into alu_opcode_props and is not range
 * checked here. */

static inline bool
midgard_is_integer_op(int op)
{
        const char *name = alu_opcode_props[op].name;

        if (!name)
                return false;

        return (name[0] == 'i') || (name[0] == 'u');
}

/* Does this opcode *write* an integer? Same answer as midgard_is_integer_op,
 * unless the op is flagged OP_TYPE_CONVERT (a conversion between int<->float),
 * in which case the answer is flipped. */

static inline bool
midgard_is_integer_out_op(int op)
{
        bool is_int = midgard_is_integer_op(op);
        bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT;

        /* XOR of the two flags: a conversion inverts the name-derived type */
        return is_int ^ is_conversion;
}

#endif /* MIDGARD_OPS_H */
|
||||
124
src/gallium/drivers/panfrost/midgard/midgard_print.c
Normal file
124
src/gallium/drivers/panfrost/midgard/midgard_print.c
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
#include "helpers.h"
|
||||
#include "midgard_ops.h"
|
||||
|
||||
/* Pretty printer for Midgard IR, for use debugging compiler-internal
|
||||
* passes like register allocation. The output superficially resembles
|
||||
* Midgard assembly, with the exception that unit information and such is
|
||||
* (normally) omitted, and generic indices are usually used instead of
|
||||
* registers */
|
||||
|
||||
static void
|
||||
mir_print_source(int source)
|
||||
{
|
||||
if (source >= SSA_FIXED_MINIMUM) {
|
||||
/* Specific register */
|
||||
int reg = SSA_REG_FROM_FIXED(source);
|
||||
|
||||
/* TODO: Moving threshold */
|
||||
if (reg > 16 && reg < 24)
|
||||
printf("u%d", 23 - reg);
|
||||
else
|
||||
printf("r%d", reg);
|
||||
} else {
|
||||
printf("%d", source);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
mir_print_instruction(midgard_instruction *ins)
|
||||
{
|
||||
printf("\t");
|
||||
|
||||
switch (ins->type) {
|
||||
case TAG_ALU_4: {
|
||||
midgard_alu_op op = ins->alu.op;
|
||||
const char *name = alu_opcode_props[op].name;
|
||||
|
||||
if (ins->unit)
|
||||
printf("%d.", ins->unit);
|
||||
|
||||
printf("%s", name ? name : "??");
|
||||
break;
|
||||
}
|
||||
|
||||
case TAG_LOAD_STORE_4: {
|
||||
midgard_load_store_op op = ins->load_store.op;
|
||||
const char *name = load_store_opcode_names[op];
|
||||
|
||||
assert(name);
|
||||
printf("%s", name);
|
||||
break;
|
||||
}
|
||||
|
||||
case TAG_TEXTURE_4: {
|
||||
printf("texture");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
ssa_args *args = &ins->ssa_args;
|
||||
|
||||
printf(" %d, ", args->dest);
|
||||
|
||||
mir_print_source(args->src0);
|
||||
printf(", ");
|
||||
|
||||
if (args->inline_constant)
|
||||
printf("#%d", ins->inline_constant);
|
||||
else
|
||||
mir_print_source(args->src1);
|
||||
|
||||
if (ins->has_constants)
|
||||
printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/* Dumps MIR for a block or entire shader respective */
|
||||
|
||||
void
|
||||
mir_print_block(midgard_block *block)
|
||||
{
|
||||
printf("{\n");
|
||||
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
mir_print_instruction(ins);
|
||||
}
|
||||
|
||||
printf("}\n");
|
||||
}
|
||||
|
||||
void
|
||||
mir_print_shader(compiler_context *ctx)
|
||||
{
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_print_block(block);
|
||||
}
|
||||
}
|
||||
310
src/gallium/drivers/panfrost/midgard/midgard_ra.c
Normal file
310
src/gallium/drivers/panfrost/midgard/midgard_ra.c
Normal file
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
#include "util/register_allocate.h"
|
||||
|
||||
/* When we're 'squeezing down' the values in the IR, we maintain a hash
|
||||
* as such */
|
||||
|
||||
static unsigned
|
||||
find_or_allocate_temp(compiler_context *ctx, unsigned hash)
|
||||
{
|
||||
if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
|
||||
return hash;
|
||||
|
||||
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
|
||||
|
||||
if (temp)
|
||||
return temp - 1;
|
||||
|
||||
/* If no temp is find, allocate one */
|
||||
temp = ctx->temp_count++;
|
||||
ctx->max_hash = MAX2(ctx->max_hash, hash);
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* Callback for register allocation selection, trivial default for now */
|
||||
|
||||
static unsigned int
|
||||
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
|
||||
{
|
||||
/* Choose the first available register to minimise reported register pressure */
|
||||
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if (BITSET_TEST(regs, i)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Determine the actual hardware from the index based on the RA results or special values */
|
||||
|
||||
static int
|
||||
dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
|
||||
{
|
||||
if (reg >= SSA_FIXED_MINIMUM)
|
||||
return SSA_REG_FROM_FIXED(reg);
|
||||
|
||||
if (reg >= 0) {
|
||||
assert(reg < maxreg);
|
||||
assert(g);
|
||||
int r = ra_get_node_reg(g, reg);
|
||||
ctx->work_registers = MAX2(ctx->work_registers, r);
|
||||
return r;
|
||||
}
|
||||
|
||||
switch (reg) {
|
||||
case SSA_UNUSED_0:
|
||||
case SSA_UNUSED_1:
|
||||
return REGISTER_UNUSED;
|
||||
|
||||
default:
|
||||
unreachable("Unknown SSA register alias");
|
||||
}
|
||||
}
|
||||
|
||||
/* This routine performs the actual register allocation. It should be succeeded
|
||||
* by install_registers */
|
||||
|
||||
struct ra_graph *
|
||||
allocate_registers(compiler_context *ctx)
|
||||
{
|
||||
/* First, initialize the RA */
|
||||
struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
|
||||
|
||||
/* Create a primary (general purpose) class, as well as special purpose
|
||||
* pipeline register classes */
|
||||
|
||||
int primary_class = ra_alloc_reg_class(regs);
|
||||
int varying_class = ra_alloc_reg_class(regs);
|
||||
|
||||
/* Add the full set of work registers */
|
||||
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
for (int i = 0; i < work_count; ++i)
|
||||
ra_class_add_reg(regs, primary_class, i);
|
||||
|
||||
/* Add special registers */
|
||||
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
|
||||
ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
|
||||
|
||||
/* We're done setting up */
|
||||
ra_set_finalize(regs, NULL);
|
||||
|
||||
/* Transform the MIR into squeezed index form */
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
|
||||
ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
|
||||
ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
|
||||
}
|
||||
}
|
||||
|
||||
/* No register allocation to do with no SSA */
|
||||
|
||||
if (!ctx->temp_count)
|
||||
return NULL;
|
||||
|
||||
/* Let's actually do register allocation */
|
||||
int nodes = ctx->temp_count;
|
||||
struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
|
||||
|
||||
/* Set everything to the work register class, unless it has somewhere
|
||||
* special to go */
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
if (ins->ssa_args.dest < 0) continue;
|
||||
|
||||
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
|
||||
|
||||
int class = primary_class;
|
||||
|
||||
ra_set_node_class(g, ins->ssa_args.dest, class);
|
||||
}
|
||||
}
|
||||
|
||||
for (int index = 0; index <= ctx->max_hash; ++index) {
|
||||
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
|
||||
|
||||
if (temp) {
|
||||
unsigned reg = temp - 1;
|
||||
int t = find_or_allocate_temp(ctx, index);
|
||||
ra_set_node_reg(g, t, reg);
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine liveness */
|
||||
|
||||
int *live_start = malloc(nodes * sizeof(int));
|
||||
int *live_end = malloc(nodes * sizeof(int));
|
||||
|
||||
/* Initialize as non-existent */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
live_start[i] = live_end[i] = -1;
|
||||
}
|
||||
|
||||
int d = 0;
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
/* Dest is < 0 for st_vary instructions, which break
|
||||
* the usual SSA conventions. Liveness analysis doesn't
|
||||
* make sense on these instructions, so skip them to
|
||||
* avoid memory corruption */
|
||||
|
||||
if (ins->ssa_args.dest < 0) continue;
|
||||
|
||||
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
|
||||
/* If this destination is not yet live, it is now since we just wrote it */
|
||||
|
||||
int dest = ins->ssa_args.dest;
|
||||
|
||||
if (live_start[dest] == -1)
|
||||
live_start[dest] = d;
|
||||
}
|
||||
|
||||
/* Since we just used a source, the source might be
|
||||
* dead now. Scan the rest of the block for
|
||||
* invocations, and if there are none, the source dies
|
||||
* */
|
||||
|
||||
int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
|
||||
|
||||
for (int src = 0; src < 2; ++src) {
|
||||
int s = sources[src];
|
||||
|
||||
if (s < 0) continue;
|
||||
|
||||
if (s >= SSA_FIXED_MINIMUM) continue;
|
||||
|
||||
if (!mir_is_live_after(ctx, block, ins, s)) {
|
||||
live_end[s] = d;
|
||||
}
|
||||
}
|
||||
|
||||
++d;
|
||||
}
|
||||
}
|
||||
|
||||
/* If a node still hasn't been killed, kill it now */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
/* live_start == -1 most likely indicates a pinned output */
|
||||
|
||||
if (live_end[i] == -1)
|
||||
live_end[i] = d;
|
||||
}
|
||||
|
||||
/* Setup interference between nodes that are live at the same time */
|
||||
|
||||
for (int i = 0; i < nodes; ++i) {
|
||||
for (int j = i + 1; j < nodes; ++j) {
|
||||
if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
|
||||
ra_add_node_interference(g, i, j);
|
||||
}
|
||||
}
|
||||
|
||||
ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
|
||||
|
||||
if (!ra_allocate(g)) {
|
||||
unreachable("Error allocating registers\n");
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
free(live_start);
|
||||
free(live_end);
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
/* Once registers have been decided via register allocation
|
||||
* (allocate_registers), we need to rewrite the MIR to use registers instead of
|
||||
* SSA */
|
||||
|
||||
void
|
||||
install_registers(compiler_context *ctx, struct ra_graph *g)
|
||||
{
|
||||
mir_foreach_block(ctx, block) {
|
||||
mir_foreach_instr_in_block(block, ins) {
|
||||
if (ins->compact_branch) continue;
|
||||
|
||||
ssa_args args = ins->ssa_args;
|
||||
|
||||
switch (ins->type) {
|
||||
case TAG_ALU_4:
|
||||
ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
|
||||
|
||||
ins->registers.src2_imm = args.inline_constant;
|
||||
|
||||
if (args.inline_constant) {
|
||||
/* Encode inline 16-bit constant as a vector by default */
|
||||
|
||||
ins->registers.src2_reg = ins->inline_constant >> 11;
|
||||
|
||||
int lower_11 = ins->inline_constant & ((1 << 12) - 1);
|
||||
|
||||
uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
|
||||
ins->alu.src2 = imm << 2;
|
||||
} else {
|
||||
ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
|
||||
}
|
||||
|
||||
ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
|
||||
|
||||
break;
|
||||
|
||||
case TAG_LOAD_STORE_4: {
|
||||
if (OP_IS_STORE_VARY(ins->load_store.op)) {
|
||||
/* TODO: use ssa_args for st_vary */
|
||||
ins->load_store.reg = 0;
|
||||
} else {
|
||||
bool has_dest = args.dest >= 0;
|
||||
int ssa_arg = has_dest ? args.dest : args.src0;
|
||||
|
||||
ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue