From f6e128fb8f4e6aae9a476b2cb70df64385e0b519 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Wed, 16 Dec 2020 14:37:17 -0500
Subject: [PATCH] pan/bi: Remove old IR

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8135>
---
 src/panfrost/bifrost/compiler.h | 359 --------------------------------
 1 file changed, 359 deletions(-)

diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 1d8ba37a2a7..fb36d812dd0 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -33,331 +33,6 @@
 #include "panfrost/util/pan_ir.h"
 #include "util/u_math.h"
 
-/* Bifrost opcodes are tricky -- the same op may exist on both FMA and
- * ADD with two completely different opcodes, and opcodes can be varying
- * length in some cases. Then we have different opcodes for int vs float
- * and then sometimes even for different typesizes. Further, virtually
- * every op has a number of flags which depend on the op. In constrast
- * to Midgard where you have a strict ALU/LDST/TEX division and within
- * ALU you have strict int/float and that's it... here it's a *lot* more
- * involved. As such, we use something much higher level for our IR,
- * encoding "classes" of operations, letting the opcode details get
- * sorted out at emit time.
- *
- * Please keep this list alphabetized. Please use a dictionary if you
- * don't know how to do that.
- */
-
-enum bi_class {
-        BI_ADD,
-        BI_ATEST,
-        BI_BRANCH,
-        BI_CMP,
-        BI_BLEND,
-        BI_BITWISE,
-        BI_COMBINE,
-        BI_CONVERT,
-        BI_CSEL,
-        BI_DISCARD,
-        BI_FMA,
-        BI_FMOV,
-        BI_FREXP,
-        BI_IMATH,
-        BI_LOAD,
-        BI_LOAD_UNIFORM,
-        BI_LOAD_ATTR,
-        BI_LOAD_VAR,
-        BI_LOAD_VAR_ADDRESS,
-        BI_LOAD_TILE,
-        BI_MINMAX,
-        BI_MOV,
-        BI_REDUCE_FMA,
-        BI_SELECT,
-        BI_STORE,
-        BI_STORE_VAR,
-        BI_SPECIAL_ADD, /* _FAST on supported GPUs */
-        BI_SPECIAL_FMA, /* _FAST on supported GPUs */
-        BI_TABLE,
-        BI_TEXS,
-        BI_TEXC,
-        BI_TEXC_DUAL,
-        BI_ROUND,
-        BI_IMUL,
-        BI_ZS_EMIT,
-        BI_NUM_CLASSES
-};
-
-/* abs/neg/clamp valid for a float op */
-#define BI_MODS (1 << 0)
-
-/* Accepts a bi_cond */
-#define BI_CONDITIONAL (1 << 1)
-
-/* Accepts a bi_round */
-#define BI_ROUNDMODE (1 << 2)
-
-/* Can be scheduled to FMA */
-#define BI_SCHED_FMA (1 << 3)
-
-/* Can be scheduled to ADD */
-#define BI_SCHED_ADD (1 << 4)
-
-/* Most ALU ops can do either, actually */
-#define BI_SCHED_ALL (BI_SCHED_FMA | BI_SCHED_ADD)
-
-/* Along with setting BI_SCHED_ADD, eats up the entire cycle, so FMA must be
- * nopped out. Used for _FAST operations. */
-#define BI_SCHED_SLOW (1 << 5)
-
-/* Swizzling allowed for the 8/16-bit source */
-#define BI_SWIZZLABLE (1 << 6)
-
-/* For scheduling purposes this is a high latency instruction and must be at
- * the end of a clause. Implies ADD */
-#define BI_SCHED_HI_LATENCY (1 << 7)
-
-/* Intrinsic is vectorized and acts with `vector_channels` components */
-#define BI_VECTOR (1 << 8)
-
-/* Use a data register for src0/dest respectively, bypassing the usual
- * register accessor. */
-#define BI_DATA_REG_SRC (1 << 9)
-#define BI_DATA_REG_DEST (1 << 10)
-
-/* Quirk: cannot encode multiple abs on FMA in fp16 mode */
-#define BI_NO_ABS_ABS_FP16_FMA (1 << 11)
-
-/* It can't get any worse than csel4... can it? */
-#define BIR_SRC_COUNT 4
-
-/* BI_LD_VARY */
-struct bi_load_vary {
-        enum bi_sample interp_mode;
-        enum bi_update update_mode;
-        enum bi_varying_name var_id;
-        unsigned index;
-        bool immediate;
-        bool special;
-        bool reuse;
-        bool flat;
-};
-
-/* BI_BRANCH encoding the details of the branch itself as well as a pointer to
- * the target. We forward declare bi_block since this is mildly circular (not
- * strictly, but this order of the file makes more sense I think)
- *
- * We define our own enum of conditions since the conditions in the hardware
- * packed in crazy ways that would make manipulation unweildly (meaning changes
- * based on slot swapping, etc), so we defer dealing with that until emit time.
- * Likewise, we expose NIR types instead of the crazy branch types, although
- * the restrictions do eventually apply of course. */
-
-struct bi_block;
-
-/* Sync with gen-pack.py */
-enum bi_cond {
-        BI_COND_ALWAYS = 0,
-        BI_COND_LT,
-        BI_COND_LE,
-        BI_COND_GE,
-        BI_COND_GT,
-        BI_COND_EQ,
-        BI_COND_NE,
-};
-
-/* Opcodes within a class */
-enum bi_minmax_op {
-        BI_MINMAX_MIN,
-        BI_MINMAX_MAX
-};
-
-enum bi_bitwise_op {
-        BI_BITWISE_AND,
-        BI_BITWISE_OR,
-        BI_BITWISE_XOR,
-        BI_BITWISE_ARSHIFT,
-};
-
-enum bi_imath_op {
-        BI_IMATH_ADD,
-        BI_IMATH_SUB,
-};
-
-enum bi_imul_op {
-        BI_IMUL_IMUL,
-};
-
-enum bi_table_op {
-        /* fp32 log2() with low precision, suitable for GL or half_log2() in
-         * CL. In the first argument, takes x. Letting u be such that x =
-         * 2^{-m} u with m integer and 0.75 <= u < 1.5, returns
-         * log2(u) / (u - 1). */
-
-        BI_TABLE_LOG2_U_OVER_U_1_LOW,
-};
-
-enum bi_reduce_op {
-        /* Takes two fp32 arguments and returns x + frexp(y). Used in
-         * low-precision log2 argument reduction on newer models. */
-
-        BI_REDUCE_ADD_FREXPM,
-};
-
-enum bi_frexp_op {
-        BI_FREXPE_LOG,
-};
-
-enum bi_special_op {
-        BI_SPECIAL_FRCP,
-        BI_SPECIAL_FRSQ,
-
-        /* fp32 exp2() with low precision, suitable for half_exp2() in CL or
-         * exp2() in GL. In the first argument, it takes f2i_rte(x * 2^24). In
-         * the second, it takes x itself. */
-        BI_SPECIAL_EXP2_LOW,
-        BI_SPECIAL_IABS,
-
-        /* cubemap coordinates extraction helpers */
-        BI_SPECIAL_CUBEFACE1,
-        BI_SPECIAL_CUBEFACE2,
-        BI_SPECIAL_CUBE_SSEL,
-        BI_SPECIAL_CUBE_TSEL,
-
-        /* Cross-lane permute, used to implement dFd{x,y} */
-        BI_SPECIAL_CLPER_V6,
-        BI_SPECIAL_CLPER_V7,
-};
-
-struct bi_bitwise {
-        bool dest_invert;
-        bool src1_invert;
-        bool rshift; /* false for lshift */
-};
-
-struct bi_texture {
-        /* Constant indices. Indirect would need to be in src[..] like normal,
-         * we can reserve some sentinels there for that for future. */
-        unsigned texture_index, sampler_index, varying_index;
-
-        /* Should the LOD be computed based on neighboring pixels? Only valid
-         * in fragment shaders. */
-        bool compute_lod;
-};
-
-struct bi_clper {
-        struct {
-                enum bi_lane_op lane_op_mod;
-                enum bi_inactive_result inactive_res;
-        } clper;
-        enum bi_subgroup subgroup_sz;
-};
-
-struct bi_attribute {
-        unsigned index;
-        bool immediate;
-};
-
-typedef struct {
-        struct list_head link; /* Must be first */
-        enum bi_class type;
-
-        /* Indices, see pan_ssa_index etc. Note zero is special cased
-         * to "no argument" */
-        unsigned dest;
-        unsigned src[BIR_SRC_COUNT];
-
-        /* 32-bit word offset for destination, added to the register number in
-         * RA when lowering combines */
-        unsigned dest_offset;
-
-        /* If one of the sources has BIR_INDEX_CONSTANT */
-        union {
-                uint64_t u64;
-                uint32_t u32;
-                uint16_t u16[2];
-                uint8_t u8[4];
-        } constant;
-
-        /* Floating-point modifiers, type/class permitting. If not
-         * allowed for the type/class, these are ignored. */
-        enum bi_clamp clamp;
-        bool src_abs[BIR_SRC_COUNT];
-        bool src_neg[BIR_SRC_COUNT];
-
-        /* Round mode (requires BI_ROUNDMODE) */
-        enum bi_round round;
-
-        /* Destination type. Usually the type of the instruction
-         * itself, but if sources and destination have different
-         * types, the type of the destination wins (so f2i would be
-         * int). Zero if there is no destination. Bitsize included */
-        nir_alu_type dest_type;
-
-        /* Source types if required by the class */
-        nir_alu_type src_types[BIR_SRC_COUNT];
-
-        /* register_format if applicable */
-        nir_alu_type format;
-
-        /* If the source type is 8-bit or 16-bit such that SIMD is possible,
-         * and the class has BI_SWIZZLABLE, this is a swizzle in the usual
-         * sense. On non-SIMD instructions, it can be used for component
-         * selection, so we don't have to special case extraction. */
-        uint8_t swizzle[BIR_SRC_COUNT][NIR_MAX_VEC_COMPONENTS];
-
-        /* For VECTOR ops, how many channels are written? */
-        unsigned vector_channels;
-
-        /* For texture ops, the skip bit. Set if helper invocations can skip
-         * the operation. That is, set if the result of this texture operation
-         * is never used for cross-lane operation (including texture
-         * coordinates and derivatives) as determined by data flow analysis
-         * (like Midgard) */
-        bool skip;
-
-        /* The comparison op. BI_COND_ALWAYS may not be valid. */
-        enum bi_cond cond;
-
-        /* For memory ops, base address */
-        enum bi_seg segment;
-
-        /* Can we spill the value written here? Used to prevent
-         * useless double fills */
-        bool no_spill;
-
-        /* A class-specific op from which the actual opcode can be derived
-         * (along with the above information) */
-
-        union {
-                enum bi_minmax_op minmax;
-                enum bi_bitwise_op bitwise;
-                enum bi_special_op special;
-                enum bi_reduce_op reduce;
-                enum bi_table_op table;
-                enum bi_frexp_op frexp;
-                enum bi_imath_op imath;
-                enum bi_imul_op imul;
-
-                /* For FMA/ADD, should we add a biased exponent? */
-                bool mscale;
-        } op;
-
-        /* Union for class-specific information */
-        union {
-                enum bi_sem minmax;
-                struct bi_load_vary load_vary;
-                struct bi_block *branch_target;
-
-                /* For BLEND -- the location 0-7 */
-                unsigned blend_location;
-
-                struct bi_bitwise bitwise;
-                struct bi_texture texture;
-                struct bi_clper special;
-                struct bi_attribute attribute;
-        };
-} bi_instruction;
-
 /* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
  * To express widen, use the correpsonding replicated form, i.e. H01 = identity
  * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
@@ -829,24 +504,6 @@ bi_remove_instruction(bi_instr *ins)
         list_del(&ins->link);
 }
 
-/* If high bits are set, instead of SSA/registers, we have specials indexed by
- * the low bits if necessary.
- *
- *  Fixed register: do not allocate register, do not collect $200.
- *  Uniform: access a uniform register given by low bits.
- *  Constant: access the specified constant (specifies a bit offset / shift)
- *  Zero: special cased to avoid wasting a constant
- *  Passthrough: a bifrost_packed_src to passthrough T/T0/T1
- */
-
-#define BIR_INDEX_REGISTER (1 << 31)
-#define BIR_INDEX_CONSTANT (1 << 30)
-#define BIR_INDEX_PASS     (1 << 29)
-#define BIR_INDEX_FAU      (1 << 28)
-
-/* Shift everything away */
-#define BIR_INDEX_ZERO (BIR_INDEX_CONSTANT | 64)
-
 enum bir_fau {
         BIR_FAU_ZERO = 0,
         BIR_FAU_LANE_ID = 1,
@@ -873,11 +530,6 @@ bi_fau(enum bir_fau value, bool hi)
         };
 }
 
-/* Keep me synced please so we can check src & BIR_SPECIAL */
-
-#define BIR_SPECIAL        (BIR_INDEX_REGISTER | BIR_INDEX_CONSTANT | \
-                            BIR_INDEX_PASS | BIR_INDEX_FAU)
-
 static inline unsigned
 bi_max_temp(bi_context *ctx)
 {
@@ -899,17 +551,6 @@ bi_temp_reg(bi_context *ctx)
         return bi_get_index(alloc, true, 0);
 }
 
-static inline unsigned
-bi_make_temp(bi_context *ctx)
-{
-        return (ctx->impl->ssa_alloc + 1 + ctx->temp_alloc++) << 1;
-}
-
-static inline unsigned
-bi_make_temp_reg(bi_context *ctx)
-{
-        return ((ctx->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG;
-}
 
 /* Inline constants automatically, will be lowered out by bi_lower_fau where a
  * constant is not allowed. load_const_to_scalar gaurantees that this makes