mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 22:18:18 +02:00
This is a set of changes that optimizes the memory use of fragment operation programs (by using and transmitting only as much memory as is needed for the fragment ops programs, instead of maximal sizes) and eliminates the dependency on hard-coded maximal program sizes. State that is not dependent on fragment facing (i.e. that isn't using two-sided stenciling) will only save and transmit a single fragment operation program, instead of two identical programs. - Added the ability to emit an LNOP (No Operation (Load)) instruction. This is used to pad the generated fragment operations programs to a multiple of 8 bytes, which is necessary for proper operation of the dual instruction pipeline, and is also required for proper SPU-side decoding. - Added the ability to allocate and manage a variable-length struct cell_command_fragment_ops. This structure now puts the generated function field at the end, where it can be as large as necessary. - On the PPU side, we now combine the generated front-facing and back-facing code into a single variable-length buffer (and only use one if the two sets of code are identical) for transmission to the SPU. - On the SPU side, we pull the correct sizes out of the buffer, allocate a new code buffer if the one we have isn't large enough, and save the code to that buffer. The buffer is deallocated when the SPU exits. - Commented out the emit_fetch() static function, which was not being used.
440 lines
14 KiB
C
440 lines
14 KiB
C
/*
|
|
* (C) Copyright IBM Corporation 2008
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
|
* the Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
/**
 * \file
 * Real-time assembly generation interface for Cell B.E. SPEs.
 * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
 *
 * \author Ian Romanick <idr@us.ibm.com>
 * \author Brian Paul
 */
|
|
|
|
#ifndef RTASM_PPC_SPE_H
|
|
#define RTASM_PPC_SPE_H
|
|
|
|
/** Size of one encoded SPE instruction: 4 bytes. */
#define SPE_INST_SIZE 4

/** Number of general-purpose SIMD registers. */
#define SPE_NUM_REGS 128

/** Return Address register (aka $lr / Link Register). */
#define SPE_REG_RA 0

/** Stack Pointer register (aka $sp). */
#define SPE_REG_SP 1
|
|
|
|
|
|
struct spe_function
|
|
{
|
|
uint32_t *store; /**< instruction buffer */
|
|
uint num_inst;
|
|
uint max_inst;
|
|
|
|
/**
|
|
* The "set count" reflects the number of nested register sets
|
|
* are allowed. In the unlikely case that we exceed the set count,
|
|
* register allocation will start to be confused, which is critical
|
|
* enough that we check for it.
|
|
*/
|
|
unsigned char set_count;
|
|
|
|
/**
|
|
* Flags for used and unused registers. Each byte corresponds to a
|
|
* register; a 0 in that byte means that the register is available.
|
|
* A value of 1 means that the register was allocated in the current
|
|
* register set. Any other value N means that the register was allocated
|
|
* N register sets ago.
|
|
*
|
|
* \sa
|
|
* spe_allocate_register, spe_allocate_available_register,
|
|
* spe_allocate_register_set, spe_release_register_set, spe_release_register,
|
|
*/
|
|
unsigned char regs[SPE_NUM_REGS];
|
|
|
|
boolean print; /**< print/dump instructions as they're emitted? */
|
|
int indent; /**< number of spaces to indent */
|
|
};
|
|
|
|
|
|
extern void spe_init_func(struct spe_function *p, unsigned code_size);
|
|
extern void spe_release_func(struct spe_function *p);
|
|
extern unsigned spe_code_size(const struct spe_function *p);
|
|
|
|
extern int spe_allocate_available_register(struct spe_function *p);
|
|
extern int spe_allocate_register(struct spe_function *p, int reg);
|
|
extern void spe_release_register(struct spe_function *p, int reg);
|
|
extern void spe_allocate_register_set(struct spe_function *p);
|
|
extern void spe_release_register_set(struct spe_function *p);
|
|
|
|
extern unsigned
|
|
spe_get_registers_used(const struct spe_function *p, ubyte used[]);
|
|
|
|
extern void spe_print_code(struct spe_function *p, boolean enable);
|
|
extern void spe_indent(struct spe_function *p, int spaces);
|
|
extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
|
|
|
|
|
|
#endif /* RTASM_PPC_SPE_H */
|
|
|
|
/*
 * Instruction-list macros.
 *
 * Everything from here on sits outside the include guard.  Each instruction
 * is described by one EMIT* entry that names the emitter function and gives
 * its opcode.  If the including file has not defined EMIT itself, the
 * defaults below turn every entry into an extern prototype (the opcode, and
 * the bias of EMIT_RI8, are ignored) and define UNDEF_EMIT_MACROS so that
 * the defaults are removed again at the end of this file.
 *
 * Every emitter takes the struct spe_function being assembled as its first
 * argument; the macro suffix selects the remaining operands.
 */
#ifndef EMIT
#define EMIT(_name, _op) \
   extern void _name (struct spe_function *p);
#define EMIT_(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT);
#define EMIT_R(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA);
#define EMIT_RR(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      unsigned rB);
#define EMIT_RRR(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      unsigned rB, unsigned rC);
#define EMIT_RI7(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      int imm);
#define EMIT_RI8(_name, _op, bias) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      int imm);
#define EMIT_RI10(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      int imm);
#define EMIT_RI10s(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                      int imm);
#define EMIT_RI16(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, int imm);
#define EMIT_RI18(_name, _op) \
   extern void _name (struct spe_function *p, unsigned rT, int imm);
#define EMIT_I16(_name, _op) \
   extern void _name (struct spe_function *p, int imm);
#define UNDEF_EMIT_MACROS
#endif /* EMIT */
|
|
|
|
|
|
/* Memory load / store instructions
|
|
*/
|
|
EMIT_RR (spe_lqx, 0x1c4)
|
|
EMIT_RI16(spe_lqa, 0x061)
|
|
EMIT_RI16(spe_lqr, 0x067)
|
|
EMIT_RR (spe_stqx, 0x144)
|
|
EMIT_RI16(spe_stqa, 0x041)
|
|
EMIT_RI16(spe_stqr, 0x047)
|
|
EMIT_RI7 (spe_cbd, 0x1f4)
|
|
EMIT_RR (spe_cbx, 0x1d4)
|
|
EMIT_RI7 (spe_chd, 0x1f5)
|
|
EMIT_RI7 (spe_chx, 0x1d5)
|
|
EMIT_RI7 (spe_cwd, 0x1f6)
|
|
EMIT_RI7 (spe_cwx, 0x1d6)
|
|
EMIT_RI7 (spe_cdd, 0x1f7)
|
|
EMIT_RI7 (spe_cdx, 0x1d7)
|
|
|
|
|
|
/* Constant formation instructions
|
|
*/
|
|
EMIT_RI16(spe_ilh, 0x083)
|
|
EMIT_RI16(spe_ilhu, 0x082)
|
|
EMIT_RI16(spe_il, 0x081)
|
|
EMIT_RI18(spe_ila, 0x021)
|
|
EMIT_RI16(spe_iohl, 0x0c1)
|
|
EMIT_RI16(spe_fsmbi, 0x065)
|
|
|
|
|
|
|
|
/* Integer and logical instructions
|
|
*/
|
|
EMIT_RR (spe_ah, 0x0c8)
|
|
EMIT_RI10(spe_ahi, 0x01d)
|
|
EMIT_RR (spe_a, 0x0c0)
|
|
EMIT_RI10s(spe_ai, 0x01c)
|
|
EMIT_RR (spe_sfh, 0x048)
|
|
EMIT_RI10(spe_sfhi, 0x00d)
|
|
EMIT_RR (spe_sf, 0x040)
|
|
EMIT_RI10(spe_sfi, 0x00c)
|
|
EMIT_RR (spe_addx, 0x340)
|
|
EMIT_RR (spe_cg, 0x0c2)
|
|
EMIT_RR (spe_cgx, 0x342)
|
|
EMIT_RR (spe_sfx, 0x341)
|
|
EMIT_RR (spe_bg, 0x042)
|
|
EMIT_RR (spe_bgx, 0x343)
|
|
EMIT_RR (spe_mpy, 0x3c4)
|
|
EMIT_RR (spe_mpyu, 0x3cc)
|
|
EMIT_RI10(spe_mpyi, 0x074)
|
|
EMIT_RI10(spe_mpyui, 0x075)
|
|
EMIT_RRR (spe_mpya, 0x00c)
|
|
EMIT_RR (spe_mpyh, 0x3c5)
|
|
EMIT_RR (spe_mpys, 0x3c7)
|
|
EMIT_RR (spe_mpyhh, 0x3c6)
|
|
EMIT_RR (spe_mpyhha, 0x346)
|
|
EMIT_RR (spe_mpyhhu, 0x3ce)
|
|
EMIT_RR (spe_mpyhhau, 0x34e)
|
|
EMIT_R (spe_clz, 0x2a5)
|
|
EMIT_R (spe_cntb, 0x2b4)
|
|
EMIT_R (spe_fsmb, 0x1b6)
|
|
EMIT_R (spe_fsmh, 0x1b5)
|
|
EMIT_R (spe_fsm, 0x1b4)
|
|
EMIT_R (spe_gbb, 0x1b2)
|
|
EMIT_R (spe_gbh, 0x1b1)
|
|
EMIT_R (spe_gb, 0x1b0)
|
|
EMIT_RR (spe_avgb, 0x0d3)
|
|
EMIT_RR (spe_absdb, 0x053)
|
|
EMIT_RR (spe_sumb, 0x253)
|
|
EMIT_R (spe_xsbh, 0x2b6)
|
|
EMIT_R (spe_xshw, 0x2ae)
|
|
EMIT_R (spe_xswd, 0x2a6)
|
|
EMIT_RR (spe_and, 0x0c1)
|
|
EMIT_RR (spe_andc, 0x2c1)
|
|
EMIT_RI10s(spe_andbi, 0x016)
|
|
EMIT_RI10s(spe_andhi, 0x015)
|
|
EMIT_RI10s(spe_andi, 0x014)
|
|
EMIT_RR (spe_or, 0x041)
|
|
EMIT_RR (spe_orc, 0x2c9)
|
|
EMIT_RI10s(spe_orbi, 0x006)
|
|
EMIT_RI10s(spe_orhi, 0x005)
|
|
EMIT_RI10s(spe_ori, 0x004)
|
|
EMIT_R (spe_orx, 0x1f0)
|
|
EMIT_RR (spe_xor, 0x241)
|
|
EMIT_RI10s(spe_xorbi, 0x046)
|
|
EMIT_RI10s(spe_xorhi, 0x045)
|
|
EMIT_RI10s(spe_xori, 0x044)
|
|
EMIT_RR (spe_nand, 0x0c9)
|
|
EMIT_RR (spe_nor, 0x049)
|
|
EMIT_RR (spe_eqv, 0x249)
|
|
EMIT_RRR (spe_selb, 0x008)
|
|
EMIT_RRR (spe_shufb, 0x00b)
|
|
|
|
|
|
/* Shift and rotate instructions
|
|
*/
|
|
EMIT_RR (spe_shlh, 0x05f)
|
|
EMIT_RI7 (spe_shlhi, 0x07f)
|
|
EMIT_RR (spe_shl, 0x05b)
|
|
EMIT_RI7 (spe_shli, 0x07b)
|
|
EMIT_RR (spe_shlqbi, 0x1db)
|
|
EMIT_RI7 (spe_shlqbii, 0x1fb)
|
|
EMIT_RR (spe_shlqby, 0x1df)
|
|
EMIT_RI7 (spe_shlqbyi, 0x1ff)
|
|
EMIT_RR (spe_shlqbybi, 0x1cf)
|
|
EMIT_RR (spe_roth, 0x05c)
|
|
EMIT_RI7 (spe_rothi, 0x07c)
|
|
EMIT_RR (spe_rot, 0x058)
|
|
EMIT_RI7 (spe_roti, 0x078)
|
|
EMIT_RR (spe_rotqby, 0x1dc)
|
|
EMIT_RI7 (spe_rotqbyi, 0x1fc)
|
|
EMIT_RR (spe_rotqbybi, 0x1cc)
|
|
EMIT_RR (spe_rotqbi, 0x1d8)
|
|
EMIT_RI7 (spe_rotqbii, 0x1f8)
|
|
EMIT_RR (spe_rothm, 0x05d)
|
|
EMIT_RI7 (spe_rothmi, 0x07d)
|
|
EMIT_RR (spe_rotm, 0x059)
|
|
EMIT_RI7 (spe_rotmi, 0x079)
|
|
EMIT_RR (spe_rotqmby, 0x1dd)
|
|
EMIT_RI7 (spe_rotqmbyi, 0x1fd)
|
|
EMIT_RR (spe_rotqmbybi, 0x1cd)
|
|
EMIT_RR (spe_rotqmbi, 0x1c9)
|
|
EMIT_RI7 (spe_rotqmbii, 0x1f9)
|
|
EMIT_RR (spe_rotmah, 0x05e)
|
|
EMIT_RI7 (spe_rotmahi, 0x07e)
|
|
EMIT_RR (spe_rotma, 0x05a)
|
|
EMIT_RI7 (spe_rotmai, 0x07a)
|
|
|
|
|
|
/* Compare, branch, and halt instructions
|
|
*/
|
|
EMIT_RR (spe_heq, 0x3d8)
|
|
EMIT_RI10(spe_heqi, 0x07f)
|
|
EMIT_RR (spe_hgt, 0x258)
|
|
EMIT_RI10(spe_hgti, 0x04f)
|
|
EMIT_RR (spe_hlgt, 0x2d8)
|
|
EMIT_RI10(spe_hlgti, 0x05f)
|
|
EMIT_RR (spe_ceqb, 0x3d0)
|
|
EMIT_RI10(spe_ceqbi, 0x07e)
|
|
EMIT_RR (spe_ceqh, 0x3c8)
|
|
EMIT_RI10(spe_ceqhi, 0x07d)
|
|
EMIT_RR (spe_ceq, 0x3c0)
|
|
EMIT_RI10(spe_ceqi, 0x07c)
|
|
EMIT_RR (spe_cgtb, 0x250)
|
|
EMIT_RI10(spe_cgtbi, 0x04e)
|
|
EMIT_RR (spe_cgth, 0x248)
|
|
EMIT_RI10(spe_cgthi, 0x04d)
|
|
EMIT_RR (spe_cgt, 0x240)
|
|
EMIT_RI10(spe_cgti, 0x04c)
|
|
EMIT_RR (spe_clgtb, 0x2d0)
|
|
EMIT_RI10(spe_clgtbi, 0x05e)
|
|
EMIT_RR (spe_clgth, 0x2c8)
|
|
EMIT_RI10(spe_clgthi, 0x05d)
|
|
EMIT_RR (spe_clgt, 0x2c0)
|
|
EMIT_RI10(spe_clgti, 0x05c)
|
|
EMIT_I16 (spe_br, 0x064)
|
|
EMIT_I16 (spe_bra, 0x060)
|
|
EMIT_RI16(spe_brsl, 0x066)
|
|
EMIT_RI16(spe_brasl, 0x062)
|
|
EMIT_RI16(spe_brnz, 0x042)
|
|
EMIT_RI16(spe_brz, 0x040)
|
|
EMIT_RI16(spe_brhnz, 0x046)
|
|
EMIT_RI16(spe_brhz, 0x044)
|
|
|
|
/* Control instructions
|
|
*/
|
|
EMIT (spe_lnop, 0x001)
|
|
|
|
/*
 * Instructions declared by hand rather than through the EMIT* list.
 *
 * NOTE(review): for the bi / iret / bis* / bi*z family, d and e presumably
 * carry the ISA's interrupt disable / enable feature bits -- confirm against
 * the implementation.
 */
extern void
spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);

extern void
spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);

extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e);
extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA,
                       int d, int e);
extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA,
                     int d, int e);
extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA,
                    int d, int e);
extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA,
                     int d, int e);
extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA,
                     int d, int e);
extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
                      int d, int e);
|
|
|
|
|
|
/** Load/splat immediate float into rT. */
extern void
spe_load_float(struct spe_function *p, unsigned rT, float x);

/** Load/splat immediate int into rT. */
extern void
spe_load_int(struct spe_function *p, unsigned rT, int i);

/** Load/splat immediate unsigned int into rT. */
extern void
spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui);

/** And immediate value into rT. */
extern void
spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA,
             unsigned int ui);

/** Xor immediate value into rT. */
extern void
spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA,
             unsigned int ui);

/** Compare equal with immediate value. */
extern void
spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA,
                       unsigned int ui);

/** Compare greater with immediate value. */
extern void
spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA,
                         unsigned int ui);

/** Replicate word 0 of rA across rT. */
extern void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);

/** rT = complement_all_bits(rA). */
extern void
spe_complement(struct spe_function *p, unsigned rT, unsigned rA);

/** rT = rA. */
extern void
spe_move(struct spe_function *p, unsigned rT, unsigned rA);

/** rT = {0,0,0,0}. */
extern void
spe_zero(struct spe_function *p, unsigned rT);

/** rT = splat(rA, word) */
extern void
spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word);

/** rT = float min(rA, rB) */
extern void
spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);

/** rT = float max(rA, rB) */
extern void
spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
|
|
|
|
|
|
/* Floating-point instructions
|
|
*/
|
|
EMIT_RR (spe_fa, 0x2c4)
|
|
EMIT_RR (spe_dfa, 0x2cc)
|
|
EMIT_RR (spe_fs, 0x2c5)
|
|
EMIT_RR (spe_dfs, 0x2cd)
|
|
EMIT_RR (spe_fm, 0x2c6)
|
|
EMIT_RR (spe_dfm, 0x2ce)
|
|
EMIT_RRR (spe_fma, 0x00e)
|
|
EMIT_RR (spe_dfma, 0x35c)
|
|
EMIT_RRR (spe_fnms, 0x00d)
|
|
EMIT_RR (spe_dfnms, 0x35e)
|
|
EMIT_RRR (spe_fms, 0x00f)
|
|
EMIT_RR (spe_dfms, 0x35d)
|
|
EMIT_RR (spe_dfnma, 0x35f)
|
|
EMIT_R (spe_frest, 0x1b8)
|
|
EMIT_R (spe_frsqest, 0x1b9)
|
|
EMIT_RR (spe_fi, 0x3d4)
|
|
EMIT_RI8 (spe_csflt, 0x1da, 155)
|
|
EMIT_RI8 (spe_cflts, 0x1d8, 173)
|
|
EMIT_RI8 (spe_cuflt, 0x1db, 155)
|
|
EMIT_RI8 (spe_cfltu, 0x1d9, 173)
|
|
EMIT_R (spe_frds, 0x3b9)
|
|
EMIT_R (spe_fesd, 0x3b8)
|
|
EMIT_RR (spe_dfceq, 0x3c3)
|
|
EMIT_RR (spe_dfcmeq, 0x3cb)
|
|
EMIT_RR (spe_dfcgt, 0x2c3)
|
|
EMIT_RR (spe_dfcmgt, 0x2cb)
|
|
EMIT_RI7 (spe_dftsv, 0x3bf)
|
|
EMIT_RR (spe_fceq, 0x3c2)
|
|
EMIT_RR (spe_fcmeq, 0x3ca)
|
|
EMIT_RR (spe_fcgt, 0x2c2)
|
|
EMIT_RR (spe_fcmgt, 0x2ca)
|
|
EMIT_R (spe_fscrwr, 0x3ba)
|
|
EMIT_ (spe_fscrrd, 0x398)
|
|
|
|
|
|
/* Channel instructions
|
|
*/
|
|
EMIT_R (spe_rdch, 0x00d)
|
|
EMIT_R (spe_rdchcnt, 0x00f)
|
|
EMIT_R (spe_wrch, 0x10d)
|
|
|
|
|
|
/*
 * Remove the default EMIT* definitions again, but only when this file
 * supplied them itself (signalled by UNDEF_EMIT_MACROS); definitions
 * provided by the including file are left alone.
 */
#ifdef UNDEF_EMIT_MACROS
#undef EMIT
#undef EMIT_
#undef EMIT_R
#undef EMIT_RR
#undef EMIT_RRR
#undef EMIT_RI7
#undef EMIT_RI8
#undef EMIT_RI10
#undef EMIT_RI10s
#undef EMIT_RI16
#undef EMIT_RI18
#undef EMIT_I16
#undef UNDEF_EMIT_MACROS
#endif /* UNDEF_EMIT_MACROS */
|