mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
Add some more opcodes
This commit is contained in:
parent
2d2f5a4b19
commit
dce3a91a8d
2 changed files with 146 additions and 157 deletions
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include "x86sse.h"
|
||||
|
||||
#define DISASSEM 0
|
||||
#define DISASSEM 1
|
||||
#define X86_TWOB 0x0f
|
||||
|
||||
/* Emit bytes to the instruction stream:
|
||||
|
|
@ -21,10 +21,9 @@ static void emit_1i( struct x86_function *p, GLint i0 )
|
|||
static void disassem( struct x86_function *p, const char *fn )
|
||||
{
|
||||
#if DISASSEM
|
||||
static const char *last_fn;
|
||||
if (fn && fn != last_fn) {
|
||||
if (fn && fn != p->fn) {
|
||||
_mesa_printf("0x%x: %s\n", p->csr, fn);
|
||||
last_fn = fn;
|
||||
p->fn = fn;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
@ -75,7 +74,8 @@ static void emit_modrm( struct x86_function *p,
|
|||
|
||||
/* Oh-oh we've stumbled into the SIB thing.
|
||||
*/
|
||||
if (regmem.idx == reg_SP) {
|
||||
if (regmem.file == file_REG32 &&
|
||||
regmem.idx == reg_SP) {
|
||||
emit_1ub_fn(p, 0x24, 0); /* simplistic! */
|
||||
}
|
||||
|
||||
|
|
@ -357,6 +357,38 @@ void sse_movlps( struct x86_function *p,
|
|||
/* SSE operations often only have one format, with dest constrained to
|
||||
* be a register:
|
||||
*/
|
||||
void sse_maxps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0x5F);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_divss( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_minps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0x5D);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_subps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0x5C);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_mulps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
|
|
@ -373,6 +405,39 @@ void sse_addps( struct x86_function *p,
|
|||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_addss( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_andps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0x54);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse2_rcpss( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
|
||||
emit_modrm( p, dst, src );
|
||||
}
|
||||
|
||||
void sse_rsqrtss( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
|
||||
emit_modrm( p, dst, src );
|
||||
|
||||
}
|
||||
|
||||
void sse_movhlps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
|
|
@ -488,14 +553,11 @@ void x86_test( struct x86_function *p,
|
|||
void sse2_pshufd( struct x86_function *p,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg arg0,
|
||||
GLubyte x,
|
||||
GLubyte y,
|
||||
GLubyte z,
|
||||
GLubyte w)
|
||||
GLubyte shuf)
|
||||
{
|
||||
emit_3ub(p, 0x66, X86_TWOB, 0x70);
|
||||
emit_modrm(p, dest, arg0);
|
||||
emit_1ub(p, (x|(y<<2)|(z<<4)|w<<6));
|
||||
emit_1ub(p, shuf);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -505,14 +567,21 @@ void sse2_pshufd( struct x86_function *p,
|
|||
void sse_shufps( struct x86_function *p,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg arg0,
|
||||
GLubyte x,
|
||||
GLubyte y,
|
||||
GLubyte z,
|
||||
GLubyte w)
|
||||
GLubyte shuf)
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0xC6);
|
||||
emit_modrm(p, dest, arg0);
|
||||
emit_1ub(p, (x|(y<<2)|(z<<4)|w<<6));
|
||||
emit_1ub(p, shuf);
|
||||
}
|
||||
|
||||
void sse_cmpps( struct x86_function *p,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg arg0,
|
||||
GLubyte cc)
|
||||
{
|
||||
emit_2ub(p, X86_TWOB, 0xC2);
|
||||
emit_modrm(p, dest, arg0);
|
||||
emit_1ub(p, cc);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -541,6 +610,8 @@ void x86_release_func( struct x86_function *p )
|
|||
|
||||
void (*x86_get_func( struct x86_function *p ))(void)
|
||||
{
|
||||
if (DISASSEM)
|
||||
_mesa_printf("disassemble %p %p\n", p->store, p->csr);
|
||||
return (void (*)())p->store;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ struct x86_function {
|
|||
GLubyte *csr;
|
||||
GLuint stack_offset;
|
||||
GLint need_emms;
|
||||
const char *fn;
|
||||
};
|
||||
|
||||
enum x86_reg_file {
|
||||
|
|
@ -60,6 +61,17 @@ enum x86_cc {
|
|||
cc_NE /* not equal / not zero */
|
||||
};
|
||||
|
||||
/* Comparison predicates, numbered 0..7 in declaration order.  Intended
 * for the cc argument of sse_cmpps(), which emits the value as the
 * instruction's immediate byte.
 */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
|
||||
|
||||
#define cc_Z cc_E
|
||||
#define cc_NZ cc_NE
|
||||
|
||||
|
|
@ -86,8 +98,6 @@ struct x86_reg x86_deref( struct x86_reg reg );
|
|||
struct x86_reg x86_get_base_reg( struct x86_reg reg );
|
||||
|
||||
|
||||
|
||||
|
||||
/* Labels, jumps and fixup:
|
||||
*/
|
||||
GLubyte *x86_get_label( struct x86_function *p );
|
||||
|
|
@ -96,162 +106,70 @@ void x86_jcc( struct x86_function *p,
|
|||
enum x86_cc cc,
|
||||
GLubyte *label );
|
||||
|
||||
/* Always use a 32bit offset for forward jumps:
|
||||
*/
|
||||
GLubyte *x86_jcc_forward( struct x86_function *p,
|
||||
enum x86_cc cc );
|
||||
|
||||
/* Fixup offset from forward jump:
|
||||
*/
|
||||
void x86_fixup_fwd_jump( struct x86_function *p,
|
||||
GLubyte *fixup );
|
||||
|
||||
void x86_push( struct x86_function *p,
|
||||
struct x86_reg reg );
|
||||
|
||||
void x86_pop( struct x86_function *p,
|
||||
struct x86_reg reg );
|
||||
|
||||
void x86_inc( struct x86_function *p,
|
||||
struct x86_reg reg );
|
||||
|
||||
void x86_dec( struct x86_function *p,
|
||||
struct x86_reg reg );
|
||||
|
||||
void x86_ret( struct x86_function *p );
|
||||
/* Macros for sse_shufps() and sse2_pshufd():
 *
 * SHUF packs four 2-bit element selectors into one shuffle byte, with
 * _x in bits 0-1, _y in bits 2-3, _z in bits 4-5 and _w in bits 6-7.
 * SHUF_NOOP is the identity selection (0,1,2,3).
 * GET_SHUF extracts the 2-bit selector at position idx (0..3) from a
 * packed shuffle byte.
 */
#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP               SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
|
||||
|
||||
void mmx_emms( struct x86_function *p );
|
||||
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
|
||||
void x86_mov( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
|
||||
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
|
||||
void x86_xor( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
|
||||
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
|
||||
|
||||
void x86_cmp( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse2_movd( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void mmx_movd( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void mmx_movq( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movss( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movaps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movups( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movhps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movlps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
/* SSE operations often only have one format, with dest constrained to
|
||||
* be a register:
|
||||
*/
|
||||
void sse_mulps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_addps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movhlps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_movlhps( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse2_cvtps2dq( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse2_packssdw( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse2_packsswb( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse2_packuswb( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void sse_cvtps2pi( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void mmx_packssdw( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void mmx_packuswb( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
|
||||
/* Load effective address:
|
||||
*/
|
||||
void x86_lea( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
void x86_test( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src );
|
||||
|
||||
/* Perform a reduced swizzle in a single sse instruction:
|
||||
*/
|
||||
void sse2_pshufd( struct x86_function *p,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg arg0,
|
||||
GLubyte x,
|
||||
GLubyte y,
|
||||
GLubyte z,
|
||||
GLubyte w );
|
||||
|
||||
|
||||
/* Shufps can also be used to implement a reduced swizzle when dest ==
|
||||
* arg0.
|
||||
*/
|
||||
void sse_shufps( struct x86_function *p,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg arg0,
|
||||
GLubyte x,
|
||||
GLubyte y,
|
||||
GLubyte z,
|
||||
GLubyte w );
|
||||
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_dec( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_inc( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_pop( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_push( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_ret( struct x86_function *p );
|
||||
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
|
||||
|
||||
/* Retrieve a reference to one of the function arguments, taking into
|
||||
* account any push/pop activity:
|
||||
* account any push/pop activity. Note - doesn't track explicit
|
||||
* manipulation of ESP by other instructions.
|
||||
*/
|
||||
struct x86_reg x86_fn_arg( struct x86_function *p,
|
||||
GLuint arg );
|
||||
struct x86_reg x86_fn_arg( struct x86_function *p, GLuint arg );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue