mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 03:08:05 +02:00
tgsi: SSE2 optimized exp2, log2 and pow implementations.
Special care must be taken when calling compiler-generated SSE2 functions from the runtime-generated SSE2 code: the xmm registers must be saved, and gcc must be told that the stack is not 16-byte aligned. It would be more efficient to keep the stack pointer 16-byte aligned, but that is too hairy and not consistent across all x86 architectures. This has been tested on Linux x86 and Windows x86 userspace. It has not been tested on x86-64, because that is broken for other reasons (even without this change).
This commit is contained in:
parent
906336cd7c
commit
5dc8e67078
1 changed files with 211 additions and 76 deletions
|
|
@ -28,6 +28,7 @@
|
|||
#include "pipe/p_debug.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_sse.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi_exec.h"
|
||||
|
|
@ -480,10 +481,31 @@ emit_coef_dady(
|
|||
* Function call helpers.
|
||||
*/
|
||||
|
||||
/**
|
||||
* NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be
|
||||
* defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
|
||||
* that the stack pointer is 16 byte aligned, as expected.
|
||||
*/
|
||||
static void
|
||||
emit_push_gp(
|
||||
struct x86_function *func )
|
||||
emit_func_call_dst(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst,
|
||||
void (PIPE_CDECL *code)() )
|
||||
{
|
||||
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
|
||||
unsigned i, n, xmm;
|
||||
unsigned xmm_mask;
|
||||
|
||||
/* Bitmask of the xmm registers to save */
|
||||
xmm_mask = (1 << xmm_save) - 1;
|
||||
xmm_mask &= ~(1 << xmm_dst);
|
||||
|
||||
sse_movaps(
|
||||
func,
|
||||
get_temp( TEMP_R0, 0 ),
|
||||
make_xmm( xmm_dst ) );
|
||||
|
||||
x86_push(
|
||||
func,
|
||||
x86_make_reg( file_REG32, reg_AX) );
|
||||
|
|
@ -493,12 +515,49 @@ emit_push_gp(
|
|||
x86_push(
|
||||
func,
|
||||
x86_make_reg( file_REG32, reg_DX) );
|
||||
}
|
||||
|
||||
for(i = 0, n = 0; i < 8; ++i)
|
||||
if(xmm_mask & (1 << i))
|
||||
++n;
|
||||
|
||||
x86_sub_imm(
|
||||
func,
|
||||
x86_make_reg( file_REG32, reg_SP ),
|
||||
n*16);
|
||||
|
||||
for(i = 0, n = 0; i < 8; ++i)
|
||||
if(xmm_mask & (1 << i)) {
|
||||
sse_movups(
|
||||
func,
|
||||
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
|
||||
make_xmm( xmm ) );
|
||||
++n;
|
||||
}
|
||||
|
||||
x86_lea(
|
||||
func,
|
||||
ecx,
|
||||
get_temp( TEMP_R0, 0 ) );
|
||||
|
||||
x86_push( func, ecx );
|
||||
x86_mov_reg_imm( func, ecx, (unsigned long) code );
|
||||
x86_call( func, ecx );
|
||||
x86_pop(func, ecx );
|
||||
|
||||
for(i = 0, n = 0; i < 8; ++i)
|
||||
if(xmm_mask & (1 << i)) {
|
||||
sse_movups(
|
||||
func,
|
||||
make_xmm( xmm ),
|
||||
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
|
||||
++n;
|
||||
}
|
||||
|
||||
x86_add_imm(
|
||||
func,
|
||||
x86_make_reg( file_REG32, reg_SP ),
|
||||
n*16);
|
||||
|
||||
static void
|
||||
x86_pop_gp(
|
||||
struct x86_function *func )
|
||||
{
|
||||
/* Restore GP registers in a reverse order.
|
||||
*/
|
||||
x86_pop(
|
||||
|
|
@ -510,39 +569,6 @@ x86_pop_gp(
|
|||
x86_pop(
|
||||
func,
|
||||
x86_make_reg( file_REG32, reg_AX) );
|
||||
}
|
||||
|
||||
static void
|
||||
emit_func_call_dst(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_dst,
|
||||
void (PIPE_CDECL *code)() )
|
||||
{
|
||||
sse_movaps(
|
||||
func,
|
||||
get_temp( TEMP_R0, 0 ),
|
||||
make_xmm( xmm_dst ) );
|
||||
|
||||
emit_push_gp(
|
||||
func );
|
||||
|
||||
{
|
||||
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
|
||||
|
||||
x86_lea(
|
||||
func,
|
||||
ecx,
|
||||
get_temp( TEMP_R0, 0 ) );
|
||||
|
||||
x86_push( func, ecx );
|
||||
x86_mov_reg_imm( func, ecx, (unsigned long) code );
|
||||
x86_call( func, ecx );
|
||||
x86_pop(func, ecx );
|
||||
}
|
||||
|
||||
|
||||
x86_pop_gp(
|
||||
func );
|
||||
|
||||
sse_movaps(
|
||||
func,
|
||||
|
|
@ -553,6 +579,7 @@ emit_func_call_dst(
|
|||
static void
|
||||
emit_func_call_dst_src(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst,
|
||||
unsigned xmm_src,
|
||||
void (PIPE_CDECL *code)() )
|
||||
|
|
@ -564,10 +591,111 @@ emit_func_call_dst_src(
|
|||
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
code );
|
||||
}
|
||||
|
||||
/*
|
||||
* Fast SSE2 implementation of special math functions.
|
||||
*/
|
||||
|
||||
/*
 * Horner-form polynomial evaluation on packed floats.
 *
 * POLYn(x, c0, ..., cn) expands to c0 + x*(c1 + x*(... + x*cn)), with every
 * coefficient broadcast to all four lanes via _mm_set1_ps.  The argument `x`
 * is expanded once per multiplication, so pass a side-effect-free expression.
 * POLY0 ignores `x`; it only exists to terminate the recursion.
 */
#define POLY0(x, c0) \
   _mm_set1_ps(c0)
#define POLY1(x, c0, c1) \
   _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
#define POLY2(x, c0, c1, c2) \
   _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
#define POLY3(x, c0, c1, c2, c3) \
   _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
#define POLY4(x, c0, c1, c2, c3, c4) \
   _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
#define POLY5(x, c0, c1, c2, c3, c4, c5) \
   _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))

/* Compile-time selection of the coefficient sets used by exp2f4 / log2f4. */
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
|
||||
|
||||
/**
|
||||
* See http://www.devmaster.net/forums/showthread.php?p=43580
|
||||
*/
|
||||
static INLINE __m128
|
||||
exp2f4(__m128 x)
|
||||
{
|
||||
__m128i ipart;
|
||||
__m128 fpart, expipart, expfpart;
|
||||
|
||||
x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
|
||||
x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
|
||||
|
||||
/* ipart = int(x - 0.5) */
|
||||
ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
|
||||
|
||||
/* fpart = x - ipart */
|
||||
fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
|
||||
|
||||
/* expipart = (float) (1 << ipart) */
|
||||
expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
|
||||
|
||||
/* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
|
||||
#if EXP_POLY_DEGREE == 5
|
||||
expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
|
||||
#elif EXP_POLY_DEGREE == 4
|
||||
expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
|
||||
#elif EXP_POLY_DEGREE == 3
|
||||
expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
|
||||
#elif EXP_POLY_DEGREE == 2
|
||||
expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
return _mm_mul_ps(expipart, expfpart);
|
||||
}
|
||||
|
||||
/**
|
||||
* See http://www.devmaster.net/forums/showthread.php?p=43580
|
||||
*/
|
||||
static INLINE __m128
|
||||
log2f4(__m128 x)
|
||||
{
|
||||
__m128i expmask = _mm_set1_epi32(0x7f800000);
|
||||
__m128i mantmask = _mm_set1_epi32(0x007fffff);
|
||||
__m128 one = _mm_set1_ps(1.0f);
|
||||
|
||||
__m128i i = _mm_castps_si128(x);
|
||||
|
||||
/* exp = (float) exponent(x) */
|
||||
__m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
|
||||
|
||||
/* mant = (float) mantissa(x) */
|
||||
__m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
|
||||
|
||||
__m128 logmant;
|
||||
|
||||
/* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
|
||||
* These coefficients can be generate with
|
||||
* http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
|
||||
*/
|
||||
#if LOG_POLY_DEGREE == 6
|
||||
logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
|
||||
#elif LOG_POLY_DEGREE == 5
|
||||
logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
|
||||
#elif LOG_POLY_DEGREE == 4
|
||||
logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
|
||||
#elif LOG_POLY_DEGREE == 3
|
||||
logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
|
||||
logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
|
||||
|
||||
return _mm_add_ps(logmant, exp);
|
||||
}
|
||||
|
||||
static INLINE __m128
|
||||
powf4(__m128 x, __m128 y)
|
||||
{
|
||||
return exp2f4(_mm_mul_ps(log2f4(x), y));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Low-level instruction translators.
|
||||
*/
|
||||
|
|
@ -610,38 +738,35 @@ cos4f(
|
|||
static void
|
||||
emit_cos(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst )
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
cos4f );
|
||||
}
|
||||
|
||||
static void PIPE_CDECL
|
||||
#if defined(PIPE_CC_GCC)
|
||||
__attribute__((force_align_arg_pointer))
|
||||
#endif
|
||||
ex24f(
|
||||
float *store )
|
||||
{
|
||||
#if FAST_MATH
|
||||
store[0] = util_fast_exp2( store[0] );
|
||||
store[1] = util_fast_exp2( store[1] );
|
||||
store[2] = util_fast_exp2( store[2] );
|
||||
store[3] = util_fast_exp2( store[3] );
|
||||
#else
|
||||
store[0] = powf( 2.0f, store[0] );
|
||||
store[1] = powf( 2.0f, store[1] );
|
||||
store[2] = powf( 2.0f, store[2] );
|
||||
store[3] = powf( 2.0f, store[3] );
|
||||
#endif
|
||||
_mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
|
||||
}
|
||||
|
||||
static void
|
||||
emit_ex2(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst )
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
ex24f );
|
||||
}
|
||||
|
|
@ -670,10 +795,12 @@ flr4f(
|
|||
static void
|
||||
emit_flr(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst )
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
flr4f );
|
||||
}
|
||||
|
|
@ -691,31 +818,35 @@ frc4f(
|
|||
static void
|
||||
emit_frc(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst )
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
frc4f );
|
||||
}
|
||||
|
||||
static void PIPE_CDECL
|
||||
#if defined(PIPE_CC_GCC)
|
||||
__attribute__((force_align_arg_pointer))
|
||||
#endif
|
||||
lg24f(
|
||||
float *store )
|
||||
{
|
||||
store[0] = util_fast_log2( store[0] );
|
||||
store[1] = util_fast_log2( store[1] );
|
||||
store[2] = util_fast_log2( store[2] );
|
||||
store[3] = util_fast_log2( store[3] );
|
||||
_mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
|
||||
}
|
||||
|
||||
static void
|
||||
emit_lg2(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst )
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
lg24f );
|
||||
}
|
||||
|
|
@ -757,14 +888,14 @@ emit_neg(
|
|||
}
|
||||
|
||||
static void PIPE_CDECL
|
||||
#if defined(PIPE_CC_GCC)
|
||||
__attribute__((force_align_arg_pointer))
|
||||
#endif
|
||||
pow4f(
|
||||
float *store )
|
||||
{
|
||||
#if FAST_MATH
|
||||
store[0] = util_fast_pow( store[0], store[4] );
|
||||
store[1] = util_fast_pow( store[1], store[5] );
|
||||
store[2] = util_fast_pow( store[2], store[6] );
|
||||
store[3] = util_fast_pow( store[3], store[7] );
|
||||
#if 1
|
||||
_mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
|
||||
#else
|
||||
store[0] = powf( store[0], store[4] );
|
||||
store[1] = powf( store[1], store[5] );
|
||||
|
|
@ -776,11 +907,13 @@ pow4f(
|
|||
static void
|
||||
emit_pow(
|
||||
struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst,
|
||||
unsigned xmm_src )
|
||||
{
|
||||
emit_func_call_dst_src(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
xmm_src,
|
||||
pow4f );
|
||||
|
|
@ -873,10 +1006,12 @@ sin4f(
|
|||
|
||||
static void
|
||||
emit_sin (struct x86_function *func,
|
||||
unsigned xmm_save,
|
||||
unsigned xmm_dst)
|
||||
{
|
||||
emit_func_call_dst(
|
||||
func,
|
||||
xmm_save,
|
||||
xmm_dst,
|
||||
sin4f );
|
||||
}
|
||||
|
|
@ -1296,7 +1431,7 @@ emit_instruction(
|
|||
get_temp(
|
||||
TGSI_EXEC_TEMP_MINUS_128_I,
|
||||
TGSI_EXEC_TEMP_MINUS_128_C ) );
|
||||
emit_pow( func, 1, 2 );
|
||||
emit_pow( func, 3, 1, 2 );
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
sse_xorps(
|
||||
func,
|
||||
|
|
@ -1342,11 +1477,11 @@ emit_instruction(
|
|||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
|
||||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
|
||||
emit_MOV( func, 1, 0 );
|
||||
emit_flr( func, 1 );
|
||||
emit_flr( func, 2, 1 );
|
||||
/* dst.x = ex2(floor(src.x)) */
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
|
||||
emit_MOV( func, 2, 1 );
|
||||
emit_ex2( func, 2 );
|
||||
emit_ex2( func, 3, 2 );
|
||||
STORE( func, *inst, 2, 0, CHAN_X );
|
||||
}
|
||||
/* dst.y = src.x - floor(src.x) */
|
||||
|
|
@ -1358,7 +1493,7 @@ emit_instruction(
|
|||
}
|
||||
/* dst.z = ex2(src.x) */
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
|
||||
emit_ex2( func, 0 );
|
||||
emit_ex2( func, 3, 0 );
|
||||
STORE( func, *inst, 0, 0, CHAN_Z );
|
||||
}
|
||||
}
|
||||
|
|
@ -1376,21 +1511,21 @@ emit_instruction(
|
|||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_abs( func, 0 );
|
||||
emit_MOV( func, 1, 0 );
|
||||
emit_lg2( func, 1 );
|
||||
emit_lg2( func, 2, 1 );
|
||||
/* dst.z = lg2(abs(src.x)) */
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
|
||||
STORE( func, *inst, 1, 0, CHAN_Z );
|
||||
}
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
|
||||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
|
||||
emit_flr( func, 1 );
|
||||
emit_flr( func, 2, 1 );
|
||||
/* dst.x = floor(lg2(abs(src.x))) */
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
|
||||
STORE( func, *inst, 1, 0, CHAN_X );
|
||||
}
|
||||
/* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
|
||||
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
|
||||
emit_ex2( func, 1 );
|
||||
emit_ex2( func, 2, 1 );
|
||||
emit_rcp( func, 1, 1 );
|
||||
emit_mul( func, 0, 1 );
|
||||
STORE( func, *inst, 0, 0, CHAN_Y );
|
||||
|
|
@ -1580,7 +1715,7 @@ emit_instruction(
|
|||
/* TGSI_OPCODE_FRC */
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
FETCH( func, *inst, 0, 0, chan_index );
|
||||
emit_frc( func, 0 );
|
||||
emit_frc( func, 0, 0 );
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
break;
|
||||
|
|
@ -1593,7 +1728,7 @@ emit_instruction(
|
|||
/* TGSI_OPCODE_FLR */
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
FETCH( func, *inst, 0, 0, chan_index );
|
||||
emit_flr( func, 0 );
|
||||
emit_flr( func, 0, 0 );
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
break;
|
||||
|
|
@ -1605,7 +1740,7 @@ emit_instruction(
|
|||
case TGSI_OPCODE_EXPBASE2:
|
||||
/* TGSI_OPCODE_EX2 */
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_ex2( func, 0 );
|
||||
emit_ex2( func, 0, 0 );
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
|
|
@ -1614,7 +1749,7 @@ emit_instruction(
|
|||
case TGSI_OPCODE_LOGBASE2:
|
||||
/* TGSI_OPCODE_LG2 */
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_lg2( func, 0 );
|
||||
emit_lg2( func, 0, 0 );
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
|
|
@ -1624,7 +1759,7 @@ emit_instruction(
|
|||
/* TGSI_OPCODE_POW */
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
FETCH( func, *inst, 1, 1, CHAN_X );
|
||||
emit_pow( func, 0, 1 );
|
||||
emit_pow( func, 0, 0, 1 );
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
|
|
@ -1715,7 +1850,7 @@ emit_instruction(
|
|||
|
||||
case TGSI_OPCODE_COS:
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_cos( func, 0 );
|
||||
emit_cos( func, 0, 0 );
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
|
|
@ -1774,7 +1909,7 @@ emit_instruction(
|
|||
|
||||
case TGSI_OPCODE_SIN:
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_sin( func, 0 );
|
||||
emit_sin( func, 0, 0 );
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
}
|
||||
|
|
@ -1868,12 +2003,12 @@ emit_instruction(
|
|||
case TGSI_OPCODE_SCS:
|
||||
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_cos( func, 0 );
|
||||
emit_cos( func, 0, 0 );
|
||||
STORE( func, *inst, 0, 0, CHAN_X );
|
||||
}
|
||||
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
|
||||
FETCH( func, *inst, 0, 0, CHAN_X );
|
||||
emit_sin( func, 0 );
|
||||
emit_sin( func, 0, 0 );
|
||||
STORE( func, *inst, 0, 0, CHAN_Y );
|
||||
}
|
||||
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue