mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 13:28:06 +02:00
draw: enable FSE by default
This commit is contained in:
parent
648da5158e
commit
728d1f7f43
6 changed files with 129 additions and 64 deletions
|
|
@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,
|
|||
|
||||
if (opt == 0)
|
||||
middle = draw->pt.middle.fetch_emit;
|
||||
else if (opt == PT_SHADE && draw->pt.test_fse)
|
||||
else if (opt == PT_SHADE)
|
||||
middle = draw->pt.middle.fetch_shade_emit;
|
||||
else
|
||||
middle = draw->pt.middle.general;
|
||||
|
|
@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw )
|
|||
if (!draw->pt.middle.fetch_emit)
|
||||
return FALSE;
|
||||
|
||||
if (draw->pt.test_fse) {
|
||||
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
|
||||
if (!draw->pt.middle.fetch_shade_emit)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
|
||||
if (!draw->pt.middle.fetch_shade_emit)
|
||||
return FALSE;
|
||||
|
||||
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
|
||||
if (!draw->pt.middle.general)
|
||||
|
|
|
|||
|
|
@ -123,6 +123,10 @@ struct draw_vertex_shader {
|
|||
|
||||
struct tgsi_shader_info info;
|
||||
|
||||
/* Extracted from shader:
|
||||
*/
|
||||
const float (*immediates)[4];
|
||||
|
||||
/*
|
||||
*/
|
||||
struct draw_vs_varient *varient[16];
|
||||
|
|
|
|||
|
|
@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
|
|||
a.disp == b.disp);
|
||||
}
|
||||
|
||||
struct x86_reg aos_get_x86( struct aos_compilation *cp,
|
||||
unsigned value )
|
||||
{
|
||||
if (cp->ebp != value) {
|
||||
unsigned offset;
|
||||
|
||||
switch (value) {
|
||||
case X86_IMMEDIATES:
|
||||
offset = Offset(struct aos_machine, immediates);
|
||||
break;
|
||||
case X86_CONSTANTS:
|
||||
offset = Offset(struct aos_machine, constants);
|
||||
break;
|
||||
case X86_ATTRIBS:
|
||||
offset = Offset(struct aos_machine, attrib);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
x86_mov(cp->func, cp->temp_EBP,
|
||||
x86_make_disp(cp->machine_EDX, offset));
|
||||
/* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */
|
||||
|
||||
cp->ebp = value;
|
||||
}
|
||||
|
||||
return cp->temp_EBP;
|
||||
}
|
||||
|
||||
|
||||
static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
|
||||
unsigned file,
|
||||
|
|
@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
|
|||
case TGSI_FILE_TEMPORARY:
|
||||
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
|
||||
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
|
||||
|
||||
case TGSI_FILE_CONSTANT:
|
||||
return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
|
||||
|
||||
case AOS_FILE_INTERNAL:
|
||||
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
|
||||
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));
|
||||
|
||||
case TGSI_FILE_CONSTANT:
|
||||
return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));
|
||||
|
||||
default:
|
||||
ERROR(cp, "unknown reg file");
|
||||
return x86_make_reg(0,0);
|
||||
|
|
@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
|
|||
}
|
||||
|
||||
|
||||
#if 0
|
||||
static boolean note_immediate( struct aos_compilation *cp,
|
||||
struct tgsi_full_immediate *imm )
|
||||
{
|
||||
|
|
@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp,
|
|||
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
|
||||
cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
|
||||
cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
|
||||
cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
|
||||
|
||||
x86_init_func(cp.func);
|
||||
|
||||
|
|
@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
|
||||
x86_push(cp.func, cp.idx_EBX);
|
||||
x86_push(cp.func, cp.count_ESI);
|
||||
x86_push(cp.func, cp.temp_EBP);
|
||||
|
||||
|
||||
/* Load arguments into regs:
|
||||
|
|
@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
|
||||
switch (parse.FullToken.Token.Type) {
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE:
|
||||
#if 0
|
||||
if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
|
||||
goto fail;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
|
|
@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
if (cp.func->need_emms)
|
||||
mmx_emms(cp.func);
|
||||
|
||||
x86_pop(cp.func, cp.temp_EBP);
|
||||
x86_pop(cp.func, cp.count_ESI);
|
||||
x86_pop(cp.func, cp.idx_EBX);
|
||||
|
||||
|
|
@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
|
|||
|
||||
for (i = 0; i < vaos->base.key.nr_inputs; i++) {
|
||||
if (vaos->base.key.element[i].in.buffer == buf) {
|
||||
vaos->machine->attrib[i].input_ptr = ((char *)ptr +
|
||||
vaos->base.key.element[i].in.offset);
|
||||
vaos->machine->attrib[i].input_stride = stride;
|
||||
vaos->attrib[i].input_ptr = ((char *)ptr +
|
||||
vaos->base.key.element[i].in.offset);
|
||||
vaos->attrib[i].input_stride = stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void vaos_destroy( struct draw_vs_varient *varient )
|
||||
{
|
||||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
|
||||
if (vaos->machine)
|
||||
align_free( vaos->machine );
|
||||
|
||||
x86_release_func( &vaos->func[0] );
|
||||
x86_release_func( &vaos->func[1] );
|
||||
|
||||
FREE(vaos);
|
||||
}
|
||||
|
||||
static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
|
||||
const unsigned *elts,
|
||||
|
|
@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
|
|||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
|
||||
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
|
||||
vaos->machine->constants = vaos->draw->pt.user.constants;
|
||||
vaos->machine->immediates = vaos->base.vs->immediates;
|
||||
vaos->machine->attrib = vaos->attrib;
|
||||
|
||||
vaos->gen_run_elts( varient,
|
||||
elts,
|
||||
count,
|
||||
|
|
@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
|
|||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
|
||||
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
|
||||
vaos->machine->constants = vaos->draw->pt.user.constants;
|
||||
vaos->machine->immediates = vaos->base.vs->immediates;
|
||||
vaos->machine->attrib = vaos->attrib;
|
||||
|
||||
vaos->gen_run_linear( varient,
|
||||
start,
|
||||
count,
|
||||
|
|
@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
|
|||
{
|
||||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
|
||||
memcpy(vaos->machine->constant,
|
||||
constants,
|
||||
(vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
|
||||
|
||||
#if 0
|
||||
unsigned i;
|
||||
for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
|
||||
|
|
@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient,
|
|||
memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
|
||||
}
|
||||
|
||||
static void vaos_destroy( struct draw_vs_varient *varient )
|
||||
{
|
||||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
|
||||
if (vaos->machine)
|
||||
align_free( vaos->machine );
|
||||
|
||||
FREE( vaos->attrib );
|
||||
|
||||
x86_release_func( &vaos->func[0] );
|
||||
x86_release_func( &vaos->func[1] );
|
||||
|
||||
FREE(vaos);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
|
||||
|
|
@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
|
|||
vaos->base.run_elts = vaos_run_elts;
|
||||
|
||||
vaos->draw = vs->draw;
|
||||
|
||||
vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
|
||||
if (!vaos->attrib)
|
||||
goto fail;
|
||||
|
||||
vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
|
||||
if (!vaos->machine)
|
||||
goto fail;
|
||||
|
|
@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
|
|||
return &vaos->base;
|
||||
|
||||
fail:
|
||||
if (vaos->machine)
|
||||
if (vaos && vaos->attrib)
|
||||
FREE(vaos->attrib);
|
||||
|
||||
if (vaos && vaos->machine)
|
||||
align_free( vaos->machine );
|
||||
|
||||
if (vaos)
|
||||
|
|
|
|||
|
|
@ -78,6 +78,14 @@ struct lit_info {
|
|||
#define MAX_SHINE_TAB 4
|
||||
#define MAX_LIT_INFO 16
|
||||
|
||||
/**
 * Fetch description for one vertex input attribute.
 *
 * The generated fetch code computes an element's source address as
 * input_ptr + input_stride * element_index.
 */
struct aos_attrib {
   const void *input_ptr;     /* start of this attribute's data in its buffer */
   unsigned input_stride;     /* stride passed to set_buffer for that buffer */
};
|
||||
|
||||
|
||||
|
||||
|
||||
/* This is the temporary storage used by all the aos_sse vs varients.
|
||||
* Create one per context and reuse by passing a pointer in at
|
||||
* vs_varient creation??
|
||||
|
|
@ -86,8 +94,6 @@ struct aos_machine {
|
|||
float input [MAX_INPUTS ][4];
|
||||
float output [MAX_OUTPUTS ][4];
|
||||
float temp [MAX_TEMPS ][4];
|
||||
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
|
||||
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
|
||||
float internal [MAX_INTERNALS ][4];
|
||||
|
||||
float scale[4]; /* viewport */
|
||||
|
|
@ -105,12 +111,10 @@ struct aos_machine {
|
|||
ushort fpu_restore;
|
||||
ushort fpucntl; /* one of FPU_* above */
|
||||
|
||||
struct {
|
||||
const void *input_ptr;
|
||||
unsigned input_stride;
|
||||
const float (*immediates)[4]; /* points to shader data */
|
||||
const float (*constants)[4]; /* points to draw data */
|
||||
|
||||
unsigned output_offset;
|
||||
} attrib[PIPE_MAX_ATTRIBS];
|
||||
const struct aos_attrib *attrib; /* points to ? */
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -132,6 +136,7 @@ struct aos_compilation {
|
|||
unsigned last_used;
|
||||
} xmm[8];
|
||||
|
||||
unsigned ebp; /* one of X86_* */
|
||||
|
||||
boolean input_fetched[PIPE_MAX_ATTRIBS];
|
||||
unsigned output_last_write[PIPE_MAX_ATTRIBS];
|
||||
|
|
@ -148,6 +153,7 @@ struct aos_compilation {
|
|||
struct x86_reg outbuf_ECX;
|
||||
struct x86_reg machine_EDX;
|
||||
struct x86_reg count_ESI; /* decrements to zero */
|
||||
struct x86_reg temp_EBP;
|
||||
};
|
||||
|
||||
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
|
||||
|
|
@ -192,20 +198,20 @@ do { \
|
|||
} while (0)
|
||||
|
||||
|
||||
#define X86_NULL 0
|
||||
#define X86_IMMEDIATES 1
|
||||
#define X86_CONSTANTS 2
|
||||
#define X86_ATTRIBS 3
|
||||
|
||||
|
||||
struct x86_reg aos_get_x86( struct aos_compilation *cp,
|
||||
unsigned value );
|
||||
|
||||
|
||||
struct draw_vs_varient_aos_sse {
|
||||
struct draw_vs_varient base;
|
||||
struct draw_context *draw;
|
||||
|
||||
#if 0
|
||||
struct {
|
||||
const void *ptr;
|
||||
unsigned stride;
|
||||
} attrib[PIPE_MAX_ATTRIBS];
|
||||
#endif
|
||||
struct aos_attrib *attrib;
|
||||
|
||||
struct aos_machine *machine; /* XXX: temporarily unshared */
|
||||
|
||||
|
|
|
|||
|
|
@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
|
|||
|
||||
|
||||
|
||||
static void get_src_ptr( struct x86_function *func,
|
||||
static void get_src_ptr( struct aos_compilation *cp,
|
||||
struct x86_reg src,
|
||||
struct x86_reg machine,
|
||||
struct x86_reg elt,
|
||||
unsigned a )
|
||||
{
|
||||
struct x86_reg input_ptr =
|
||||
x86_make_disp(machine,
|
||||
Offset(struct aos_machine, attrib[a].input_ptr));
|
||||
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ),
|
||||
a * sizeof(struct aos_attrib));
|
||||
|
||||
struct x86_reg input_stride =
|
||||
x86_make_disp(machine,
|
||||
Offset(struct aos_machine, attrib[a].input_stride));
|
||||
struct x86_reg input_ptr = x86_make_disp(attrib,
|
||||
Offset(struct aos_attrib, input_ptr));
|
||||
|
||||
struct x86_reg input_stride = x86_make_disp(attrib,
|
||||
Offset(struct aos_attrib, input_stride));
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(func, src, input_stride);
|
||||
x86_imul(func, src, elt);
|
||||
x86_add(func, src, input_ptr);
|
||||
x86_mov(cp->func, src, input_stride);
|
||||
x86_imul(cp->func, src, elt);
|
||||
x86_add(cp->func, src, input_ptr);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,
|
|||
|
||||
/* Figure out source pointer address:
|
||||
*/
|
||||
get_src_ptr(cp->func,
|
||||
get_src_ptr(cp,
|
||||
src,
|
||||
cp->machine_EDX,
|
||||
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
|
||||
idx);
|
||||
|
||||
|
|
|
|||
|
|
@ -68,8 +68,6 @@ struct draw_sse_vertex_shader {
|
|||
codegen_function func;
|
||||
|
||||
struct tgsi_exec_machine *machine;
|
||||
|
||||
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
|
|||
machine->Outputs,
|
||||
(float (*)[4])constants,
|
||||
machine->Temps,
|
||||
shader->immediates,
|
||||
(float (*)[4])shader->base.immediates,
|
||||
input,
|
||||
base->info.num_inputs,
|
||||
input_stride,
|
||||
|
|
@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
|
|||
|
||||
x86_release_func( &shader->sse2_program );
|
||||
|
||||
align_free(shader->base.immediates);
|
||||
|
||||
FREE( (void*) shader->base.state.tokens );
|
||||
FREE( shader );
|
||||
}
|
||||
|
|
@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw,
|
|||
vs->base.prepare = vs_sse_prepare;
|
||||
vs->base.run_linear = vs_sse_run_linear;
|
||||
vs->base.delete = vs_sse_delete;
|
||||
|
||||
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
|
||||
sizeof(float), 16);
|
||||
|
||||
vs->machine = &draw->vs.machine;
|
||||
|
||||
x86_init_func( &vs->sse2_program );
|
||||
|
||||
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
|
||||
&vs->sse2_program, vs->immediates, TRUE ))
|
||||
&vs->sse2_program,
|
||||
(float (*)[4])vs->base.immediates,
|
||||
TRUE ))
|
||||
goto fail;
|
||||
|
||||
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue