mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
draw: don't keep refetching constant inputs
This commit is contained in:
parent
102daee1b8
commit
af9cfea9cc
6 changed files with 144 additions and 62 deletions
|
|
@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
|
|||
unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
|
||||
const struct vertex_info *vinfo;
|
||||
unsigned i;
|
||||
unsigned nr_vbs = 0;
|
||||
|
||||
|
||||
if (!draw->render->set_primitive( draw->render,
|
||||
|
|
@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
|
|||
|
||||
fse->key.viewport = !draw->identity_viewport;
|
||||
fse->key.clip = !draw->bypass_clipping;
|
||||
fse->key.pad = 0;
|
||||
fse->key.const_vbuffers = 0;
|
||||
|
||||
memset(fse->key.element, 0,
|
||||
fse->key.nr_elements * sizeof(fse->key.element[0]));
|
||||
|
|
@ -116,9 +117,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
|
|||
*/
|
||||
fse->key.element[i].in.buffer = src->vertex_buffer_index;
|
||||
fse->key.element[i].in.offset = src->src_offset;
|
||||
nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1);
|
||||
}
|
||||
|
||||
for (i = 0; i < 5 && i < nr_vbs; i++) {
|
||||
if (draw->pt.vertex_buffer[i].pitch == 0)
|
||||
fse->key.const_vbuffers |= (1<<i);
|
||||
}
|
||||
|
||||
if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers);
|
||||
|
||||
{
|
||||
unsigned dst_offset = 0;
|
||||
|
||||
|
|
@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/* Would normally look up a vertex shader and peruse its list of
|
||||
* varients somehow. We omitted that step and put all the
|
||||
* hardcoded "shaders" into an array. We're just making the
|
||||
* assumption that this happens to be a matching shader... ie
|
||||
* you're running isosurf, aren't you?
|
||||
*/
|
||||
|
||||
fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
|
||||
&fse->key );
|
||||
|
||||
|
|
@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
|
|||
return ;
|
||||
}
|
||||
|
||||
if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__,
|
||||
fse->active->key.const_vbuffers);
|
||||
|
||||
/* Now set buffer pointers:
|
||||
*/
|
||||
for (i = 0; i < num_vs_inputs; i++) {
|
||||
unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
|
||||
|
||||
fse->active->set_input( fse->active,
|
||||
i,
|
||||
|
||||
((const ubyte *) draw->pt.user.vbuffer[buf] +
|
||||
draw->pt.vertex_buffer[buf].buffer_offset),
|
||||
|
||||
draw->pt.vertex_buffer[buf].pitch );
|
||||
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
|
||||
fse->active->set_buffer( fse->active,
|
||||
i,
|
||||
((const ubyte *) draw->pt.user.vbuffer[i] +
|
||||
draw->pt.vertex_buffer[i].buffer_offset),
|
||||
draw->pt.vertex_buffer[i].pitch );
|
||||
}
|
||||
|
||||
*max_vertices = (draw->render->max_vertex_buffer_bytes /
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ struct draw_vs_varient_key {
|
|||
unsigned nr_outputs:8;
|
||||
unsigned viewport:1;
|
||||
unsigned clip:1;
|
||||
unsigned pad:5;
|
||||
unsigned const_vbuffers:5;
|
||||
struct draw_varient_element element[PIPE_MAX_ATTRIBS];
|
||||
};
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ struct draw_vs_varient {
|
|||
|
||||
struct draw_vertex_shader *vs;
|
||||
|
||||
void (*set_input)( struct draw_vs_varient *,
|
||||
void (*set_buffer)( struct draw_vs_varient *,
|
||||
unsigned i,
|
||||
const void *ptr,
|
||||
unsigned stride );
|
||||
|
|
|
|||
|
|
@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx )
|
|||
}
|
||||
|
||||
|
||||
void aos_spill_all( struct aos_compilation *cp )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (cp->xmm[i].dirty)
|
||||
spill(cp, i);
|
||||
aos_release_xmm_reg(cp, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
|
||||
struct x86_reg reg )
|
||||
{
|
||||
|
|
@ -1941,6 +1953,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
|
||||
aos_init_inputs( &cp, linear );
|
||||
|
||||
cp.x86_reg[0] = 0;
|
||||
cp.x86_reg[1] = 0;
|
||||
|
||||
/* Note address for loop jump
|
||||
*/
|
||||
label = x86_get_label(cp.func);
|
||||
|
|
@ -2066,6 +2081,8 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
|
|||
vaos->buffer[buf].base_ptr = (char *)ptr;
|
||||
vaos->buffer[buf].stride = stride;
|
||||
}
|
||||
|
||||
if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -2078,6 +2095,8 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
|
|||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
struct aos_machine *machine = vaos->draw->vs.aos_machine;
|
||||
|
||||
if (0) debug_printf("%s %d\n", __FUNCTION__, count);
|
||||
|
||||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
|
||||
machine->constants = vaos->draw->vs.aligned_constants;
|
||||
machine->immediates = vaos->base.vs->immediates;
|
||||
|
|
@ -2097,6 +2116,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
|
|||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
|
||||
struct aos_machine *machine = vaos->draw->vs.aos_machine;
|
||||
|
||||
if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count,
|
||||
vaos->base.key.const_vbuffers);
|
||||
|
||||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
|
||||
machine->constants = vaos->draw->vs.aligned_constants;
|
||||
machine->immediates = vaos->base.vs->immediates;
|
||||
|
|
@ -2140,7 +2162,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
|
|||
|
||||
vaos->base.key = *key;
|
||||
vaos->base.vs = vs;
|
||||
vaos->base.set_input = vaos_set_buffer;
|
||||
vaos->base.set_buffer = vaos_set_buffer;
|
||||
vaos->base.destroy = vaos_destroy;
|
||||
vaos->base.run_linear = vaos_run_linear;
|
||||
vaos->base.run_elts = vaos_run_elts;
|
||||
|
|
@ -2154,7 +2176,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
|
|||
if (!vaos->buffer)
|
||||
goto fail;
|
||||
|
||||
debug_printf("nr_vb: %d\n", vaos->nr_vb);
|
||||
debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
|
||||
|
||||
#if 0
|
||||
tgsi_dump(vs->state.tokens, 0);
|
||||
|
|
|
|||
|
|
@ -176,6 +176,8 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
|||
unsigned idx,
|
||||
unsigned dirty );
|
||||
|
||||
void aos_spill_all( struct aos_compilation *cp );
|
||||
|
||||
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
|
||||
unsigned file,
|
||||
unsigned idx );
|
||||
|
|
|
|||
|
|
@ -108,31 +108,47 @@ static void emit_swizzle( struct aos_compilation *cp,
|
|||
|
||||
|
||||
static boolean get_buffer_ptr( struct aos_compilation *cp,
|
||||
unsigned buf_idx,
|
||||
struct x86_reg elt,
|
||||
struct x86_reg ptr)
|
||||
boolean linear,
|
||||
unsigned buf_idx,
|
||||
struct x86_reg elt,
|
||||
struct x86_reg ptr)
|
||||
{
|
||||
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
|
||||
buf_idx * sizeof(struct aos_buffer));
|
||||
|
||||
struct x86_reg buf_base_ptr = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, base_ptr));
|
||||
|
||||
struct x86_reg buf_stride = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, stride));
|
||||
if (linear) {
|
||||
struct x86_reg buf_ptr = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, ptr));
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(cp->func, ptr, buf_stride);
|
||||
x86_imul(cp->func, ptr, elt);
|
||||
x86_add(cp->func, ptr, buf_base_ptr);
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(cp->func, ptr, buf_ptr);
|
||||
x86_mov(cp->func, elt, buf_stride);
|
||||
x86_add(cp->func, elt, ptr);
|
||||
sse_prefetchnta(cp->func, x86_deref(elt));
|
||||
x86_mov(cp->func, buf_ptr, elt);
|
||||
}
|
||||
else {
|
||||
struct x86_reg buf_base_ptr = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, base_ptr));
|
||||
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(cp->func, ptr, buf_stride);
|
||||
x86_imul(cp->func, ptr, elt);
|
||||
x86_add(cp->func, ptr, buf_base_ptr);
|
||||
}
|
||||
|
||||
cp->insn_counter++;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static boolean load_input( struct aos_compilation *cp,
|
||||
unsigned idx,
|
||||
struct x86_reg bufptr )
|
||||
|
|
@ -200,18 +216,57 @@ static boolean load_inputs( struct aos_compilation *cp,
|
|||
|
||||
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
|
||||
{
|
||||
if (linear && cp->vaos->nr_vb == 1) {
|
||||
unsigned i;
|
||||
for (i = 0; i < cp->vaos->nr_vb; i++) {
|
||||
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
|
||||
i * sizeof(struct aos_buffer));
|
||||
|
||||
struct x86_reg elt = cp->idx_EBX;
|
||||
struct x86_reg ptr = cp->tmp_EAX;
|
||||
struct x86_reg buf_base_ptr = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, base_ptr));
|
||||
|
||||
if (!get_buffer_ptr( cp, 0, elt, ptr ))
|
||||
return FALSE;
|
||||
if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
|
||||
struct x86_reg ptr = cp->tmp_EAX;
|
||||
|
||||
/* In the linear, single buffer case, keep the buffer pointer
|
||||
* instead of the index number.
|
||||
*/
|
||||
x86_mov( cp->func, elt, ptr );
|
||||
x86_mov(cp->func, ptr, buf_base_ptr);
|
||||
|
||||
/* Load all inputs for this constant vertex buffer
|
||||
*/
|
||||
load_inputs( cp, i, x86_deref(ptr) );
|
||||
|
||||
/* Then just force them out to aos_machine.input[]
|
||||
*/
|
||||
aos_spill_all( cp );
|
||||
|
||||
}
|
||||
else if (linear) {
|
||||
|
||||
struct x86_reg elt = cp->idx_EBX;
|
||||
struct x86_reg ptr = cp->tmp_EAX;
|
||||
|
||||
struct x86_reg buf_stride = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, stride));
|
||||
|
||||
struct x86_reg buf_ptr = x86_make_disp(buf,
|
||||
Offset(struct aos_buffer, ptr));
|
||||
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(cp->func, ptr, buf_stride);
|
||||
x86_imul(cp->func, ptr, elt);
|
||||
x86_add(cp->func, ptr, buf_base_ptr);
|
||||
|
||||
|
||||
/* In the linear case, keep the buffer pointer instead of the
|
||||
* index number.
|
||||
*/
|
||||
if (cp->vaos->nr_vb == 1)
|
||||
x86_mov( cp->func, elt, ptr );
|
||||
else
|
||||
x86_mov( cp->func, buf_ptr, ptr );
|
||||
|
||||
cp->insn_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
|
@ -219,23 +274,22 @@ boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
|
|||
|
||||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
|
||||
{
|
||||
if (linear && cp->vaos->nr_vb == 1) {
|
||||
|
||||
load_inputs( cp, 0, cp->idx_EBX );
|
||||
unsigned j;
|
||||
|
||||
}
|
||||
else {
|
||||
struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
|
||||
unsigned j;
|
||||
|
||||
for (j = 0; j < cp->vaos->nr_vb; j++) {
|
||||
for (j = 0; j < cp->vaos->nr_vb; j++) {
|
||||
if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
|
||||
/* just retrieve pre-transformed input */
|
||||
}
|
||||
else if (linear && cp->vaos->nr_vb == 1) {
|
||||
load_inputs( cp, 0, cp->idx_EBX );
|
||||
}
|
||||
else {
|
||||
struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
|
||||
struct x86_reg ptr = cp->tmp_EAX;
|
||||
|
||||
if (!get_buffer_ptr( cp, j, elt, ptr ))
|
||||
if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
|
||||
return FALSE;
|
||||
|
||||
cp->insn_counter++;
|
||||
|
||||
if (!load_inputs( cp, j, ptr ))
|
||||
return FALSE;
|
||||
}
|
||||
|
|
@ -252,13 +306,16 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
|
|||
Offset(struct aos_buffer, stride)));
|
||||
|
||||
x86_add(cp->func, cp->idx_EBX, stride);
|
||||
sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX));
|
||||
}
|
||||
else if (linear) {
|
||||
x86_inc(cp->func, cp->idx_EBX);
|
||||
/* Nothing to do */
|
||||
}
|
||||
else {
|
||||
x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -64,10 +64,10 @@ struct draw_vs_varient_generic {
|
|||
|
||||
|
||||
|
||||
static void vsvg_set_input( struct draw_vs_varient *varient,
|
||||
unsigned buffer,
|
||||
const void *ptr,
|
||||
unsigned stride )
|
||||
static void vsvg_set_buffer( struct draw_vs_varient *varient,
|
||||
unsigned buffer,
|
||||
const void *ptr,
|
||||
unsigned stride )
|
||||
{
|
||||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
|
||||
|
||||
|
|
@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
|
|||
|
||||
vsvg->base.key = *key;
|
||||
vsvg->base.vs = vs;
|
||||
vsvg->base.set_input = vsvg_set_input;
|
||||
vsvg->base.set_buffer = vsvg_set_buffer;
|
||||
vsvg->base.run_elts = vsvg_run_elts;
|
||||
vsvg->base.run_linear = vsvg_run_linear;
|
||||
vsvg->base.destroy = vsvg_destroy;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue