Implement draw_arrays_instanced() in softpipe.

Modify the translate module to respect instance divisors and accept
instance id as a parameter to calculate input vertex offset.
This commit is contained in:
Michal Krol 2009-12-29 23:21:01 +01:00
parent 7124fa16ef
commit 7ca0ce3834
17 changed files with 130 additions and 5 deletions

View file

@ -151,6 +151,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw,
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
/**
 * Draw the non-indexed vertex range [start, start + count) once per
 * instance, for instanceCount instances.  The draw context's instance id
 * is set to startInstance for the first pass and incremented by one for
 * each subsequent pass.
 *
 * \param mode           primitive type; also used to derive the reduced
 *                       primitive for the state-change flush check
 * \param start          first vertex to draw
 * \param count          number of vertices to draw per instance
 * \param startInstance  instance id assigned to the first instance
 * \param instanceCount  number of instances to draw
 */
void
draw_arrays_instanced(struct draw_context *draw,
unsigned mode,
unsigned start,
unsigned count,
unsigned startInstance,
unsigned instanceCount);
void draw_flush(struct draw_context *draw);

View file

@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf,
/* Note: we really do want data[0] here, not data[pos]:
*/
vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
@ -275,6 +275,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;

View file

@ -226,6 +226,8 @@ struct draw_context
unsigned reduced_prim;
unsigned instance_id;
void *driver_private;
};

View file

@ -312,5 +312,28 @@ draw_arrays(struct draw_context *draw, unsigned prim,
#endif
/* drawing done here: */
draw->instance_id = 0;
draw_pt_arrays(draw, prim, start, count);
}
/**
 * Instanced variant of draw_arrays(): issue the same non-indexed draw
 * instanceCount times, tagging each pass with its own instance id
 * (startInstance, startInstance + 1, ...).
 *
 * \param draw           draw context to render with
 * \param mode           primitive type
 * \param start          first vertex
 * \param count          number of vertices per instance
 * \param startInstance  instance id used for the first pass
 * \param instanceCount  number of passes to issue
 */
void
draw_arrays_instanced(struct draw_context *draw,
                      unsigned mode,
                      unsigned start,
                      unsigned count,
                      unsigned startInstance,
                      unsigned instanceCount)
{
   const unsigned new_reduced_prim = u_reduced_prim(mode);
   unsigned pass = 0;

   /* A different reduced primitive counts as a state change: flush first,
    * then remember the new value.
    */
   if (draw->reduced_prim != new_reduced_prim) {
      draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
      draw->reduced_prim = new_reduced_prim;
   }

   /* One full pass through the pipeline per instance. */
   while (pass < instanceCount) {
      draw->instance_id = startInstance + pass;
      draw_pt_arrays(draw, mode, start, count);
      pass++;
   }
}

View file

@ -125,6 +125,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;
@ -204,6 +205,7 @@ void draw_pt_emit( struct pt_emit *emit,
translate->run( translate,
0,
vertex_count,
draw->instance_id,
hw_verts );
render->unmap_vertices( render,
@ -263,6 +265,7 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
count,
draw->instance_id,
hw_verts);
if (0) {

View file

@ -81,6 +81,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
key.element[nr].input_offset = 0;
key.element[nr].instance_divisor = 0;
key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].output_offset = dst_offset;
dst_offset += 1 * sizeof(float);
@ -100,6 +101,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor;
key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
key.element[nr].output_offset = dst_offset;
@ -183,6 +185,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
translate->run( translate,
start,
count,
draw->instance_id,
verts );
}

View file

@ -169,6 +169,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
key.element[i].input_format = input_format;
key.element[i].input_buffer = input_buffer;
key.element[i].input_offset = input_offset;
key.element[i].instance_divisor = src->instance_divisor;
key.element[i].output_format = output_format;
key.element[i].output_offset = dst_offset;
@ -314,6 +315,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
draw->instance_id,
hw_verts );
if (0) {
@ -374,6 +376,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
draw->instance_id,
hw_verts );
draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );

View file

@ -43,6 +43,7 @@ struct draw_varient_input
enum pipe_format format;
unsigned buffer;
unsigned offset;
unsigned instance_divisor;
};
struct draw_varient_output

View file

@ -180,6 +180,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@ -202,6 +203,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->fetch->run( vsvg->fetch,
start,
count,
vsvg->draw->instance_id,
temp_buffer );
vsvg->base.vs->run_linear( vsvg->base.vs,
@ -238,6 +240,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@ -283,6 +286,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
fetch.element[i].input_format = key->element[i].in.format;
fetch.element[i].input_buffer = key->element[i].in.buffer;
fetch.element[i].input_offset = key->element[i].in.offset;
fetch.element[i].instance_divisor = 0;
fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
fetch.element[i].output_offset = i * 4 * sizeof(float);
assert(fetch.element[i].output_offset < fetch.output_stride);
@ -297,6 +301,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit.element[i].input_buffer = 0;
emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
emit.element[i].instance_divisor = 0;
emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
emit.element[i].output_offset = key->element[i].out.offset;
assert(emit.element[i].input_offset <= fetch.output_stride);
@ -305,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].input_buffer = 1;
emit.element[i].input_offset = 0;
emit.element[i].instance_divisor = 0;
emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].output_offset = key->element[i].out.offset;
}

View file

@ -122,7 +122,8 @@ static const char *semantic_names[] =
"GENERIC",
"NORMAL",
"FACE",
"EDGEFLAG"
"EDGEFLAG",
"INSTANCEID"
};
static const char *immediate_type_names[] =

View file

@ -50,6 +50,7 @@ struct translate_element
enum pipe_format output_format;
unsigned input_buffer:8;
unsigned input_offset:24;
unsigned instance_divisor;
unsigned output_offset;
};
@ -79,6 +80,7 @@ struct translate {
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
unsigned instance_id,
void *output_buffer);
};

View file

@ -49,6 +49,7 @@ struct translate_generic {
fetch_func fetch;
unsigned buffer;
unsigned input_offset;
unsigned instance_divisor;
emit_func emit;
unsigned output_offset;
@ -607,6 +608,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
@ -622,13 +624,20 @@ static void PIPE_CDECL generic_run( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
const char *src = (tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * elt);
const char *src;
char *dst = (vert +
tg->attrib[attr].output_offset);
if (tg->attrib[attr].instance_divisor) {
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride *
(instance_id / tg->attrib[attr].instance_divisor);
} else {
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * elt;
}
tg->attrib[attr].fetch( src, data );
if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
@ -687,6 +696,7 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
tg->attrib[i].buffer = key->element[i].input_buffer;
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;

View file

@ -637,6 +637,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
static void PIPE_CDECL translate_sse_run( struct translate *translate,
unsigned start,
unsigned count,
unsigned instance_id,
void *output_buffer )
{
struct translate_sse *p = (struct translate_sse *)translate;

View file

@ -238,6 +238,7 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced;
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;

View file

@ -184,3 +184,54 @@ softpipe_draw_elements(struct pipe_context *pipe,
0, 0xffffffff,
mode, start, count );
}
/**
 * Softpipe entry point for instanced, non-indexed drawing.
 *
 * Updates derived state if dirty, maps transfers, constant buffers and
 * all vertex buffers, hands the draw to the draw module via
 * draw_arrays_instanced(), then unmaps the vertex and constant buffers.
 *
 * \param pipe           pipe context (a softpipe context)
 * \param mode           primitive type
 * \param start          first vertex
 * \param count          number of vertices per instance
 * \param startInstance  instance id of the first instance
 * \param instanceCount  number of instances to draw
 * \return always TRUE
 */
boolean
softpipe_draw_arrays_instanced(struct pipe_context *pipe,
                               unsigned mode,
                               unsigned start,
                               unsigned count,
                               unsigned startInstance,
                               unsigned instanceCount)
{
   struct softpipe_context *sp = softpipe_context(pipe);
   struct draw_context *draw = sp->draw;
   const unsigned last_vertex = start + count - 1;
   unsigned vb;

   sp->reduced_api_prim = u_reduced_prim(mode);

   /* Bring derived state up to date before touching any buffers. */
   if (sp->dirty) {
      softpipe_update_derived(sp);
   }

   softpipe_map_transfers(sp);
   softpipe_map_constant_buffers(sp);

   /* Map every bound vertex buffer for CPU reads and hand it to draw. */
   for (vb = 0; vb < sp->num_vertex_buffers; vb++) {
      draw_set_mapped_vertex_buffer(draw, vb,
                                    pipe_buffer_map(pipe->screen,
                                                    sp->vertex_buffer[vb].buffer,
                                                    PIPE_BUFFER_USAGE_CPU_READ));
   }

   /* NULL element pointer: presumably marks this as a non-indexed draw. */
   draw_set_mapped_element_buffer_range(draw, 0, start, last_vertex, NULL);

   /* draw! */
   draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);

   /* unmap vertex/index buffers - will cause draw module to flush */
   for (vb = 0; vb < sp->num_vertex_buffers; vb++) {
      draw_set_mapped_vertex_buffer(draw, vb, NULL);
      pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[vb].buffer);
   }

   /* Note: leave drawing surfaces mapped */
   softpipe_unmap_constant_buffers(sp);

   sp->dirty_render_cache = TRUE;

   return TRUE;
}

View file

@ -189,6 +189,14 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned max_index,
unsigned mode, unsigned start, unsigned count);
/**
 * Instanced, non-indexed draw for softpipe.  Maps the bound vertex and
 * constant buffers, forwards the call to the draw module's
 * draw_arrays_instanced(), and unmaps the buffers again.
 *
 * \param mode           primitive type
 * \param start          first vertex to draw
 * \param count          number of vertices to draw per instance
 * \param startInstance  instance id of the first instance
 * \param instanceCount  number of instances to draw
 * \return TRUE (the implementation has no failure path)
 */
boolean
softpipe_draw_arrays_instanced(struct pipe_context *pipe,
unsigned mode,
unsigned start,
unsigned count,
unsigned startInstance,
unsigned instanceCount);
void
softpipe_map_transfers(struct softpipe_context *sp);

View file

@ -198,6 +198,7 @@ static int update_zero_stride( struct svga_context *svga,
key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
key.element[0].input_buffer = vel->vertex_buffer_index;
key.element[0].input_offset = vel->src_offset;
key.element[0].instance_divisor = vel->instance_divisor;
key.element[0].output_offset = const_idx * 4 * sizeof(float);
translate_key_sanitize(&key);