llvmpipe: implement threaded rasterization

The LP_NUM_THREADS env var controls how many threads are created.
The default (and max) is 4, for now.
If LP_NUM_THREADS = 0, threading is not used.
This commit is contained in:
Brian Paul 2009-12-07 18:01:12 -07:00
parent 87c9ceaea2
commit aab1ceceec
2 changed files with 170 additions and 43 deletions

View file

@ -26,6 +26,7 @@
**************************************************************************/
#include "util/u_memory.h"
#include "util/u_math.h"
#include "lp_debug.h"
#include "lp_state.h"
@ -36,25 +37,6 @@
#include "lp_bin.h"
/**
 * Allocate a rasterizer and the color/depth tile buffers for every task slot.
 *
 * \param screen  stored in the new rasterizer's 'screen' field
 * \return the new rasterizer, or NULL if any allocation failed
 */
struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen )
{
   struct lp_rasterizer *rast;
   unsigned i;

   rast = CALLOC_STRUCT(lp_rasterizer);
   if(!rast)
      return NULL;

   rast->screen = screen;

   for (i = 0; i < Elements(rast->tasks); i++) {
      rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
      rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );

      /* Don't hand back a rasterizer with missing tile storage. */
      if (!rast->tasks[i].tile.color || !rast->tasks[i].tile.depth)
         goto fail;
   }

   return rast;

fail:
   /* Relies on CALLOC_STRUCT returning zeroed memory, so that slots the
    * loop never reached still hold NULL tile pointers.
    * NOTE(review): confirm against util/u_memory.h.
    */
   for (i = 0; i < Elements(rast->tasks); i++) {
      if (rast->tasks[i].tile.color)
         align_free( rast->tasks[i].tile.color );
      if (rast->tasks[i].tile.depth)
         align_free( rast->tasks[i].tile.depth );
   }
   FREE(rast);
   return NULL;
}
/**
* Begin the rasterization phase.
@ -414,16 +396,25 @@ static void lp_rast_store_color( struct lp_rasterizer *rast,
{
const unsigned x = rast->tasks[thread_index].x;
const unsigned y = rast->tasks[thread_index].y;
unsigned w = TILE_SIZE;
unsigned h = TILE_SIZE;
int w = TILE_SIZE;
int h = TILE_SIZE;
if (x + w > rast->width)
w -= x + w - rast->width;
if (y + h > rast->height)
h -= y + h - rast->height;
if (y + h > rast->height) {
int h2;
h2 = h - (y + h - rast->height);
assert(h2 <= TILE_SIZE);
h = h2;
}
assert(w >= 0);
assert(h >= 0);
assert(w <= TILE_SIZE);
assert(h <= TILE_SIZE);
LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
thread_index, x, y, w, h);
lp_tile_write_4ub(rast->cbuf_transfer->format,
rast->tasks[thread_index].tile.color,
@ -513,7 +504,7 @@ rasterize_bin( struct lp_rasterizer *rast,
/* simply execute each of the commands in the block list */
for (block = commands->head; block; block = block->next) {
for (k = 0; k < block->count; k++) {
block->cmd[k]( rast, 0, block->arg[k] );
block->cmd[k]( rast, thread_index, block->arg[k] );
}
}
@ -524,6 +515,41 @@ rasterize_bin( struct lp_rasterizer *rast,
/**
 * Rasterize/execute all bins.
 *
 * Repeatedly fetches the next bin from the bin iterator and executes its
 * commands via rasterize_bin() until the iterator returns NULL.
 * The caller is expected to have called lp_bin_iter_begin() on 'bins'
 * beforehand.  In threaded mode every worker thread runs this function on
 * the same 'bins' object.
 * NOTE(review): that presumes lp_bin_iter_next() is safe to call from
 * several threads at once -- confirm in lp_bin.c.
 *
 * \param rast          the rasterizer
 * \param thread_index  which rast->tasks[] slot the commands operate on
 * \param bins          the bins to iterate over
 * \param fb            currently unused
 * \param write_depth   currently unused
 */
static void
rasterize_bins( struct lp_rasterizer *rast,
                unsigned thread_index,
                struct lp_bins *bins,
                const struct pipe_framebuffer_state *fb,
                bool write_depth )
{
   struct cmd_bin *bin;
   int x, y;

   /* Accepted for the caller's convenience but not consulted here. */
   (void) fb;
   (void) write_depth;

   /* x, y are scaled by TILE_SIZE before being handed to rasterize_bin(),
    * so they are presumably the bin's position in tile units.
    */
   while ((bin = lp_bin_iter_next(bins, &x, &y))) {
      rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
   }
}
/**
* Called by rasterizer when it has something for us to render.
*/
void
lp_rasterize_bins( struct lp_rasterizer *rast,
struct lp_bins *bins,
@ -539,30 +565,32 @@ lp_rasterize_bins( struct lp_rasterizer *rast,
fb->zsbuf != NULL && write_depth,
fb->width,
fb->height );
/* loop over tile bins, rasterize each */
#if 0
{
unsigned i, j;
for (i = 0; i < bins->tiles_x; i++) {
for (j = 0; j < bins->tiles_y; j++) {
struct cmd_bin *bin = lp_get_bin(bins, i, j);
rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE );
}
}
if (rast->num_threads == 0) {
/* no threading */
lp_bin_iter_begin( bins );
rasterize_bins( rast, 0, bins, fb, write_depth );
}
#else
{
struct cmd_bin *bin;
int x, y;
else {
/* threaded rendering! */
unsigned i;
rast->bins = bins;
rast->fb = fb;
rast->write_depth = write_depth;
lp_bin_iter_begin( bins );
while ((bin = lp_bin_iter_next(bins, &x, &y))) {
rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE);
/* signal the threads that there's work to do */
for (i = 0; i < rast->num_threads; i++) {
pipe_semaphore_signal(&rast->tasks[i].work_ready);
}
/* wait for work to complete */
for (i = 0; i < rast->num_threads; i++) {
pipe_semaphore_wait(&rast->tasks[i].work_done);
}
}
#endif
lp_rast_end( rast );
@ -570,6 +598,87 @@ lp_rasterize_bins( struct lp_rasterizer *rast,
}
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work       (task->work_ready)
 *   2. do work             (rasterize_bins() on rast->bins)
 *   3. signal that we're done (task->work_done)
 *
 * \param init_data  pointer to this thread's struct lp_rasterizer_task
 * \return nothing in practice: the loop has no exit condition, so the
 *         trailing return is never reached
 */
static void *
thread_func( void *init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   int debug = 0;   /* set to non-zero for per-step tracing */

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %u waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      /* do work; bins/fb/write_depth are read from the rasterizer here,
       * after the wake-up, not cached across iterations
       */
      if (debug)
         debug_printf("thread %u doing work\n", task->thread_index);
      rasterize_bins(rast, task->thread_index,
                     rast->bins, rast->fb, rast->write_depth);

      /* signal done with work */
      if (debug)
         debug_printf("thread %u done working\n", task->thread_index);
      pipe_semaphore_signal(&task->work_done);
   }

   return NULL;
}
/**
* Initialize semaphores and spawn the threads.
*/
static void
create_rast_threads(struct lp_rasterizer *rast)
{
unsigned i;
rast->num_threads = debug_get_num_option("LP_NUM_THREADS", MAX_THREADS);
rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
/* NOTE: if num_threads is zero, we won't use any threads */
for (i = 0; i < rast->num_threads; i++) {
pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
pipe_semaphore_init(&rast->tasks[i].work_done, 0);
rast->threads[i] = pipe_thread_create(thread_func,
(void *) &rast->tasks[i]);
}
}
/**
 * Allocate a rasterizer, set up every task slot (tile buffers, back
 * pointer, index) and then start the worker threads.
 *
 * \param screen  stored in the new rasterizer's 'screen' field
 * \return the new rasterizer, or NULL if any allocation failed
 */
struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen )
{
   struct lp_rasterizer *rast;
   unsigned i;

   rast = CALLOC_STRUCT(lp_rasterizer);
   if(!rast)
      return NULL;

   rast->screen = screen;

   for (i = 0; i < Elements(rast->tasks); i++) {
      rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
      rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
      rast->tasks[i].rast = rast;
      rast->tasks[i].thread_index = i;

      /* Bail out before any thread is spawned: a worker must never see
       * a task without tile storage.
       */
      if (!rast->tasks[i].tile.color || !rast->tasks[i].tile.depth)
         goto fail;
   }

   /* Tasks must be fully initialized first; each thread receives a
    * pointer to its task.
    */
   create_rast_threads(rast);

   return rast;

fail:
   /* Relies on CALLOC_STRUCT returning zeroed memory, so that slots the
    * loop never reached still hold NULL tile pointers.
    * NOTE(review): confirm against util/u_memory.h.
    */
   for (i = 0; i < Elements(rast->tasks); i++) {
      if (rast->tasks[i].tile.color)
         align_free( rast->tasks[i].tile.color );
      if (rast->tasks[i].tile.depth)
         align_free( rast->tasks[i].tile.depth );
   }
   FREE(rast);
   return NULL;
}
/* Shutdown:
*/

View file

@ -28,6 +28,7 @@
#ifndef LP_RAST_PRIV_H
#define LP_RAST_PRIV_H
#include "pipe/p_thread.h"
#include "lp_rast.h"
@ -36,6 +37,7 @@
struct pipe_transfer;
struct pipe_screen;
struct lp_rasterizer;
/**
@ -69,6 +71,15 @@ struct lp_rasterizer_task
} blocks[256];
const struct lp_rast_state *current_state;
/** "back" pointer */
struct lp_rasterizer *rast;
/** "my" index */
unsigned thread_index;
pipe_semaphore work_ready;
pipe_semaphore work_done;
};
@ -104,6 +115,13 @@ struct lp_rasterizer
/** A task object for each rasterization thread */
struct lp_rasterizer_task tasks[MAX_THREADS];
unsigned num_threads;
pipe_thread threads[MAX_THREADS];
struct lp_bins *bins;
const struct pipe_framebuffer_state *fb;
boolean write_depth;
};