Cell: improve "finished copying batch buffer" signalling.

When the SPU is done copying a batch buffer to local store, use an mfc_put()
to write a "done" message back to the buffer status array in main memory.
We were previously using a mailbox message for synchronization.
This commit is contained in:
Brian 2008-01-19 12:04:06 -07:00
parent 06b019d16b
commit a1f4a5e802
7 changed files with 103 additions and 22 deletions

View file

@ -62,10 +62,12 @@
#define CELL_CMD_STATE_DEPTH_STENCIL 7
#define CELL_NUM_BATCH_BUFFERS 2
#define CELL_NUM_BATCH_BUFFERS 3
#define CELL_BATCH_BUFFER_SIZE 1024 /**< 16KB would be the max */
#define CELL_BATCH_FINISHED 0x1234 /**< mbox message */
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
/**
@ -122,6 +124,7 @@ struct cell_init_info
unsigned num_spus;
struct cell_command *cmd;
ubyte *batch_buffers[CELL_NUM_BATCH_BUFFERS];
uint *buffer_status; /**< points at cell_context->buffer_status */
} ALIGN16_ATTRIB;

View file

@ -34,9 +34,9 @@
void
cell_batch_flush(struct cell_context *cell)
{
const uint batch = cell->cur_batch;
uint batch = cell->cur_batch;
const uint size = cell->batch_buffer_size[batch];
uint i, cmd_word;
uint spu, cmd_word;
if (size == 0)
return;
@ -48,25 +48,44 @@ cell_batch_flush(struct cell_context *cell)
batch, &cell->batch_buffer[batch][0], size);
*/
/*
* Build "BATCH" command and send to all SPUs.
*/
cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
for (i = 0; i < cell->num_spus; i++) {
send_mbox_message(cell_global.spe_contexts[i], cmd_word);
for (spu = 0; spu < cell->num_spus; spu++) {
assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
}
/* XXX wait for the DMX xfer to finish.
* Using mailboxes here is temporary.
* Ideally, we want to use a PPE-side DMA status check function...
/* When the SPUs are done copying the buffer into their local stores
* they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
* array indicating that the PPU can re-use the buffer.
*/
for (i = 0; i < cell->num_spus; i++) {
uint k = wait_mbox_message(cell_global.spe_contexts[i]);
assert(k == CELL_BATCH_FINISHED);
/* Find a buffer that's marked as free by all SPUs */
while (1) {
uint num_free = 0;
batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS;
for (spu = 0; spu < cell->num_spus; spu++) {
if (cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_FREE)
num_free++;
}
if (num_free == cell->num_spus) {
/* found a free buffer, now mark status as used */
for (spu = 0; spu < cell->num_spus; spu++) {
cell->buffer_status[spu][batch][0] = CELL_BUFFER_STATUS_USED;
}
break;
}
}
/* next buffer */
cell->cur_batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS;
cell->batch_buffer_size[cell->cur_batch] = 0; /* empty */
cell->batch_buffer_size[batch] = 0; /* empty */
cell->cur_batch = batch;
}

View file

@ -160,7 +160,7 @@ struct pipe_context *
cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
{
struct cell_context *cell;
uint i;
uint spu, buf;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@ -248,13 +248,30 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
cell_start_spus(cell);
for (i = 0; i < CELL_NUM_BATCH_BUFFERS; i++) {
cell->batch_buffer_size[i] = 0;
for (buf = 0; buf < CELL_NUM_BATCH_BUFFERS; buf++) {
cell->batch_buffer_size[buf] = 0;
/* init batch buffer status values,
* mark 0th buffer as used, rest as free.
*/
for (spu = 0; spu < cell->num_spus; spu++) {
if (buf == 0)
cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
else
cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
}
}
#if 0
test_spus(cell);
#endif
return &cell->pipe;
}
#if 0
/** [4] to ensure 16-byte alignment for each status word */
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB;
#endif

View file

@ -38,6 +38,9 @@
#include "pipe/cell/common.h"
#define CELL_MAX_SPUS 6
struct cell_vbuf_render;
struct cell_vertex_shader_state
@ -103,10 +106,14 @@ struct cell_context
ubyte batch_buffer[CELL_NUM_BATCH_BUFFERS][CELL_BATCH_BUFFER_SIZE] ALIGN16_ATTRIB;
int cur_batch; /**< which batch buffer is being filled */
/** [4] to ensure 16-byte alignment for each status word */
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB;
};
static INLINE struct cell_context *
cell_context(struct pipe_context *pipe)
{

View file

@ -114,6 +114,7 @@ cell_start_spus(struct cell_context *cell)
for (j = 0; j < CELL_NUM_BATCH_BUFFERS; j++) {
cell_global.inits[i].batch_buffers[j] = cell->batch_buffer[j];
}
cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
cell_global.spe_contexts[i] = spe_context_create(0, NULL);
if (!cell_global.spe_contexts[i]) {

View file

@ -395,6 +395,38 @@ cmd_finish(void)
}
/**
 * Notify the PPU that this SPU has finished copying a batch buffer into
 * local store, so the PPU is free to reuse that buffer.
 * The notification is a 16-byte batch-buffer-status block written back
 * into main memory (into cell_context->buffer_status[]).
 *
 * \param buffer  index of the batch buffer being released; asserted to be
 *                less than CELL_NUM_BATCH_BUFFERS
 */
static void
release_batch_buffer(uint buffer)
{
   /* Evidently, using less than a 16-byte status doesn't work reliably */
   static const uint free_msg[4] ALIGN16_ATTRIB
      = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};

   /* Each (SPU, buffer) pair has its own 4-word slot in the status array */
   const uint slot = spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer;
   const uint word_offset = 4 * slot;
   uint *main_mem_dst = &spu.init.buffer_status[word_offset];

   ASSERT(buffer < CELL_NUM_BATCH_BUFFERS);

   /*
   printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n",
          spu.init.id, buffer, word_offset, main_mem_dst);
   */

   mfc_put((void *) &free_msg,            /* src in local memory */
           (unsigned int) main_mem_dst,   /* dst in main memory */
           sizeof(free_msg),              /* size */
           TAG_MISC,                      /* tag is unimportant */
           0,                             /* tid */
           0                              /* rid */);
}
/**
* Execute a batch of commands
* The opcode param encodes the location of the buffer and its size.
@ -429,9 +461,9 @@ cmd_batch(uint opcode)
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* send mbox message to indicate DMA completed */
/* XXX temporary */
spu_write_out_mbox(CELL_BATCH_FINISHED);
/* Tell PPU we're done copying the buffer to local store */
release_batch_buffer(buf);
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {

View file

@ -76,6 +76,8 @@ extern struct spu_global spu;
#define TAG_WRITE_TILE_Z 15
#define TAG_INDEX_BUFFER 16
#define TAG_BATCH_BUFFER 17
#define TAG_MISC 18
/** The standard assert macro doesn't seem to work on SPUs */
#define ASSERT(x) \