Cell: checkpoint commit: always inline prim indexes into batch buffer

Also, explicit release-vertex-buffer command.
Lots of debug/stale code still in place...
This commit is contained in:
Brian 2008-01-28 10:00:27 -07:00 committed by Ben Skeggs
parent 4f0906a18a
commit 9abbaacea6
4 changed files with 171 additions and 66 deletions

View file

@ -75,6 +75,7 @@
#define CELL_CMD_FINISH 3
#define CELL_CMD_RENDER 4
#define CELL_CMD_BATCH 5
#define CELL_CMD_RELEASE_VERTS 6 /* payload: struct cell_command_release_verts */
#define CELL_CMD_STATE_FRAMEBUFFER 10
#define CELL_CMD_STATE_DEPTH_STENCIL 11
#define CELL_CMD_STATE_SAMPLER 12
@ -124,7 +125,11 @@ struct cell_command_render
uint vertex_size; /**< bytes per vertex */
uint dummy; /* XXX this dummy field works around a compiler bug */
uint num_indexes;
#if 0
const void *vertex_data;
#else
uint vertex_buf; /**< which cell->buffer[] contains the vertex data */
#endif
const ushort *index_data;
float xmin, ymin, xmax, ymax;
boolean inline_indexes;
@ -132,6 +137,13 @@ struct cell_command_render
} ALIGN16_ATTRIB;
/**
 * Batch-buffer command telling an SPU to release a vertex buffer.
 * The SPU-side batch decoder asserts that this struct is exactly 8 bytes
 * and advances its read position by sizeof(struct) / 4 words, so the
 * field layout must not change without updating that code.
 */
struct cell_command_release_verts
{
int opcode; /**< CELL_CMD_RELEASE_VERTS */
uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
};
/** XXX unions don't seem to work */
struct cell_command
{

View file

@ -40,8 +40,8 @@
/** Allow prim indexes, verts to be inlined after RENDER command */
#define ALLOW_INLINE_INDEXES 1
#define ALLOW_INLINE_VERTS 1
#define ALLOW_INLINE_INDEXES 01
#define ALLOW_INLINE_VERTS 0
/**
@ -55,6 +55,9 @@ struct cell_vbuf_render
uint prim;
uint vertex_size;
void *vertex_buffer;
#if 1
uint vertex_buf;
#endif
};
@ -81,13 +84,52 @@ cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
/*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
#if 0
assert(!cvbr->vertex_buffer);
cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16);
#else
assert(cvbr->vertex_buf == ~0);
cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
printf("%s vertex_buf = %u\n", __FUNCTION__, cvbr->vertex_buf);
#endif
cvbr->vertex_size = vertex_size;
return cvbr->vertex_buffer;
}
/**
 * vbuf_render::release_vertices hook.
 * Emits a CELL_CMD_RELEASE_VERTS command for the vertex buffer currently
 * held by this renderer, marks the renderer as holding no buffer, and
 * flushes.
 *
 * \param vbr            the vbuf renderer (really a cell_vbuf_render)
 * \param vertices       must equal cvbr->vertex_buffer (asserted)
 * \param vertex_size    unused here, apart from the disabled debug printf
 * \param vertices_used  only reported in the debug printf
 */
static void
cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
unsigned vertex_size, unsigned vertices_used)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
struct cell_context *cell = cvbr->cell;
/*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/
#if 0
/* old path: vertices lived in an align_malloc'd block */
align_free(vertices);
#else
/* debug output, unconditionally enabled for now */
printf("%s vertex_buf = %u count = %u\n",
__FUNCTION__, cvbr->vertex_buf, vertices_used);
{
/* reserve room for the release command via cell_batch_alloc() and fill it in */
struct cell_command_release_verts *release
= (struct cell_command_release_verts *)
cell_batch_alloc(cell, sizeof(struct cell_command_release_verts));
release->opcode = CELL_CMD_RELEASE_VERTS;
release->vertex_buf = cvbr->vertex_buf;
}
/* ~0 is the "no vertex buffer held" marker; allocate_vertices asserts on it */
cvbr->vertex_buf = ~0;
/* flush with flags = 0, i.e. without PIPE_FLUSH_WAIT */
cell_flush_int(&cell->pipe, 0x0);/*NEW*/
#endif
/* NOTE(review): this sanity check runs only after the release command was emitted */
assert(vertices == cvbr->vertex_buffer);
cvbr->vertex_buffer = NULL;
}
static void
cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
@ -124,7 +166,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]);
}
printf("\n");
#elif 0
#elif 01
printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n",
nr_indices, nr_vertices,
indices[0], indices[1], indices[2]);
@ -157,28 +199,26 @@ cell_vbuf_draw(struct vbuf_render *vbr,
const uint index_bytes = ROUNDUP4(nr_indices * 2);
const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size;
const uint batch_size = sizeof(struct cell_command_render)
+ index_bytes;
struct cell_command_render *render
= (struct cell_command_render *)
cell_batch_alloc(cell, sizeof(*render));
cell_batch_alloc(cell, batch_size);
render->opcode = CELL_CMD_RENDER;
render->prim_type = cvbr->prim;
render->num_indexes = nr_indices;
if (ALLOW_INLINE_INDEXES &&
index_bytes <= cell_batch_free_space(cell)) {
/* indices inlined, right after render cmd */
void *dst = cell_batch_alloc(cell, index_bytes);
memcpy(dst, indices, nr_indices * 2);
render->inline_indexes = TRUE;
render->index_data = NULL;
}
else {
/* indices in separate buffer */
render->inline_indexes = FALSE;
render->index_data = indices;
ASSERT_ALIGN16(render->index_data);
}
/* append indices after render command */
memcpy(render + 1, indices, nr_indices * 2);
render->inline_indexes = TRUE;
render->index_data = NULL;
/* if there's room, append vertices after the indices, else leave
* vertices in the original/separate buffer.
*/
render->vertex_size = 4 * cell->vertex_info.size;
render->num_verts = nr_vertices;
if (ALLOW_INLINE_VERTS &&
@ -188,12 +228,21 @@ cell_vbuf_draw(struct vbuf_render *vbr,
void *dst = cell_batch_alloc(cell, vertex_bytes);
memcpy(dst, vertices, vertex_bytes);
render->inline_verts = TRUE;
#if 0
render->vertex_data = NULL;
#else
render->vertex_buf = ~0;
#endif
}
else {
render->inline_verts = FALSE;
#if 0
render->vertex_data = vertices;
ASSERT_ALIGN16(render->vertex_data);
#else
ASSERT(cvbr->vertex_buf >= 0);
render->vertex_buf = cvbr->vertex_buf;
#endif
}
@ -203,27 +252,13 @@ cell_vbuf_draw(struct vbuf_render *vbr,
render->ymax = ymax;
}
#if 01
#if 0
/* XXX this is temporary */
cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT);
#endif
}
/**
 * vbuf_render::release_vertices hook (align_malloc variant).
 * Frees the vertex storage with align_free() and clears the renderer's
 * vertex_buffer pointer.  vertex_size and vertices_used are unused apart
 * from the disabled debug printf.
 */
static void
cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
unsigned vertex_size, unsigned vertices_used)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
/*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/
align_free(vertices);
/* NOTE(review): the pointer is compared after it has already been freed */
assert(vertices == cvbr->vertex_buffer);
cvbr->vertex_buffer = NULL;
}
static void
cell_vbuf_destroy(struct vbuf_render *vbr)
{
@ -244,8 +279,17 @@ cell_init_vbuf(struct cell_context *cell)
cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
#if 0
cell->vbuf_render->base.max_indices = CELL_MAX_VBUF_INDEXES;
cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_MAX_VBUF_SIZE;
#else
cell->vbuf_render->base.max_indices
= (CELL_BUFFER_SIZE
- sizeof(struct cell_command_render)
- sizeof(struct cell_command_release_verts))
/ sizeof(ushort);
cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
#endif
cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
@ -255,6 +299,9 @@ cell_init_vbuf(struct cell_context *cell)
cell->vbuf_render->base.destroy = cell_vbuf_destroy;
cell->vbuf_render->cell = cell;
#if 1
cell->vbuf_render->vertex_buf = ~0;
#endif
cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
}

View file

@ -69,6 +69,32 @@ wait_on_mask_all(unsigned tagMask)
}
/**
 * Notify the PPU that this SPU is done copying a buffer into local store,
 * so the PPU is free to reuse that buffer.
 * The notification is a 16-byte batch-buffer-status block written back
 * into main memory (cell_context->buffer_status[]).
 *
 * \param buffer  buffer number, must be less than CELL_NUM_BUFFERS
 */
static void
release_buffer(uint buffer)
{
   /* Evidently, using less than a 16-byte status doesn't work reliably */
   static const uint status[4] ALIGN16_ATTRIB
      = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};

   /* every (SPU, buffer) pair owns one 4-uint slot in the status array */
   const uint slot = spu.init.id * CELL_NUM_BUFFERS + buffer;
   uint *dst = spu.init.buffer_status + 4 * slot;

   ASSERT(buffer < CELL_NUM_BUFFERS);

   /* DMA the status block: local store -> main memory */
   mfc_put((void *) &status,
           (unsigned int) dst,
           sizeof(status),
           TAG_MISC,   /* tag is unimportant */
           0,          /* tid */
           0           /* rid */);
}
/**
* For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
@ -237,13 +263,18 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
printf(" bound: %g, %g .. %g, %g\n",
render->xmin, render->ymin, render->xmax, render->ymax);
*/
/*
printf("SPU %u: indices at %p vertices at %p\n",
spu.init.id,
render->index_data, render->vertex_data);
*/
}
ASSERT(sizeof(*render) % 4 == 0);
#if 0
ASSERT_ALIGN16(render->vertex_data);
#else
#endif
ASSERT_ALIGN16(render->index_data);
@ -251,10 +282,18 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
** Get vertex, index buffers if not inlined
**/
if (!render->inline_verts) {
void *src;
ASSERT(total_vertex_bytes % 16 == 0);
#if 0
src = render->vertex_data;
#else
spu.cur_vertex_buf = render->vertex_buf;
src = spu.init.buffers[render->vertex_buf];
#endif
mfc_get(vertex_data, /* dest */
(unsigned int) render->vertex_data, /* src */
(unsigned int) src,
total_vertex_bytes, /* size */
TAG_VERTEX_BUFFER,
0, /* tid */
@ -298,6 +337,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
/* vertices are after indexes, if inlined */
vertices = (const ubyte *) (render + 1) + *pos_incr * 4;
*pos_incr = *pos_incr + total_vertex_bytes / 4;
spu.cur_vertex_buf = ~0;
}
}
@ -310,6 +350,12 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
mask |= (1 << TAG_INDEX_BUFFER);
wait_on_mask_all(mask);
#if 0
if (!render->inline_verts) {
printf("SPU %u: release vbuf %u\n", spu.init.id, render->vertex_buf);
release_buffer(render->vertex_buf);
}
#endif
/**
** find tiles which intersect the prim bounding box
@ -359,6 +405,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
for (j = 0; j < render->num_indexes; j += 3) {
const float *v0, *v1, *v2;
if (indexes[j] == 0xffff) {
printf("index[%u] = 0xffff\n", j);
}
ASSERT(indexes[j] != 0xffff);
ASSERT(indexes[j+1] != 0xffff);
ASSERT(indexes[j+2] != 0xffff);
v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
@ -391,6 +445,17 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
}
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
if (Debug)
printf("SPU %u: RELEASE VERTS %u\n",
spu.init.id, spu.cur_vertex_buf);
ASSERT(spu.cur_vertex_buf == release->vertex_buf);
release_buffer(release->vertex_buf);
}
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
@ -472,38 +537,6 @@ cmd_finish(void)
}
/**
 * Tell the PPU that this SPU has finished copying a buffer to
 * local store and that it may be reused by the PPU.
 * This is done by writing a 16-byte batch-buffer-status block back into
 * main memory (in cell_context->buffer_status[]).
 *
 * \param buffer  buffer number, must be less than CELL_NUM_BUFFERS
 */
static void
release_buffer(uint buffer)
{
/* Evidently, using less than a 16-byte status doesn't work reliably */
static const uint status[4] ALIGN16_ATTRIB
= {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
/* 4 uints (16 bytes) of status per (SPU, buffer) pair */
const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
uint *dst = spu.init.buffer_status + index;
ASSERT(buffer < CELL_NUM_BUFFERS);
/*
printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n",
spu.init.id, buffer, index, dst);
*/
mfc_put((void *) &status, /* src in local memory */
(unsigned int) dst, /* dst in main memory */
sizeof(status), /* size */
TAG_MISC, /* tag is unimportant */
0, /* tid */
0 /* rid */);
}
/**
* Execute a batch of commands
* The opcode param encodes the location of the buffer and its size.
@ -538,6 +571,8 @@ cmd_batch(uint opcode)
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
if (Debug)
printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
release_buffer(buf);
for (pos = 0; pos < usize; /* no incr */) {
@ -567,6 +602,15 @@ cmd_batch(uint opcode)
pos += sizeof(*render) / 4 + pos_incr;
}
break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
= (struct cell_command_release_verts *) &buffer[pos];
cmd_release_verts(release);
ASSERT(sizeof(*release) == 8);
pos += sizeof(*release) / 4;
}
break;
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;

View file

@ -65,6 +65,8 @@ struct spu_global
/* XXX more state to come */
uint cur_vertex_buf;
} ALIGN16_ATTRIB;