freedreno/a6xx: Let the GPU track streamout offsets

As it goes, the GPU writes out the current streamout offsets to the
address given by the FLUSH_BASE pointer.  When appending to the stream,
we load that value with CP_MEM_TO_REG so that the driver doesn't have
to track the offsets on the CPU.  This ensures that streamout continues
to work once we enable geometry and tessellation shader stages that add
geometry.

Reviewed-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Kristian H. Kristensen 2019-09-05 15:07:55 -07:00
parent de1c89fd93
commit 1acf8d2354
4 changed files with 35 additions and 18 deletions

View file

@ -107,13 +107,19 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
struct fd6_control {
uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
uint32_t _pad0;
uint32_t flush_base; /* dummy address for VPC_SO[i].FLUSH_BASE_LO/HI */
volatile uint32_t vsc_overflow;
uint32_t _pad1;
/* flag set from cmdstream when VSC overflow detected: */
volatile uint32_t vsc_overflow;
uint32_t _pad2;
uint32_t vsc_scratch;
uint32_t _pad2;
uint32_t _pad3;
uint32_t _pad4;
/* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
struct {
uint32_t offset;
uint32_t pad[7];
} flush_base[4];
};
#define control_ptr(fd6_ctx, member) \

View file

@ -712,21 +712,27 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3
if (!target)
continue;
unsigned offset = (so->offsets[i] * info->stride[i] * 4) +
target->buffer_offset;
OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3);
/* VPC_SO[i].BUFFER_BASE_LO: */
OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0);
OUT_RING(ring, target->buffer_size + offset);
OUT_RELOCW(ring, fd_resource(target->buffer)->bo, target->buffer_offset, 0, 0);
OUT_RING(ring, target->buffer_size - target->buffer_offset);
OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3);
OUT_RING(ring, offset);
/* VPC_SO[i].FLUSH_BASE_LO/HI: */
// TODO just give hw a dummy addr for now.. we should
// be using this and then CP_MEM_TO_REG to set the
// VPC_SO[i].BUFFER_OFFSET for the next draw..
OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base));
if (so->reset & (1 << i)) {
unsigned offset = (so->offsets[i] * info->stride[i] * 4);
OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
OUT_RING(ring, offset);
} else {
OUT_PKT7(ring, CP_MEM_TO_REG, 3);
OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
CP_MEM_TO_REG_0_64B | CP_MEM_TO_REG_0_ACCUMULATE |
CP_MEM_TO_REG_0_CNT(1 - 1));
OUT_RELOC(ring, control_ptr(fd6_context(ctx), flush_base[i].offset));
}
OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE_LO(i), 2);
OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base[i]));
so->reset &= ~(1 << i);
emit->streamout_mask |= (1 << i);
}

View file

@ -86,6 +86,9 @@ struct fd_vertex_stateobj {
struct fd_streamout_stateobj {
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
/* Bitmask of streams that should be reset. */
unsigned reset;
unsigned num_targets;
/* Track offset from vtxcnt for streamout data. This counter
* is just incremented by # of vertices on each draw until

View file

@ -499,12 +499,14 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
for (i = 0; i < num_targets; i++) {
boolean changed = targets[i] != so->targets[i];
boolean append = (offsets[i] == (unsigned)-1);
boolean reset = (offsets[i] != (unsigned)-1);
if (!changed && append)
so->reset |= (reset << i);
if (!changed && !reset)
continue;
if (!append)
if (reset)
so->offsets[i] = offsets[i];
pipe_so_target_reference(&so->targets[i], targets[i]);