r600g: implement transform feedback

r600: DONE.
r700: MOSTLY (done but locks up).
Evergreen: MOSTLY (done but doesn't work for an unknown reason).

The kernel support will come soon.
This commit is contained in:
Marek Olšák 2011-11-08 21:58:27 +01:00
parent 047363a07b
commit 543b2331d7
18 changed files with 873 additions and 13 deletions

View file

@ -73,6 +73,35 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
id++;
break;
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
cf->output.inst |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
id++;
break;
case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:

View file

@ -1241,3 +1241,38 @@ void evergreen_context_flush_dest_caches(struct r600_context *ctx)
ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
}
void evergreen_flush_vgt_streamout(struct r600_context *ctx)
{
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_0084FC_CP_STRMOUT_CNTL - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
ctx->pm4[ctx->pm4_cdwords++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
ctx->pm4[ctx->pm4_cdwords++] = R_0084FC_CP_STRMOUT_CNTL >> 2; /* register */
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
ctx->pm4[ctx->pm4_cdwords++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
ctx->pm4[ctx->pm4_cdwords++] = 4; /* poll interval */
}
void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
{
if (buffer_enable_bit) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B94_VGT_STRMOUT_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_028B94_STREAMOUT_0_EN(1);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
} else {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B94_VGT_STRMOUT_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_028B94_STREAMOUT_0_EN(0);
}
}

View file

@ -927,8 +927,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
state->fill_back != PIPE_POLYGON_MODE_FILL);
r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL,
S_028814_PROVOKING_VTX_LAST(prov_vtx) |
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
S_028814_FACE(!state->front_ccw) |
S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
@ -1688,6 +1688,9 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
rctx->context.texture_barrier = evergreen_texture_barrier;
rctx->context.create_stream_output_target = r600_create_so_target;
rctx->context.stream_output_target_destroy = r600_so_target_destroy;
rctx->context.set_stream_output_targets = r600_set_so_targets;
}
static void cayman_init_config(struct r600_pipe_context *rctx)

View file

@ -50,6 +50,8 @@
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
/* 0 - any non-TS event
@ -82,6 +84,7 @@
#define PKT3_MEM_SEMAPHORE 0x39
#define PKT3_MPEG_INDEX 0x3A
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_MEM_WRITE 0x3D
#define PKT3_INDIRECT_BUFFER 0x32
#define PKT3_CP_INTERRUPT 0x40
@ -118,6 +121,12 @@
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
/* Registers */
#define R_0084FC_CP_STRMOUT_CNTL 0x000084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_008960_VGT_STRMOUT_BUFFER_FILLED_SIZE_0 0x008960 /* read-only */
#define R_008964_VGT_STRMOUT_BUFFER_FILLED_SIZE_1 0x008964 /* read-only */
#define R_008968_VGT_STRMOUT_BUFFER_FILLED_SIZE_2 0x008968 /* read-only */
#define R_00896C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3 0x00896C /* read-only */
#define R_008C00_SQ_CONFIG 0x00008C00
#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0)
#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1)
@ -1723,6 +1732,33 @@
#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x00028AC0
#define R_028AC4_DB_SRESULTS_COMPARE_STATE1 0x00028AC4
#define R_028AC8_DB_PRELOAD_CONTROL 0x00028AC8
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
#define R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 0x028AD4
#define R_028AD8_VGT_STRMOUT_BUFFER_BASE_0 0x028AD8
#define R_028ADC_VGT_STRMOUT_BUFFER_OFFSET_0 0x028ADC
#define R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1 0x028AE0
#define R_028AE4_VGT_STRMOUT_VTX_STRIDE_1 0x028AE4
#define R_028AE8_VGT_STRMOUT_BUFFER_BASE_1 0x028AE8
#define R_028AEC_VGT_STRMOUT_BUFFER_OFFSET_1 0x028AEC
#define R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2 0x028AF0
#define R_028AF4_VGT_STRMOUT_VTX_STRIDE_2 0x028AF4
#define R_028AF8_VGT_STRMOUT_BUFFER_BASE_2 0x028AF8
#define R_028AFC_VGT_STRMOUT_BUFFER_OFFSET_2 0x028AFC
#define R_028B00_VGT_STRMOUT_BUFFER_SIZE_3 0x028B00
#define R_028B04_VGT_STRMOUT_VTX_STRIDE_3 0x028B04
#define R_028B08_VGT_STRMOUT_BUFFER_BASE_3 0x028B08
#define R_028B0C_VGT_STRMOUT_BUFFER_OFFSET_3 0x028B0C
#define R_028B10_VGT_STRMOUT_BASE_OFFSET_0 0x028B10
#define R_028B14_VGT_STRMOUT_BASE_OFFSET_1 0x028B14
#define R_028B18_VGT_STRMOUT_BASE_OFFSET_2 0x028B18
#define R_028B1C_VGT_STRMOUT_BASE_OFFSET_3 0x028B1C
#define R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET 0x028B28
#define R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE 0x028B2C
#define R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE 0x028B30
#define R_028B44_VGT_STRMOUT_BASE_OFFSET_HI_0 0x028B44
#define R_028B48_VGT_STRMOUT_BASE_OFFSET_HI_1 0x028B48
#define R_028B4C_VGT_STRMOUT_BASE_OFFSET_HI_2 0x028B4C
#define R_028B50_VGT_STRMOUT_BASE_OFFSET_HI_3 0x028B50
#define R_028B54_VGT_SHADER_STAGES_EN 0x00028B54
#define R_028B70_DB_ALPHA_TO_MASK 0x00028B70
#define R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x00028B78
@ -1750,7 +1786,16 @@
#define G_028B8C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF)
#define C_028B8C_OFFSET 0x00000000
#define R_028B94_VGT_STRMOUT_CONFIG 0x00028B94
#define S_028B94_STREAMOUT_0_EN(x) (((x) & 0x1) << 0)
#define S_028B94_STREAMOUT_1_EN(x) (((x) & 0x1) << 1)
#define S_028B94_STREAMOUT_2_EN(x) (((x) & 0x1) << 2)
#define S_028B94_STREAMOUT_3_EN(x) (((x) & 0x1) << 3)
#define S_028B94_RAST_STREAM(x) (((x) & 0x7) << 4)
#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x00028B98
#define S_028B98_STREAM_0_BUFFER_EN(x) (((x) & 0xf) << 0)
#define S_028B98_STREAM_1_BUFFER_EN(x) (((x) & 0xf) << 4)
#define S_028B98_STREAM_2_BUFFER_EN(x) (((x) & 0xf) << 8)
#define S_028B98_STREAM_3_BUFFER_EN(x) (((x) & 0xf) << 12)
#define R_028C00_PA_SC_LINE_CNTL 0x00028C00
#define R_028C04_PA_SC_AA_CONFIG 0x00028C04
#define R_028C08_PA_SU_VTX_CNTL 0x00028C08

View file

@ -167,6 +167,7 @@ struct r600_query {
union {
uint64_t u64;
boolean b;
struct pipe_query_data_so_statistics so;
} result;
/* The kind of query */
unsigned type;
@ -187,6 +188,15 @@ struct r600_query {
struct list_head list;
};
struct r600_so_target {
struct pipe_stream_output_target b;
/* The buffer where BUFFER_FILLED_SIZE is stored. */
struct r600_resource *filled_size;
unsigned stride;
unsigned so_index;
};
#define R600_CONTEXT_DRAW_PENDING (1 << 0)
#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1)
#define R600_CONTEXT_CHECK_EVENT_FLUSH (1 << 2)
@ -218,6 +228,7 @@ struct r600_context {
/* The list of active queries. Only one query of each type can be active. */
struct list_head active_query_list;
unsigned num_cs_dw_queries_suspend;
unsigned num_cs_dw_streamout_end;
unsigned backend_mask;
unsigned max_db; /* for OQ */
@ -229,6 +240,12 @@ struct r600_context {
struct r600_range fs_resources;
int num_ps_resources, num_vs_resources, num_fs_resources;
boolean have_depth_texture, have_depth_fb;
unsigned num_so_targets;
struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
boolean streamout_start;
unsigned streamout_append_bitmask;
unsigned *vs_shader_so_strides;
};
struct r600_draw {
@ -268,6 +285,10 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags);
void r600_context_flush_dest_caches(struct r600_context *ctx);
void r600_context_streamout_begin(struct r600_context *ctx);
void r600_context_streamout_end(struct r600_context *ctx);
void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t);
int evergreen_context_init(struct r600_context *ctx, struct r600_screen *screen);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
void evergreen_context_flush_dest_caches(struct r600_context *ctx);

View file

@ -1672,6 +1672,21 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
cf->output.inst |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
cf->output.inst |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
break;
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
@ -1730,6 +1745,22 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
@ -1760,6 +1791,10 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
@ -1849,6 +1884,22 @@ int r600_bytecode_build(struct r600_bytecode *bc)
break;
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
@ -1923,6 +1974,10 @@ int r600_bytecode_build(struct r600_bytecode *bc)
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
@ -2057,6 +2112,44 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
break;
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id],
(EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4,
(EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4);
fprintf(stderr, "GPR:%X ", cf->output.gpr);
fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size);
fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base);
fprintf(stderr, "TYPE:%X\n", cf->output.type);
id++;
fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id],
(EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4,
(EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4);
fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size);
fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask);
fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
fprintf(stderr, "INST:%d ", cf->output.inst);
fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
break;
case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
@ -2125,6 +2218,28 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id],
R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0));
fprintf(stderr, "GPR:%X ", cf->output.gpr);
fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size);
fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base);
fprintf(stderr, "TYPE:%X\n", cf->output.type);
id++;
fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id],
R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) -
R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0));
fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size);
fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask);
fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
fprintf(stderr, "INST:%d ", cf->output.inst);
fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
break;
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:

View file

@ -107,6 +107,8 @@ struct r600_bytecode_vtx {
struct r600_bytecode_output {
unsigned array_base;
unsigned array_size;
unsigned comp_mask;
unsigned type;
unsigned end_of_program;

View file

@ -65,6 +65,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
util_blitter_save_vertex_buffers(rctx->blitter,
rctx->vbuf_mgr->nr_vertex_buffers,
rctx->vbuf_mgr->vertex_buffer);
util_blitter_save_so_targets(rctx->blitter, rctx->ctx.num_so_targets,
(struct pipe_stream_output_target**)rctx->ctx.so_targets);
if (op & R600_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);

View file

@ -943,6 +943,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
/* Count in queries_suspend. */
num_dw += ctx->num_cs_dw_queries_suspend;
/* Count in streamout_end at the end of CS. */
num_dw += ctx->num_cs_dw_streamout_end;
/* Count in render_condition(NULL) at the end of CS. */
if (ctx->predicate_drawing) {
num_dw += 3;
@ -1471,6 +1474,12 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
r600_context_block_resource_emit_dirty(ctx, dirty_block);
}
/* Enable stream out if needed. */
if (ctx->streamout_start) {
r600_context_streamout_begin(ctx);
ctx->streamout_start = FALSE;
}
/* draw packet */
pm4 = &ctx->pm4[ctx->pm4_cdwords];
@ -1503,6 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
{
struct r600_block *enable_block = NULL;
bool queries_suspended = false;
bool streamout_suspended = false;
if (ctx->pm4_cdwords == ctx->init_dwords)
return;
@ -1513,6 +1523,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
queries_suspended = true;
}
if (ctx->num_cs_dw_streamout_end) {
r600_context_streamout_end(ctx);
streamout_suspended = true;
}
if (ctx->screen->chip_class >= EVERGREEN)
evergreen_context_flush_dest_caches(ctx);
else
@ -1542,6 +1557,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
r600_init_cs(ctx);
if (streamout_suspended) {
ctx->streamout_start = TRUE;
ctx->streamout_append_bitmask = ~0;
}
/* resume queries */
if (queries_suspended) {
r600_context_queries_resume(ctx);
@ -1636,6 +1656,44 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
/* SAMPLE_STREAMOUTSTATS stores this structure:
* {
* u64 NumPrimitivesWritten;
* u64 PrimitiveStorageNeeded;
* }
* We only need NumPrimitivesWritten here. */
while (results_base != query->results_end) {
query->result.u64 +=
r600_query_read_result(map + results_base, 2, 6, true);
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Here we read PrimitiveStorageNeeded. */
while (results_base != query->results_end) {
query->result.u64 +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
case PIPE_QUERY_SO_STATISTICS:
while (results_base != query->results_end) {
query->result.so.num_primitives_written +=
r600_query_read_result(map + results_base, 2, 6, true);
query->result.so.primitives_storage_needed +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
while (results_base != query->results_end) {
query->result.b = query->result.b ||
r600_query_read_result(map + results_base, 2, 6, true) !=
r600_query_read_result(map + results_base, 0, 4, true);
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
default:
assert(0);
}
@ -1679,6 +1737,15 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
break;
case PIPE_QUERY_TIME_ELAPSED:
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE);
results = (u32*)((char*)results + query->results_end);
memset(results, 0, query->result_size);
ctx->ws->buffer_unmap(query->buffer->buf);
break;
default:
assert(0);
}
@ -1692,6 +1759,15 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
case PIPE_QUERY_TIME_ELAPSED:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@ -1720,6 +1796,15 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
case PIPE_QUERY_TIME_ELAPSED:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@ -1798,6 +1883,14 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
query->result_size = 16;
query->num_cs_dw = 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
break;
default:
assert(0);
FREE(query);
@ -1832,20 +1925,28 @@ boolean r600_context_query_result(struct r600_context *ctx,
{
boolean *result_b = (boolean*)vresult;
uint64_t *result_u64 = (uint64_t*)vresult;
struct pipe_query_data_so_statistics *result_so =
(struct pipe_query_data_so_statistics*)vresult;
if (!r600_query_result(ctx, query, wait))
return FALSE;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
*result_u64 = query->result.u64;
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
*result_b = query->result.b;
break;
case PIPE_QUERY_TIME_ELAPSED:
*result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
break;
case PIPE_QUERY_SO_STATISTICS:
*result_so = query->result.so;
break;
default:
assert(0);
}
@ -1872,3 +1973,237 @@ void r600_context_queries_resume(struct r600_context *ctx)
r600_query_begin(ctx, query);
}
}
static void r600_flush_vgt_streamout(struct r600_context *ctx)
{
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_008490_CP_STRMOUT_CNTL - R600_CONFIG_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
ctx->pm4[ctx->pm4_cdwords++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
ctx->pm4[ctx->pm4_cdwords++] = R_008490_CP_STRMOUT_CNTL >> 2; /* register */
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = S_008490_OFFSET_UPDATE_DONE(1); /* reference value */
ctx->pm4[ctx->pm4_cdwords++] = S_008490_OFFSET_UPDATE_DONE(1); /* mask */
ctx->pm4[ctx->pm4_cdwords++] = 4; /* poll interval */
}
static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
{
if (buffer_enable_bit) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_028AB0_STREAMOUT(1);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B20_VGT_STRMOUT_BUFFER_EN - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = buffer_enable_bit;
} else {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_028AB0_STREAMOUT(0);
}
}
void r600_context_streamout_begin(struct r600_context *ctx)
{
struct r600_so_target **t = ctx->so_targets;
unsigned *strides = ctx->vs_shader_so_strides;
unsigned buffer_en, i, update_flags = 0;
buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
(ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
(ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
(ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
ctx->num_cs_dw_streamout_end =
12 + /* flush_vgt_streamout */
util_bitcount(buffer_en) * 8 +
8;
r600_need_cs_space(ctx,
12 + /* flush_vgt_streamout */
6 + /* enables */
util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
(ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770 ? 2 : 0) +
ctx->num_cs_dw_streamout_end, TRUE);
if (ctx->screen->chip_class >= EVERGREEN) {
evergreen_flush_vgt_streamout(ctx);
evergreen_set_streamout_enable(ctx, buffer_en);
} else {
r600_flush_vgt_streamout(ctx);
r600_set_streamout_enable(ctx, buffer_en);
}
for (i = 0; i < ctx->num_so_targets; i++) {
if (t[i]) {
t[i]->stride = strides[i];
t[i]->so_index = i;
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
16*i - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* BUFFER_BASE */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] =
r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE);
if (ctx->streamout_append_bitmask & (1 << i)) {
/* Append. */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address lo */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address hi */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] =
r600_context_bo_reloc(ctx, t[i]->filled_size,
RADEON_USAGE_READ);
} else {
/* Start from the beginning. */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
}
}
}
if (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = update_flags;
}
}
void r600_context_streamout_end(struct r600_context *ctx)
{
struct r600_so_target **t = ctx->so_targets;
unsigned i, flush_flags = 0;
if (ctx->screen->chip_class >= EVERGREEN) {
evergreen_flush_vgt_streamout(ctx);
} else {
r600_flush_vgt_streamout(ctx);
}
for (i = 0; i < ctx->num_so_targets; i++) {
if (t[i]) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* dst address lo */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* dst address hi */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] =
r600_context_bo_reloc(ctx, t[i]->filled_size,
RADEON_USAGE_WRITE);
flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
}
}
if (ctx->screen->chip_class >= EVERGREEN) {
evergreen_set_streamout_enable(ctx, 0);
} else {
r600_set_streamout_enable(ctx, 0);
}
if (ctx->screen->family < CHIP_RV770) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
} else {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
ctx->pm4[ctx->pm4_cdwords++] = flush_flags; /* CP_COHER_CNTL */
ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* CP_COHER_SIZE */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* CP_COHER_BASE */
ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; /* POLL_INTERVAL */
}
ctx->num_cs_dw_streamout_end = 0;
/* XXX print some debug info */
for (i = 0; i < ctx->num_so_targets; i++) {
if (!t[i])
continue;
uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->buf, ctx->cs, RADEON_USAGE_READ);
printf("FILLED_SIZE%i: %u\n", i, *ptr);
ctx->ws->buffer_unmap(t[i]->filled_size->buf);
}
}
void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
{
r600_need_cs_space(ctx, 14 + 21, TRUE);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address lo */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address hi */
ctx->pm4[ctx->pm4_cdwords++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, t->filled_size,
RADEON_USAGE_READ);
#if 0 /* I have not found this useful yet. */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_REG | COPY_DW_DST_IS_REG;
ctx->pm4[ctx->pm4_cdwords++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* src register */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = R_0085F4_CP_COHER_SIZE >> 2; /* dst register */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_0085F0_CP_COHER_CNTL - R600_CONFIG_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = S_0085F0_SO0_DEST_BASE_ENA(1) << t->so_index;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
ctx->pm4[ctx->pm4_cdwords++] = (R_0085F8_CP_COHER_BASE - R600_CONFIG_REG_OFFSET) >> 2;
ctx->pm4[ctx->pm4_cdwords++] = t->b.buffer_offset >> 2;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, (struct r600_resource*)t->b.buffer,
RADEON_USAGE_WRITE);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
ctx->pm4[ctx->pm4_cdwords++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
ctx->pm4[ctx->pm4_cdwords++] = R_0085FC_CP_COHER_STATUS >> 2; /* register */
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0; /* reference value */
ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* mask */
ctx->pm4[ctx->pm4_cdwords++] = 4; /* poll interval */
#endif
}

View file

@ -76,6 +76,13 @@ void r600_context_reg(struct r600_context *ctx,
void r600_init_cs(struct r600_context *ctx);
int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base);
/*
* evergreen_hw_context.c
*/
void evergreen_flush_vgt_streamout(struct r600_context *ctx);
void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit);
static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_resource *rbo,
enum radeon_bo_usage usage)
{

View file

@ -378,6 +378,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return 1;
/* Supported except the original R600. */
@ -391,17 +392,21 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return family >= CHIP_CEDAR ? 1 : 0;
/* Unsupported features. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_ATTRIBS:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_SCALED_RESOLVE:
return 0;
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return debug_get_bool_option("R600_STREAMOUT", FALSE) ? 4 : 0;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_ATTRIBS:
return 16;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return 16*4;
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:

View file

@ -133,6 +133,8 @@ struct r600_pipe_shader {
struct r600_vertex_element vertex_elements;
struct tgsi_token *tokens;
unsigned sprite_coord_enable;
struct pipe_stream_output_info so;
unsigned so_strides[4];
};
struct r600_pipe_sampler_state {
@ -348,6 +350,19 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
struct pipe_resource *buffer);
struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned buffer_offset,
unsigned buffer_size);
void r600_so_target_destroy(struct pipe_context *ctx,
struct pipe_stream_output_target *target);
void r600_set_so_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
unsigned append_bitmask);
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
/*

View file

@ -100,7 +100,21 @@ static void r600_render_condition(struct pipe_context *ctx,
}
rctx->ctx.predicate_drawing = true;
r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
switch (rquery->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
break;
default:
assert(0);
}
}
void r600_init_query_functions(struct r600_pipe_context *rctx)

View file

@ -119,6 +119,19 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
if (dump_shaders) {
fprintf(stderr, "--------------------------------------------------------------\n");
tgsi_dump(shader->tokens, 0);
if (shader->so.num_outputs) {
unsigned i;
fprintf(stderr, "STREAMOUT\n");
for (i = 0; i < shader->so.num_outputs; i++) {
fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
shader->so.output[i].output_buffer, shader->so.output[i].register_index,
shader->so.output[i].register_mask & 1 ? "x" : "_",
(shader->so.output[i].register_mask >> 1) & 1 ? "y" : "_",
(shader->so.output[i].register_mask >> 2) & 1 ? "z" : "_",
(shader->so.output[i].register_mask >> 3) & 1 ? "w" : "_");
}
}
}
r = r600_shader_from_tgsi(rctx, shader);
if (r) {
@ -681,6 +694,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
{
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->tokens;
struct pipe_stream_output_info so = pipeshader->so;
struct tgsi_full_immediate *immediate;
struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
@ -847,6 +861,93 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
}
}
/* Add stream outputs. */
if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
unsigned buffer_offset[PIPE_MAX_SO_BUFFERS] = {0};
for (i = 0; i < so.num_outputs; i++) {
struct r600_bytecode_output output;
unsigned comps;
if (so.output[i].output_buffer >= 4) {
R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
so.output[i].output_buffer);
r = -EINVAL;
goto out_err;
}
switch (so.output[i].register_mask) {
case TGSI_WRITEMASK_XYZW:
comps = 4;
break;
case TGSI_WRITEMASK_XYZ:
comps = 3;
break;
case TGSI_WRITEMASK_XY:
comps = 2;
break;
case TGSI_WRITEMASK_X:
comps = 1;
break;
default:
R600_ERR("streamout: invalid register_mask, got: %x\n",
so.output[i].register_mask);
r = -EINVAL;
goto out_err;
}
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = shader->output[so.output[i].register_index].gpr;
output.elem_size = 0;
output.array_base = buffer_offset[so.output[i].output_buffer];
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.burst_count = 1;
output.barrier = 1;
output.array_size = 0;
output.comp_mask = so.output[i].register_mask;
if (ctx.bc->chip_class >= EVERGREEN) {
switch (so.output[i].output_buffer) {
case 0:
output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
break;
case 1:
output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
break;
case 2:
output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
break;
case 3:
output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
break;
}
} else {
switch (so.output[i].output_buffer) {
case 0:
output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
break;
case 1:
output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
break;
case 2:
output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
break;
case 3:
output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
break;
}
}
r = r600_bytecode_add_output(ctx.bc, &output);
if (r)
goto out_err;
buffer_offset[so.output[i].output_buffer] += comps;
}
for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
pipeshader->so_strides[i] = buffer_offset[i] * 4;
}
}
/* export output */
j = 0;
for (i = 0, pos0 = 0; i < noutput; i++) {

View file

@ -114,6 +114,10 @@
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS 0x00000001
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM 0x00000002
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_SX 0x00000003
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE 0x00000000
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND 0x00000001
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ 0x00000002
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND 0x00000003
#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) & 0x7F) << 15)
#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) >> 15) & 0x7F)
#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR 0xFFC07FFF

View file

@ -933,7 +933,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
}
static void *r600_create_rs_state(struct pipe_context *ctx,
const struct pipe_rasterizer_state *state)
const struct pipe_rasterizer_state *state)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
@ -978,8 +978,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
state->fill_back != PIPE_POLYGON_MODE_FILL);
r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL,
S_028814_PROVOKING_VTX_LAST(prov_vtx) |
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
S_028814_FACE(!state->front_ccw) |
S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
@ -1758,6 +1758,9 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
rctx->context.texture_barrier = r600_texture_barrier;
rctx->context.create_stream_output_target = r600_create_so_target;
rctx->context.stream_output_target_destroy = r600_so_target_destroy;
rctx->context.set_stream_output_targets = r600_set_so_targets;
}
void r600_adjust_gprs(struct r600_pipe_context *rctx)

View file

@ -247,10 +247,11 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
void *r600_create_shader_state(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
int r;
shader->tokens = tgsi_dup_tokens(state->tokens);
shader->so = state->stream_output;
r = r600_pipe_shader_create(ctx, shader);
if (r) {
@ -412,6 +413,71 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
}
struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned buffer_offset,
unsigned buffer_size)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_so_target *t;
void *ptr;
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
t->filled_size = (struct r600_resource*)
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
ptr = rctx->ws->buffer_map(t->filled_size->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
memset(ptr, 0, t->filled_size->buf->size);
rctx->ws->buffer_unmap(t->filled_size->buf);
return &t->b;
}
void r600_so_target_destroy(struct pipe_context *ctx,
struct pipe_stream_output_target *target)
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
FREE(t);
}
void r600_set_so_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
unsigned append_bitmask)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
unsigned i;
/* Stop streamout. */
if (rctx->ctx.num_so_targets) {
r600_context_streamout_end(&rctx->ctx);
}
/* Set the new targets. */
for (i = 0; i < num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->ctx.so_targets[i], targets[i]);
}
for (; i < rctx->ctx.num_so_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->ctx.so_targets[i], NULL);
}
rctx->ctx.num_so_targets = num_targets;
rctx->ctx.streamout_start = num_targets != 0;
rctx->ctx.streamout_append_bitmask = append_bitmask;
}
static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
{
struct r600_pipe_resource_state *rstate;
@ -528,7 +594,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
struct pipe_index_buffer ib = {};
unsigned prim, mask, ls_mask = 0;
if (!info.count ||
if ((!info.count && (info.indexed || !info.count_from_stream_output)) ||
(info.indexed && !rctx->vbuf_mgr->index_buffer.buffer) ||
!r600_conv_pipe_prim(info.mode, &prim)) {
return;
@ -572,8 +638,15 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
} else {
info.index_bias = info.start;
rdraw.vgt_draw_initiator = V_0287F0_DI_SRC_SEL_AUTO_INDEX;
if (info.count_from_stream_output) {
rdraw.vgt_draw_initiator |= S_0287F0_USE_OPAQUE(1);
r600_context_draw_opaque_count(&rctx->ctx, (struct r600_so_target*)info.count_from_stream_output);
}
}
rctx->ctx.vs_shader_so_strides = rctx->vs_shader->so_strides;
mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
if (rctx->vgt.id != R600_PIPE_STATE_VGT) {

View file

@ -76,10 +76,23 @@
#define PKT3_DRAW_INDEX_IMMD 0x2E
#define PKT3_NUM_INSTANCES 0x2F
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
#define STRMOUT_OFFSET_SOURCE(x) (((x) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET 0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM 2
#define STRMOUT_OFFSET_NONE 3
#define STRMOUT_SELECT_BUFFER(x) (((x) & 0x3) << 8)
#define PKT3_INDIRECT_BUFFER_MP 0x38
#define PKT3_MEM_SEMAPHORE 0x39
#define PKT3_MPEG_INDEX 0x3A
#define PKT3_COPY_DW 0x3B
#define COPY_DW_SRC_IS_REG (0 << 0)
#define COPY_DW_SRC_IS_MEM (1 << 0)
#define COPY_DW_DST_IS_REG (0 << 1)
#define COPY_DW_DST_IS_MEM (1 << 1)
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_MEM_WRITE 0x3D
#define PKT3_INDIRECT_BUFFER 0x32
#define PKT3_CP_INTERRUPT 0x40
@ -106,6 +119,8 @@
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
/* 0 - any non-TS event
@ -147,6 +162,12 @@
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
/* Registers */
#define R_008490_CP_STRMOUT_CNTL 0x008490
#define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_008960_VGT_STRMOUT_BUFFER_FILLED_SIZE_0 0x008960 /* read-only */
#define R_008964_VGT_STRMOUT_BUFFER_FILLED_SIZE_1 0x008964 /* read-only */
#define R_008968_VGT_STRMOUT_BUFFER_FILLED_SIZE_2 0x008968 /* read-only */
#define R_00896C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3 0x00896C /* read-only */
#define R_008C00_SQ_CONFIG 0x00008C00
#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0)
#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1)
@ -3144,6 +3165,26 @@
#define S_028AB8_VTX_CNT_EN(x) (((x) & 0x1) << 0)
#define G_028AB8_VTX_CNT_EN(x) (((x) >> 0) & 0x1)
#define C_028AB8_VTX_CNT_EN 0xFFFFFFFE
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
#define R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 0x028AD4
#define R_028AD8_VGT_STRMOUT_BUFFER_BASE_0 0x028AD8
#define R_028ADC_VGT_STRMOUT_BUFFER_OFFSET_0 0x028ADC
#define R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1 0x028AE0
#define R_028AE4_VGT_STRMOUT_VTX_STRIDE_1 0x028AE4
#define R_028AE8_VGT_STRMOUT_BUFFER_BASE_1 0x028AE8
#define R_028AEC_VGT_STRMOUT_BUFFER_OFFSET_1 0x028AEC
#define R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2 0x028AF0
#define R_028AF4_VGT_STRMOUT_VTX_STRIDE_2 0x028AF4
#define R_028AF8_VGT_STRMOUT_BUFFER_BASE_2 0x028AF8
#define R_028AFC_VGT_STRMOUT_BUFFER_OFFSET_2 0x028AFC
#define R_028B00_VGT_STRMOUT_BUFFER_SIZE_3 0x028B00
#define R_028B04_VGT_STRMOUT_VTX_STRIDE_3 0x028B04
#define R_028B08_VGT_STRMOUT_BUFFER_BASE_3 0x028B08
#define R_028B0C_VGT_STRMOUT_BUFFER_OFFSET_3 0x028B0C
#define R_028B10_VGT_STRMOUT_BASE_OFFSET_0 0x028B10
#define R_028B14_VGT_STRMOUT_BASE_OFFSET_1 0x028B14
#define R_028B18_VGT_STRMOUT_BASE_OFFSET_2 0x028B18
#define R_028B1C_VGT_STRMOUT_BASE_OFFSET_3 0x028B1C
#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0)
#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1)
@ -3157,6 +3198,13 @@
#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3)
#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1)
#define C_028B20_BUFFER_3_EN 0xFFFFFFF7
#define R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET 0x028B28
#define R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE 0x028B2C
#define R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE 0x028B30
#define R_028B44_VGT_STRMOUT_BASE_OFFSET_HI_0 0x028B44
#define R_028B48_VGT_STRMOUT_BASE_OFFSET_HI_1 0x028B48
#define R_028B4C_VGT_STRMOUT_BASE_OFFSET_HI_2 0x028B4C
#define R_028B50_VGT_STRMOUT_BASE_OFFSET_HI_3 0x028B50
#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0x028C20
#define S_028C20_S4_X(x) (((x) & 0xF) << 0)
#define G_028C20_S4_X(x) (((x) >> 0) & 0xF)
@ -3280,6 +3328,9 @@
#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31)
#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1)
#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF
#define R_0085F4_CP_COHER_SIZE 0x0085F4
#define R_0085F8_CP_COHER_BASE 0x0085F8
#define R_0085FC_CP_COHER_STATUS 0x0085FC
#define R_02812C_CB_CLEAR_ALPHA 0x02812C