radeonsi: rename flush flags, split the TC flag into L1 and L2

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
Marek Olšák 2014-12-29 14:02:46 +01:00
parent d217819e78
commit 2bfe9d4538
7 changed files with 109 additions and 91 deletions

View file

@ -206,11 +206,12 @@ static void si_launch_grid(
radeon_emit(cs, 0x80000000);
radeon_emit(cs, 0x80000000);
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_SHADER_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLUSH_WITH_INV_L2 |
R600_CONTEXT_FLAG_COMPUTE;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_ICACHE |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_WITH_INV_L2 |
SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(&sctx->b, NULL);
pm4->compute_pkt = true;
@ -401,11 +402,12 @@ static void si_launch_grid(
si_pm4_free_state(sctx, pm4, ~0);
sctx->b.flags |= R600_CONTEXT_CS_PARTIAL_FLUSH |
R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_SHADER_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLAG_COMPUTE;
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_ICACHE |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(&sctx->b, NULL);
}

View file

@ -168,7 +168,7 @@ static void si_update_descriptors(struct si_context *sctx,
desc->atom.dirty = true;
/* The descriptors are read with the K cache. */
sctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
} else {
desc->atom.dirty = false;
}
@ -430,7 +430,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
}
}
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2;
si_update_descriptors(sctx, &samplers->views.desc);
}
@ -655,7 +656,8 @@ void si_update_vertex_buffers(struct si_context *sctx)
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2;
}
@ -1080,13 +1082,14 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Flush the caches where the resource is bound. */
/* XXX only flush the caches where the buffer is bound. */
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_CB_META |
SI_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@ -1121,12 +1124,13 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
/* XXX only flush the caches where the buffer is bound. */
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_CB_META |
SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
void si_copy_buffer(struct si_context *sctx,
@ -1146,13 +1150,14 @@ void si_copy_buffer(struct si_context *sctx,
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_PS_PARTIAL_FLUSH;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_CB_META |
SI_CONTEXT_FLUSH_AND_INV_DB_META |
SI_CONTEXT_PS_PARTIAL_FLUSH;
while (size) {
unsigned sync_flags = 0;
@ -1184,12 +1189,13 @@ void si_copy_buffer(struct si_context *sctx,
dst_offset += byte_count;
}
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_CB_META |
SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
/* INIT/DEINIT */

View file

@ -90,13 +90,14 @@ void si_context_gfx_flush(void *context, unsigned flags,
r600_preflush_suspend_features(&ctx->b);
ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_INV_TEX_CACHE |
ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_CB_META |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_DB_META |
SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
/* this is probably not needed anymore */
R600_CONTEXT_PS_PARTIAL_FLUSH;
SI_CONTEXT_PS_PARTIAL_FLUSH;
si_emit_cache_flush(&ctx->b, NULL);
/* force to keep tiling flags */
@ -132,9 +133,10 @@ void si_context_gfx_flush(void *context, unsigned flags,
void si_begin_new_cs(struct si_context *ctx)
{
/* Flush read caches at the beginning of CS. */
ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_SHADER_CACHE;
ctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_INV_ICACHE;
/* set all valid group as dirty so they get reemited on
* next draw command

View file

@ -48,24 +48,30 @@
#define SI_MAX_DRAW_CS_DWORDS \
(/*derived prim state:*/ 6 + /*draw regs:*/ 16 + /*draw packets:*/ 31)
/* read caches */
#define R600_CONTEXT_INV_TEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
#define R600_CONTEXT_INV_CONST_CACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
#define R600_CONTEXT_INV_SHADER_CACHE (R600_CONTEXT_PRIVATE_FLAG << 2)
/* read-write caches */
#define R600_CONTEXT_FLUSH_AND_INV (R600_CONTEXT_PRIVATE_FLAG << 3)
#define R600_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
#define R600_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
#define R600_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
#define R600_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7)
#define R600_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 8)
/* engine synchronization */
#define R600_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
#define R600_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
#define R600_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11)
#define R600_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 12)
/* other flags */
#define R600_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13)
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
/* Cache used by scalar memory (SMEM) instructions. They also use TC
* as a second level cache, which isn't flushed by this.
* Other names: constant cache, data cache, DCACHE */
#define SI_CONTEXT_INV_KCACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
/* Caches used by vector memory (VMEM) instructions.
* L1 can optionally be bypassed (GLC=1) and can only be used by shaders.
* L2 is used by shaders and can be used by other blocks (CP, sDMA). */
#define SI_CONTEXT_INV_TC_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
#define SI_CONTEXT_INV_TC_L2 (R600_CONTEXT_PRIVATE_FLAG << 3)
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7)
/* Engine synchronization. */
#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8)
#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 11)
/* Compute only. */
#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */
#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13)
struct si_compute;

View file

@ -1996,12 +1996,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
int i;
if (sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_CB_META;
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_CB_META;
}
if (sctx->framebuffer.state.zsbuf) {
sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
@ -2753,8 +2753,9 @@ static void si_texture_barrier(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_FLUSH_AND_INV_CB;
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB;
}
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)

View file

@ -368,23 +368,24 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
uint32_t cp_coher_cntl = 0;
uint32_t compute =
PKT3_SHADER_TYPE_S(!!(sctx->flags & R600_CONTEXT_FLAG_COMPUTE));
PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
/* XXX SI flushes both ICACHE and KCACHE if either flag is set.
* XXX CIK shouldn't have this issue. Test CIK before separating the flags
* XXX to ensure there is no regression. Also find out if there is another
* XXX way to flush either ICACHE or KCACHE but not both for SI. */
if (sctx->flags & (R600_CONTEXT_INV_SHADER_CACHE |
R600_CONTEXT_INV_CONST_CACHE)) {
if (sctx->flags & (SI_CONTEXT_INV_ICACHE |
SI_CONTEXT_INV_KCACHE)) {
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) |
S_0085F0_SH_KCACHE_ACTION_ENA(1);
}
if (sctx->flags & (R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_STREAMOUT_FLUSH)) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1);
}
if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB) {
if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH))
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH))
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) |
@ -395,7 +396,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
}
if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB) {
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_DB_DEST_BASE_ENA(1);
}
@ -418,21 +419,21 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
}
}
if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META) {
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META) {
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
if (sctx->flags & R600_CONTEXT_FLUSH_WITH_INV_L2) {
if (sctx->flags & SI_CONTEXT_FLUSH_WITH_INV_L2) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | EVENT_INDEX(7) |
EVENT_WRITE_INV_L2);
}
if (sctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
} else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
@ -441,16 +442,16 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
if (sctx->flags & R600_CONTEXT_CS_PARTIAL_FLUSH) {
if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
}
if (sctx->flags & R600_CONTEXT_VGT_FLUSH) {
if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
if (sctx->flags & R600_CONTEXT_VGT_STREAMOUT_SYNC) {
if (sctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
}
@ -572,7 +573,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if (sctx->b.family == CHIP_HAWAII &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= R600_CONTEXT_VGT_STREAMOUT_SYNC;
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
/* Set the depth buffer as dirty. */

View file

@ -725,7 +725,7 @@ void si_update_shaders(struct si_context *sctx)
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
sctx->b.flags |= R600_CONTEXT_VGT_FLUSH;
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,