radeonsi: atomize the scratch buffer state

The update frequency is very low.

Difference: Only account for the scratch buffer size when allocating a new
            buffer and when starting a new IB, and check the buffer for
            NULL. (v3)

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2017-01-26 02:56:15 +01:00
parent a41f2527ae
commit 408f9a1584
6 changed files with 32 additions and 29 deletions

View file

@ -283,7 +283,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
PIPE_USAGE_DEFAULT, scratch_size);
if (!sctx->scratch_buffer)
return;
sctx->emit_scratch_reloc = true;
si_mark_atom_dirty(sctx, &sctx->scratch_state);
}
si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,

View file

@ -235,6 +235,12 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
si_mark_atom_dirty(ctx, &ctx->scratch_state);
if (ctx->scratch_buffer) {
r600_context_add_resource_size(&ctx->b.b,
&ctx->scratch_buffer->b.b);
}
r600_postflush_resume_features(&ctx->b);
assert(!ctx->b.gfx.cs->prev_dw);
@ -251,7 +257,6 @@ void si_begin_new_cs(struct si_context *ctx)
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
ctx->emit_scratch_reloc = true;
ctx->last_ls = NULL;
ctx->last_tcs = NULL;
ctx->last_tes_sh_base = -1;

View file

@ -352,8 +352,8 @@ struct si_context {
bool gs_tri_strip_adj_fix;
/* Scratch buffer */
struct r600_atom scratch_state;
struct r600_resource *scratch_buffer;
bool emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;

View file

@ -152,6 +152,7 @@ union si_state_atoms {
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
struct r600_atom *spi_map;
struct r600_atom *scratch_state;
} s;
struct r600_atom *array[0];
};

View file

@ -459,25 +459,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
return ia_multi_vgt_param;
}
static void si_emit_scratch_reloc(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
if (!sctx->emit_scratch_reloc)
return;
radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
sctx->spi_tmpring_size);
if (sctx->scratch_buffer) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SCRATCH_BUFFER);
}
sctx->emit_scratch_reloc = false;
}
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
@ -1133,8 +1114,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
/* Add buffer sizes for memory checking in need_cs_space. */
if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
if (info->indirect)
r600_context_add_resource_size(ctx, info->indirect);
@ -1174,14 +1153,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
sctx->dirty_states = 0;
si_emit_scratch_reloc(sctx);
si_emit_rasterizer_prim_state(sctx);
si_emit_draw_registers(sctx, info);
si_ce_pre_draw_synchronization(sctx);
si_emit_draw_packets(sctx, info, &ib);
si_ce_post_draw_synchronization(sctx);
if (sctx->trace_buf)

View file

@ -2200,7 +2200,10 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
PIPE_USAGE_DEFAULT, scratch_needed_size);
if (!sctx->scratch_buffer)
return false;
sctx->emit_scratch_reloc = true;
si_mark_atom_dirty(sctx, &sctx->scratch_state);
r600_context_add_resource_size(&sctx->b.b,
&sctx->scratch_buffer->b.b);
}
/* Update the shaders, so they are using the latest scratch. The
@ -2259,7 +2262,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
if (spi_tmpring_size != sctx->spi_tmpring_size) {
sctx->spi_tmpring_size = spi_tmpring_size;
sctx->emit_scratch_reloc = true;
si_mark_atom_dirty(sctx, &sctx->scratch_state);
}
return true;
}
@ -2588,9 +2591,26 @@ bool si_update_shaders(struct si_context *sctx)
return true;
}
/*
 * Atom emit callback for the scratch buffer state.
 *
 * Writes SPI_TMPRING_SIZE from sctx->spi_tmpring_size and, if a scratch
 * buffer is present, adds it to the gfx buffer list with read/write usage
 * and scratch-buffer priority.
 *
 * The atom argument is not used by this function.
 */
static void si_emit_scratch_state(struct si_context *sctx,
				  struct r600_atom *atom)
{
	struct radeon_winsys_cs *gfx_cs = sctx->b.gfx.cs;

	radeon_set_context_reg(gfx_cs, R_0286E8_SPI_TMPRING_SIZE,
			       sctx->spi_tmpring_size);

	/* Nothing to add to the buffer list when there is no scratch buffer. */
	if (!sctx->scratch_buffer)
		return;

	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				  sctx->scratch_buffer,
				  RADEON_USAGE_READWRITE,
				  RADEON_PRIO_SCRATCH_BUFFER);
}
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state,
si_emit_scratch_state);
sctx->b.b.create_vs_state = si_create_shader_selector;
sctx->b.b.create_tcs_state = si_create_shader_selector;