radeonsi: always prefetch later shaders after the draw packet

so that the draw is started as soon as possible.

v2: only prefetch the API VS and VBO descriptors

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
This commit is contained in:
Marek Olšák 2018-04-02 21:08:05 -04:00
parent e4b7974ec7
commit 9a1363427e
3 changed files with 75 additions and 26 deletions

View file

@@ -520,67 +520,110 @@ static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
sctx->vertex_elements->desc_list_byte_size); sctx->vertex_elements->desc_list_byte_size);
} }
void cik_emit_prefetch_L2(struct si_context *sctx) /**
* Prefetch shaders and VBO descriptors.
*
* \param vertex_stage_only Whether only the the API VS and VBO descriptors
* should be prefetched.
*/
void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only)
{ {
unsigned mask = sctx->prefetch_L2_mask;
assert(mask);
/* Prefetch shaders and VBO descriptors to TC L2. */ /* Prefetch shaders and VBO descriptors to TC L2. */
if (sctx->chip_class >= GFX9) { if (sctx->chip_class >= GFX9) {
/* Choose the right spot for the VBO prefetch. */ /* Choose the right spot for the VBO prefetch. */
if (sctx->tes_shader.cso) { if (sctx->tes_shader.cso) {
if (sctx->prefetch_L2_mask & SI_PREFETCH_HS) if (mask & SI_PREFETCH_HS)
cik_prefetch_shader_async(sctx, sctx->queued.named.hs); cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (sctx->prefetch_L2_mask & SI_PREFETCH_GS) if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs); cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else if (sctx->gs_shader.cso) { } else if (sctx->gs_shader.cso) {
if (sctx->prefetch_L2_mask & SI_PREFETCH_GS) if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs); cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else { } else {
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
} }
} else { } else {
/* SI-CI-VI */ /* SI-CI-VI */
/* Choose the right spot for the VBO prefetch. */ /* Choose the right spot for the VBO prefetch. */
if (sctx->tes_shader.cso) { if (sctx->tes_shader.cso) {
if (sctx->prefetch_L2_mask & SI_PREFETCH_LS) if (mask & SI_PREFETCH_LS)
cik_prefetch_shader_async(sctx, sctx->queued.named.ls); cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (sctx->prefetch_L2_mask & SI_PREFETCH_HS) if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
if (mask & SI_PREFETCH_HS)
cik_prefetch_shader_async(sctx, sctx->queued.named.hs); cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_ES) if (mask & SI_PREFETCH_ES)
cik_prefetch_shader_async(sctx, sctx->queued.named.es); cik_prefetch_shader_async(sctx, sctx->queued.named.es);
if (sctx->prefetch_L2_mask & SI_PREFETCH_GS) if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs); cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else if (sctx->gs_shader.cso) { } else if (sctx->gs_shader.cso) {
if (sctx->prefetch_L2_mask & SI_PREFETCH_ES) if (mask & SI_PREFETCH_ES)
cik_prefetch_shader_async(sctx, sctx->queued.named.es); cik_prefetch_shader_async(sctx, sctx->queued.named.es);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (sctx->prefetch_L2_mask & SI_PREFETCH_GS) if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs); cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else { } else {
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS) if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs); cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx); cik_prefetch_VBO_descriptors(sctx);
if (vertex_stage_only) {
sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
SI_PREFETCH_VBO_DESCRIPTORS);
return;
}
} }
} }
if (sctx->prefetch_L2_mask & SI_PREFETCH_PS) if (mask & SI_PREFETCH_PS)
cik_prefetch_shader_async(sctx, sctx->queued.named.ps); cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
sctx->prefetch_L2_mask = 0; sctx->prefetch_L2_mask = 0;

View file

@@ -911,7 +911,7 @@ void si_copy_buffer(struct si_context *sctx,
unsigned user_flags); unsigned user_flags);
void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
uint64_t offset, unsigned size); uint64_t offset, unsigned size);
void cik_emit_prefetch_L2(struct si_context *sctx); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
void si_init_cp_dma_functions(struct si_context *sctx); void si_init_cp_dma_functions(struct si_context *sctx);
/* si_debug.c */ /* si_debug.c */

View file

@@ -1456,7 +1456,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* in parallel, but starting the draw first is more important. * in parallel, but starting the draw first is more important.
*/ */
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx); cik_emit_prefetch_L2(sctx, false);
} else { } else {
/* If we don't wait for idle, start prefetches first, then set /* If we don't wait for idle, start prefetches first, then set
* states, and draw at the end. * states, and draw at the end.
@@ -1464,14 +1464,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if (sctx->flags) if (sctx->flags)
si_emit_cache_flush(sctx); si_emit_cache_flush(sctx);
/* Only prefetch the API VS and VBO descriptors. */
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx); cik_emit_prefetch_L2(sctx, true);
if (!si_upload_graphics_shader_descriptors(sctx)) if (!si_upload_graphics_shader_descriptors(sctx))
return; return;
si_emit_all_states(sctx, info, 0); si_emit_all_states(sctx, info, 0);
si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset); si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
/* Prefetch the remaining shaders after the draw has been
* started. */
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, false);
} }
if (unlikely(sctx->current_saved_cs)) { if (unlikely(sctx->current_saved_cs)) {