radeonsi: determine num_vbos_in_user_sgprs from template arguments in draw_vbo

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-10 19:37:25 -04:00 committed by Marge Bot
parent a65f99b2d1
commit 3ea3621b8d
7 changed files with 31 additions and 16 deletions

View file

@ -98,11 +98,13 @@ void si_blitter_end(struct si_context *sctx)
/* Restore shader pointers because the VS blit shader changed all
* non-global VS user SGPRs. */
sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
sctx->num_vertex_elements >
sctx->screen->num_vbos_in_user_sgprs;
num_vbos_in_user_sgprs;
sctx->vertex_buffer_user_sgprs_dirty = sctx->num_vertex_elements > 0 &&
sctx->screen->num_vbos_in_user_sgprs;
num_vbos_in_user_sgprs;
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
}

View file

@ -1954,11 +1954,13 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shad
u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS);
if (shader == PIPE_SHADER_VERTEX) {
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
sctx->num_vertex_elements >
sctx->screen->num_vbos_in_user_sgprs;
num_vbos_in_user_sgprs;
sctx->vertex_buffer_user_sgprs_dirty =
sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs;
}
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
@ -1966,12 +1968,14 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shad
void si_shader_pointers_mark_dirty(struct si_context *sctx)
{
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
sctx->num_vertex_elements >
sctx->screen->num_vbos_in_user_sgprs;
num_vbos_in_user_sgprs;
sctx->vertex_buffer_user_sgprs_dirty =
sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs;
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;

View file

@ -1155,11 +1155,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3;
/* This decreases CPU overhead if all descriptors are in user SGPRs because we don't
* have to allocate and count references for the upload buffer.
*/
sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
/* Determine tessellation ring info. */
bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
sscreen->info.family != CHIP_CARRIZO &&

View file

@ -518,7 +518,6 @@ struct si_screen {
unsigned width, unsigned height, unsigned depth, uint32_t *state,
uint32_t *fmask_state);
unsigned num_vbos_in_user_sgprs;
unsigned max_memory_usage_kb;
unsigned pa_sc_raster_config;
unsigned pa_sc_raster_config_1;
@ -1959,6 +1958,20 @@ static inline unsigned si_get_num_coverage_samples(struct si_context *sctx)
return 1;
}
/* Return how many vertex buffer descriptors are passed in user SGPRs for the
 * given chip class: 5 on GFX9 and newer, 1 on anything older.
 *
 * When all descriptors fit in user SGPRs, CPU overhead goes down because the
 * upload buffer doesn't have to be allocated and reference-counted.
 */
static unsigned ALWAYS_INLINE
si_num_vbos_in_user_sgprs_inline(enum chip_class chip_class)
{
   if (chip_class >= GFX9)
      return 5;

   return 1;
}
/* Same query as si_num_vbos_in_user_sgprs_inline, but taking the chip class
 * from the screen's device info instead of an explicit argument.
 */
static inline unsigned si_num_vbos_in_user_sgprs(struct si_screen *sscreen)
{
   unsigned num_vbos = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);

   return num_vbos;
}
/* Print a formatted error message to stderr, prefixed with "EE", the source
 * file name, the line number and the enclosing function name.
 */
#define PRINT_ERR(fmt, ...) \
   fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)

View file

@ -4680,8 +4680,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,
v->count = count;
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen);
unsigned alloc_count =
count > sscreen->num_vbos_in_user_sgprs ? count - sscreen->num_vbos_in_user_sgprs : 0;
count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0;
v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);
for (i = 0; i < count; ++i) {

View file

@ -1605,6 +1605,7 @@ template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has
static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
{
unsigned count = sctx->num_vertex_elements;
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs_inline(GFX_VERSION);
bool pointer_dirty, user_sgprs_dirty;
assert(count <= SI_MAX_ATTRIBS);
@ -1641,7 +1642,6 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
}
unsigned first_vb_use_mask = velems->first_vb_use_mask;
unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
for (unsigned i = 0; i < count; i++) {
struct pipe_vertex_buffer *vb;
@ -1706,7 +1706,6 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
if (pointer_dirty || user_sgprs_dirty) {
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
unsigned sh_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
PIPE_SHADER_VERTEX);
assert(count);

View file

@ -2861,7 +2861,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
? sel->info.num_inputs
: 0;
sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, sscreen->num_vbos_in_user_sgprs);
unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, num_vbos_in_sgprs);
/* The prolog is a no-op if there are no inputs. */
sel->vs_needs_prolog = sel->info.stage == MESA_SHADER_VERTEX && sel->info.num_inputs &&