mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radeonsi: determine num_vbos_in_user_sgprs from template arguments in draw_vbo
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
parent
a65f99b2d1
commit
3ea3621b8d
7 changed files with 31 additions and 16 deletions
|
|
@ -98,11 +98,13 @@ void si_blitter_end(struct si_context *sctx)
|
|||
/* Restore shader pointers because the VS blit shader changed all
|
||||
* non-global VS user SGPRs. */
|
||||
sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
|
||||
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
|
||||
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
|
||||
sctx->num_vertex_elements >
|
||||
sctx->screen->num_vbos_in_user_sgprs;
|
||||
num_vbos_in_user_sgprs;
|
||||
sctx->vertex_buffer_user_sgprs_dirty = sctx->num_vertex_elements > 0 &&
|
||||
sctx->screen->num_vbos_in_user_sgprs;
|
||||
num_vbos_in_user_sgprs;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1954,11 +1954,13 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shad
|
|||
u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS);
|
||||
|
||||
if (shader == PIPE_SHADER_VERTEX) {
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
|
||||
|
||||
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
|
||||
sctx->num_vertex_elements >
|
||||
sctx->screen->num_vbos_in_user_sgprs;
|
||||
num_vbos_in_user_sgprs;
|
||||
sctx->vertex_buffer_user_sgprs_dirty =
|
||||
sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
|
||||
sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs;
|
||||
}
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
|
||||
|
|
@ -1966,12 +1968,14 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shad
|
|||
|
||||
void si_shader_pointers_mark_dirty(struct si_context *sctx)
|
||||
{
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
|
||||
|
||||
sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
|
||||
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
|
||||
sctx->num_vertex_elements >
|
||||
sctx->screen->num_vbos_in_user_sgprs;
|
||||
num_vbos_in_user_sgprs;
|
||||
sctx->vertex_buffer_user_sgprs_dirty =
|
||||
sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
|
||||
sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
|
||||
sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
|
||||
sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
|
||||
|
|
|
|||
|
|
@ -1155,11 +1155,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
|
||||
sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3;
|
||||
|
||||
/* This decreases CPU overhead if all descriptors are in user SGPRs because we don't
|
||||
* have to allocate and count references for the upload buffer.
|
||||
*/
|
||||
sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
|
||||
|
||||
/* Determine tessellation ring info. */
|
||||
bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
|
||||
sscreen->info.family != CHIP_CARRIZO &&
|
||||
|
|
|
|||
|
|
@ -518,7 +518,6 @@ struct si_screen {
|
|||
unsigned width, unsigned height, unsigned depth, uint32_t *state,
|
||||
uint32_t *fmask_state);
|
||||
|
||||
unsigned num_vbos_in_user_sgprs;
|
||||
unsigned max_memory_usage_kb;
|
||||
unsigned pa_sc_raster_config;
|
||||
unsigned pa_sc_raster_config_1;
|
||||
|
|
@ -1959,6 +1958,20 @@ static inline unsigned si_get_num_coverage_samples(struct si_context *sctx)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Return how many VBO descriptors are placed in user SGPRs for the given
 * chip class: 5 for GFX9 and newer, 1 for anything older.
 *
 * The chip class is taken as a plain argument (rather than read from a
 * screen) so that a caller holding it as a constant can pass it directly.
 */
static unsigned ALWAYS_INLINE
|
||||
si_num_vbos_in_user_sgprs_inline(enum chip_class chip_class)
|
||||
{
|
||||
/* This decreases CPU overhead if all descriptors are in user SGPRs because we don't
|
||||
* have to allocate and count references for the upload buffer.
|
||||
*/
|
||||
return chip_class >= GFX9 ? 5 : 1;
|
||||
}
|
||||
|
||||
/* Screen-based convenience wrapper: looks up the chip class stored in
 * sscreen->info and forwards it to si_num_vbos_in_user_sgprs_inline().
 */
static inline unsigned si_num_vbos_in_user_sgprs(struct si_screen *sscreen)
|
||||
{
|
||||
return si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
|
||||
}
|
||||
|
||||
#define PRINT_ERR(fmt, args...) \
|
||||
fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
|
||||
|
||||
|
|
|
|||
|
|
@ -4680,8 +4680,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,
|
|||
|
||||
v->count = count;
|
||||
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen);
|
||||
unsigned alloc_count =
|
||||
count > sscreen->num_vbos_in_user_sgprs ? count - sscreen->num_vbos_in_user_sgprs : 0;
|
||||
count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0;
|
||||
v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);
|
||||
|
||||
for (i = 0; i < count; ++i) {
|
||||
|
|
|
|||
|
|
@ -1605,6 +1605,7 @@ template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has
|
|||
static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
||||
{
|
||||
unsigned count = sctx->num_vertex_elements;
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs_inline(GFX_VERSION);
|
||||
bool pointer_dirty, user_sgprs_dirty;
|
||||
|
||||
assert(count <= SI_MAX_ATTRIBS);
|
||||
|
|
@ -1641,7 +1642,6 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
|||
}
|
||||
|
||||
unsigned first_vb_use_mask = velems->first_vb_use_mask;
|
||||
unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
struct pipe_vertex_buffer *vb;
|
||||
|
|
@ -1706,7 +1706,6 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
|||
|
||||
if (pointer_dirty || user_sgprs_dirty) {
|
||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
|
||||
unsigned sh_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
|
||||
PIPE_SHADER_VERTEX);
|
||||
assert(count);
|
||||
|
|
|
|||
|
|
@ -2861,7 +2861,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
|
||||
? sel->info.num_inputs
|
||||
: 0;
|
||||
sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, sscreen->num_vbos_in_user_sgprs);
|
||||
unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
|
||||
sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, num_vbos_in_sgprs);
|
||||
|
||||
/* The prolog is a no-op if there are no inputs. */
|
||||
sel->vs_needs_prolog = sel->info.stage == MESA_SHADER_VERTEX && sel->info.num_inputs &&
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue