radeonsi: allocate only one set of tessellation rings per device

Move the tess rings from si_context to si_screen, where their allocation
is guarded by the new tess_ring_lock mutex. The per-context
"has_tessellation" flag indicates that the screen has valid tess rings,
so that we don't have to lock the mutex to check whether the rings are
valid.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27943>
This commit is contained in:
Marek Olšák 2024-02-27 17:07:30 -05:00 committed by Marge Bot
parent ea94cb95e4
commit 9e08569d6a
5 changed files with 68 additions and 47 deletions

View file

@ -92,7 +92,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
/* If we use s_sendmsg to set tess factors to all 0 or all 1 instead of writing to the tess
* factor buffer, we need this at the end of command buffers:
*/
if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->tess_rings) {
if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->has_tessellation) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
@ -447,9 +447,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
return;
}
if (ctx->tess_rings) {
if (ctx->has_tessellation) {
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs,
unlikely(is_secure) ? si_resource(ctx->tess_rings_tmz) : si_resource(ctx->tess_rings),
unlikely(is_secure) ? si_resource(ctx->screen->tess_rings_tmz)
: si_resource(ctx->screen->tess_rings),
RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS);
}

View file

@ -223,8 +223,6 @@ static void si_destroy_context(struct pipe_context *context)
pipe_resource_reference(&sctx->esgs_ring, NULL);
pipe_resource_reference(&sctx->gsvs_ring, NULL);
pipe_resource_reference(&sctx->tess_rings, NULL);
pipe_resource_reference(&sctx->tess_rings_tmz, NULL);
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
pipe_resource_reference(&sctx->sample_pos_buffer, NULL);
si_resource_reference(&sctx->border_color_buffer, NULL);
@ -974,6 +972,8 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
}
si_resource_reference(&sscreen->attribute_ring, NULL);
pipe_resource_reference(&sscreen->tess_rings, NULL);
pipe_resource_reference(&sscreen->tess_rings_tmz, NULL);
util_queue_destroy(&sscreen->shader_compiler_queue);
util_queue_destroy(&sscreen->shader_compiler_queue_opt_variants);
@ -1031,6 +1031,7 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
simple_mtx_destroy(&sscreen->gpu_load_mutex);
simple_mtx_destroy(&sscreen->gds_mutex);
simple_mtx_destroy(&sscreen->tess_ring_lock);
radeon_bo_reference(sscreen->ws, &sscreen->gds_oa, NULL);
@ -1277,6 +1278,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
(void)simple_mtx_init(&sscreen->async_compute_context_lock, mtx_plain);
(void)simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
(void)simple_mtx_init(&sscreen->gds_mutex, mtx_plain);
(void)simple_mtx_init(&sscreen->tess_ring_lock, mtx_plain);
si_init_gs_info(sscreen);
if (!si_init_shader_cache(sscreen)) {

View file

@ -712,6 +712,10 @@ struct si_screen {
struct si_resource *attribute_ring;
simple_mtx_t tess_ring_lock;
struct pipe_resource *tess_rings;
struct pipe_resource *tess_rings_tmz;
/* NGG streamout. */
simple_mtx_t gds_mutex;
struct pb_buffer_lean *gds_oa;
@ -1129,6 +1133,7 @@ struct si_context {
bool vs_uses_base_instance;
bool vs_uses_draw_id;
uint8_t patch_vertices;
bool has_tessellation; /* whether si_screen::tess_rings* are valid */
/* shader descriptors */
struct si_descriptors descriptors[SI_NUM_DESCS];
@ -1147,8 +1152,6 @@ struct si_context {
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
struct pipe_resource *tess_rings;
struct pipe_resource *tess_rings_tmz;
union pipe_color_union *border_color_table; /* in CPU memory, any endian */
struct si_resource *border_color_buffer;
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */

View file

@ -52,9 +52,9 @@ static bool si_update_shaders(struct si_context *sctx)
/* Update TCS and TES. */
if (HAS_TESS) {
if (!sctx->tess_rings) {
if (!sctx->has_tessellation) {
si_init_tess_factor_ring(sctx);
if (!sctx->tess_rings)
if (!sctx->has_tessellation)
return false;
}

View file

@ -4241,42 +4241,55 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
void si_init_tess_factor_ring(struct si_context *sctx)
{
assert(!sctx->tess_rings);
struct si_screen *sscreen = sctx->screen;
assert(!sctx->has_tessellation);
/* The address must be aligned to 2^19, because the shader only
* receives the high 13 bits. Align it to 2MB to match the GPU page size.
*/
sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,
2 * 1024 * 1024);
if (!sctx->tess_rings)
if (sctx->has_tessellation)
return;
if (sctx->screen->info.has_tmz_support) {
sctx->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
PIPE_RESOURCE_FLAG_ENCRYPTED |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,
2 * 1024 * 1024);
simple_mtx_lock(&sscreen->tess_ring_lock);
if (!sscreen->tess_rings) {
/* The address must be aligned to 2^19, because the shader only
* receives the high 13 bits. Align it to 2MB to match the GPU page size.
*/
sscreen->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sscreen->hs.tess_offchip_ring_size +
sscreen->hs.tess_factor_ring_size,
2 * 1024 * 1024);
if (!sscreen->tess_rings) {
simple_mtx_unlock(&sscreen->tess_ring_lock);
return;
}
if (sscreen->info.has_tmz_support) {
sscreen->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
PIPE_RESOURCE_FLAG_ENCRYPTED |
SI_RESOURCE_FLAG_32BIT |
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sscreen->hs.tess_offchip_ring_size +
sscreen->hs.tess_factor_ring_size,
2 * 1024 * 1024);
}
}
uint64_t factor_va =
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
simple_mtx_unlock(&sscreen->tess_ring_lock);
sctx->has_tessellation = true;
unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4;
uint64_t factor_va = si_resource(sscreen->tess_rings)->gpu_address +
sscreen->hs.tess_offchip_ring_size;
unsigned tf_ring_size_field = sscreen->hs.tess_factor_ring_size / 4;
if (sctx->gfx_level >= GFX11)
tf_ring_size_field /= sctx->screen->info.max_se;
tf_ring_size_field /= sscreen->info.max_se;
assert((tf_ring_size_field & C_030938_SIZE) == 0);
@ -4287,7 +4300,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
assert(sctx->gfx_level >= GFX7);
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sctx->tess_rings),
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sscreen->tess_rings),
RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS);
si_emit_vgt_flush(cs);
@ -4304,7 +4317,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
S_030944_BASE_HI(factor_va >> 40));
}
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM,
sctx->screen->hs.hs_offchip_param);
sscreen->hs.hs_offchip_param);
radeon_end();
return;
}
@ -4313,18 +4326,18 @@ void si_init_tess_factor_ring(struct si_context *sctx)
/* Add these registers to cs_preamble_state. */
for (unsigned tmz = 0; tmz <= 1; tmz++) {
struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state;
struct pipe_resource *tf_ring = tmz ? sctx->tess_rings_tmz : sctx->tess_rings;
struct pipe_resource *tf_ring = tmz ? sscreen->tess_rings_tmz : sscreen->tess_rings;
if (!tf_ring)
continue; /* TMZ not supported */
uint64_t va = si_resource(tf_ring)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
uint64_t va = si_resource(tf_ring)->gpu_address + sscreen->hs.tess_offchip_ring_size;
si_cs_preamble_add_vgt_flush(sctx, tmz);
if (sctx->gfx_level >= GFX7) {
si_pm4_set_reg(pm4, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param);
si_pm4_set_reg(pm4, R_030940_VGT_TF_MEMORY_BASE, va >> 8);
if (sctx->gfx_level >= GFX10)
si_pm4_set_reg(pm4, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(va >> 40));
@ -4333,7 +4346,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
} else {
si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param);
}
si_pm4_finalize(pm4);
}
@ -4483,7 +4496,7 @@ static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertic
/* Update the io layout now if possible,
* otherwise make sure it's done by si_update_shaders.
*/
if (sctx->tess_rings)
if (sctx->has_tessellation)
si_update_tess_io_layout_state(sctx);
else
sctx->do_update_shaders = true;
@ -4656,8 +4669,10 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
assert(num_patches <= 64);
assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0);
uint64_t ring_va = (unlikely(sctx->ws->cs_is_secure(&sctx->gfx_cs)) ?
si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address;
uint64_t ring_va =
sctx->ws->cs_is_secure(&sctx->gfx_cs) ?
si_resource(sctx->screen->tess_rings_tmz)->gpu_address :
si_resource(sctx->screen->tess_rings)->gpu_address;
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
sctx->tes_offchip_ring_va_sgpr = ring_va;