mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 06:20:09 +01:00
radeonsi: allocate only one set of tessellation rings per device
Move the tessellation rings from si_context to si_screen.

The "has_tessellation" context flag indicates that the screen has valid tess rings, so that we don't have to lock the mutex to check whether the rings are valid.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27943>
This commit is contained in:
parent
ea94cb95e4
commit
9e08569d6a
5 changed files with 68 additions and 47 deletions
|
|
@ -92,7 +92,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
|
|||
/* If we use s_sendmsg to set tess factors to all 0 or all 1 instead of writing to the tess
|
||||
* factor buffer, we need this at the end of command buffers:
|
||||
*/
|
||||
if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->tess_rings) {
|
||||
if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->has_tessellation) {
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
|
||||
|
|
@ -447,9 +447,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
return;
|
||||
}
|
||||
|
||||
if (ctx->tess_rings) {
|
||||
if (ctx->has_tessellation) {
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs,
|
||||
unlikely(is_secure) ? si_resource(ctx->tess_rings_tmz) : si_resource(ctx->tess_rings),
|
||||
unlikely(is_secure) ? si_resource(ctx->screen->tess_rings_tmz)
|
||||
: si_resource(ctx->screen->tess_rings),
|
||||
RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -223,8 +223,6 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
|
||||
pipe_resource_reference(&sctx->esgs_ring, NULL);
|
||||
pipe_resource_reference(&sctx->gsvs_ring, NULL);
|
||||
pipe_resource_reference(&sctx->tess_rings, NULL);
|
||||
pipe_resource_reference(&sctx->tess_rings_tmz, NULL);
|
||||
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
|
||||
pipe_resource_reference(&sctx->sample_pos_buffer, NULL);
|
||||
si_resource_reference(&sctx->border_color_buffer, NULL);
|
||||
|
|
@ -974,6 +972,8 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
|
|||
}
|
||||
|
||||
si_resource_reference(&sscreen->attribute_ring, NULL);
|
||||
pipe_resource_reference(&sscreen->tess_rings, NULL);
|
||||
pipe_resource_reference(&sscreen->tess_rings_tmz, NULL);
|
||||
|
||||
util_queue_destroy(&sscreen->shader_compiler_queue);
|
||||
util_queue_destroy(&sscreen->shader_compiler_queue_opt_variants);
|
||||
|
|
@ -1031,6 +1031,7 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
|
|||
|
||||
simple_mtx_destroy(&sscreen->gpu_load_mutex);
|
||||
simple_mtx_destroy(&sscreen->gds_mutex);
|
||||
simple_mtx_destroy(&sscreen->tess_ring_lock);
|
||||
|
||||
radeon_bo_reference(sscreen->ws, &sscreen->gds_oa, NULL);
|
||||
|
||||
|
|
@ -1277,6 +1278,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
(void)simple_mtx_init(&sscreen->async_compute_context_lock, mtx_plain);
|
||||
(void)simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain);
|
||||
(void)simple_mtx_init(&sscreen->gds_mutex, mtx_plain);
|
||||
(void)simple_mtx_init(&sscreen->tess_ring_lock, mtx_plain);
|
||||
|
||||
si_init_gs_info(sscreen);
|
||||
if (!si_init_shader_cache(sscreen)) {
|
||||
|
|
|
|||
|
|
@ -712,6 +712,10 @@ struct si_screen {
|
|||
|
||||
struct si_resource *attribute_ring;
|
||||
|
||||
simple_mtx_t tess_ring_lock;
|
||||
struct pipe_resource *tess_rings;
|
||||
struct pipe_resource *tess_rings_tmz;
|
||||
|
||||
/* NGG streamout. */
|
||||
simple_mtx_t gds_mutex;
|
||||
struct pb_buffer_lean *gds_oa;
|
||||
|
|
@ -1129,6 +1133,7 @@ struct si_context {
|
|||
bool vs_uses_base_instance;
|
||||
bool vs_uses_draw_id;
|
||||
uint8_t patch_vertices;
|
||||
bool has_tessellation; /* whether si_screen::tess_rings* are valid */
|
||||
|
||||
/* shader descriptors */
|
||||
struct si_descriptors descriptors[SI_NUM_DESCS];
|
||||
|
|
@ -1147,8 +1152,6 @@ struct si_context {
|
|||
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */
|
||||
struct pipe_resource *esgs_ring;
|
||||
struct pipe_resource *gsvs_ring;
|
||||
struct pipe_resource *tess_rings;
|
||||
struct pipe_resource *tess_rings_tmz;
|
||||
union pipe_color_union *border_color_table; /* in CPU memory, any endian */
|
||||
struct si_resource *border_color_buffer;
|
||||
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
|
||||
/* Update TCS and TES. */
|
||||
if (HAS_TESS) {
|
||||
if (!sctx->tess_rings) {
|
||||
if (!sctx->has_tessellation) {
|
||||
si_init_tess_factor_ring(sctx);
|
||||
if (!sctx->tess_rings)
|
||||
if (!sctx->has_tessellation)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4241,42 +4241,55 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
|
|||
|
||||
void si_init_tess_factor_ring(struct si_context *sctx)
|
||||
{
|
||||
assert(!sctx->tess_rings);
|
||||
struct si_screen *sscreen = sctx->screen;
|
||||
assert(!sctx->has_tessellation);
|
||||
|
||||
/* The address must be aligned to 2^19, because the shader only
|
||||
* receives the high 13 bits. Align it to 2MB to match the GPU page size.
|
||||
*/
|
||||
sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
|
||||
PIPE_RESOURCE_FLAG_UNMAPPABLE |
|
||||
SI_RESOURCE_FLAG_32BIT |
|
||||
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
|
||||
SI_RESOURCE_FLAG_DISCARDABLE,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
sctx->screen->hs.tess_offchip_ring_size +
|
||||
sctx->screen->hs.tess_factor_ring_size,
|
||||
2 * 1024 * 1024);
|
||||
if (!sctx->tess_rings)
|
||||
if (sctx->has_tessellation)
|
||||
return;
|
||||
|
||||
if (sctx->screen->info.has_tmz_support) {
|
||||
sctx->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen,
|
||||
PIPE_RESOURCE_FLAG_UNMAPPABLE |
|
||||
PIPE_RESOURCE_FLAG_ENCRYPTED |
|
||||
SI_RESOURCE_FLAG_32BIT |
|
||||
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
|
||||
SI_RESOURCE_FLAG_DISCARDABLE,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
sctx->screen->hs.tess_offchip_ring_size +
|
||||
sctx->screen->hs.tess_factor_ring_size,
|
||||
2 * 1024 * 1024);
|
||||
simple_mtx_lock(&sscreen->tess_ring_lock);
|
||||
|
||||
if (!sscreen->tess_rings) {
|
||||
/* The address must be aligned to 2^19, because the shader only
|
||||
* receives the high 13 bits. Align it to 2MB to match the GPU page size.
|
||||
*/
|
||||
sscreen->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
|
||||
PIPE_RESOURCE_FLAG_UNMAPPABLE |
|
||||
SI_RESOURCE_FLAG_32BIT |
|
||||
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
|
||||
SI_RESOURCE_FLAG_DISCARDABLE,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
sscreen->hs.tess_offchip_ring_size +
|
||||
sscreen->hs.tess_factor_ring_size,
|
||||
2 * 1024 * 1024);
|
||||
if (!sscreen->tess_rings) {
|
||||
simple_mtx_unlock(&sscreen->tess_ring_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (sscreen->info.has_tmz_support) {
|
||||
sscreen->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen,
|
||||
PIPE_RESOURCE_FLAG_UNMAPPABLE |
|
||||
PIPE_RESOURCE_FLAG_ENCRYPTED |
|
||||
SI_RESOURCE_FLAG_32BIT |
|
||||
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
|
||||
SI_RESOURCE_FLAG_DISCARDABLE,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
sscreen->hs.tess_offchip_ring_size +
|
||||
sscreen->hs.tess_factor_ring_size,
|
||||
2 * 1024 * 1024);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t factor_va =
|
||||
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
|
||||
simple_mtx_unlock(&sscreen->tess_ring_lock);
|
||||
sctx->has_tessellation = true;
|
||||
|
||||
unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4;
|
||||
uint64_t factor_va = si_resource(sscreen->tess_rings)->gpu_address +
|
||||
sscreen->hs.tess_offchip_ring_size;
|
||||
|
||||
unsigned tf_ring_size_field = sscreen->hs.tess_factor_ring_size / 4;
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
tf_ring_size_field /= sctx->screen->info.max_se;
|
||||
tf_ring_size_field /= sscreen->info.max_se;
|
||||
|
||||
assert((tf_ring_size_field & C_030938_SIZE) == 0);
|
||||
|
||||
|
|
@ -4287,7 +4300,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
|
||||
assert(sctx->gfx_level >= GFX7);
|
||||
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sctx->tess_rings),
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sscreen->tess_rings),
|
||||
RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS);
|
||||
si_emit_vgt_flush(cs);
|
||||
|
||||
|
|
@ -4304,7 +4317,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
S_030944_BASE_HI(factor_va >> 40));
|
||||
}
|
||||
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM,
|
||||
sctx->screen->hs.hs_offchip_param);
|
||||
sscreen->hs.hs_offchip_param);
|
||||
radeon_end();
|
||||
return;
|
||||
}
|
||||
|
|
@ -4313,18 +4326,18 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
/* Add these registers to cs_preamble_state. */
|
||||
for (unsigned tmz = 0; tmz <= 1; tmz++) {
|
||||
struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state;
|
||||
struct pipe_resource *tf_ring = tmz ? sctx->tess_rings_tmz : sctx->tess_rings;
|
||||
struct pipe_resource *tf_ring = tmz ? sscreen->tess_rings_tmz : sscreen->tess_rings;
|
||||
|
||||
if (!tf_ring)
|
||||
continue; /* TMZ not supported */
|
||||
|
||||
uint64_t va = si_resource(tf_ring)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
|
||||
uint64_t va = si_resource(tf_ring)->gpu_address + sscreen->hs.tess_offchip_ring_size;
|
||||
|
||||
si_cs_preamble_add_vgt_flush(sctx, tmz);
|
||||
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
si_pm4_set_reg(pm4, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size_field));
|
||||
si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
|
||||
si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param);
|
||||
si_pm4_set_reg(pm4, R_030940_VGT_TF_MEMORY_BASE, va >> 8);
|
||||
if (sctx->gfx_level >= GFX10)
|
||||
si_pm4_set_reg(pm4, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(va >> 40));
|
||||
|
|
@ -4333,7 +4346,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
} else {
|
||||
si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field));
|
||||
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
|
||||
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
|
||||
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param);
|
||||
}
|
||||
si_pm4_finalize(pm4);
|
||||
}
|
||||
|
|
@ -4483,7 +4496,7 @@ static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertic
|
|||
/* Update the io layout now if possible,
|
||||
* otherwise make sure it's done by si_update_shaders.
|
||||
*/
|
||||
if (sctx->tess_rings)
|
||||
if (sctx->has_tessellation)
|
||||
si_update_tess_io_layout_state(sctx);
|
||||
else
|
||||
sctx->do_update_shaders = true;
|
||||
|
|
@ -4656,8 +4669,10 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
|
|||
assert(num_patches <= 64);
|
||||
assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0);
|
||||
|
||||
uint64_t ring_va = (unlikely(sctx->ws->cs_is_secure(&sctx->gfx_cs)) ?
|
||||
si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address;
|
||||
uint64_t ring_va =
|
||||
sctx->ws->cs_is_secure(&sctx->gfx_cs) ?
|
||||
si_resource(sctx->screen->tess_rings_tmz)->gpu_address :
|
||||
si_resource(sctx->screen->tess_rings)->gpu_address;
|
||||
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
|
||||
|
||||
sctx->tes_offchip_ring_va_sgpr = ring_va;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue