diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 40276919c98..ea493836ce8 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -92,7 +92,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h /* If we use s_sendmsg to set tess factors to all 0 or all 1 instead of writing to the tess * factor buffer, we need this at the end of command buffers: */ - if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->tess_rings) { + if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->has_tessellation) { radeon_begin(cs); radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); @@ -447,9 +447,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) return; } - if (ctx->tess_rings) { + if (ctx->has_tessellation) { radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, - unlikely(is_secure) ? si_resource(ctx->tess_rings_tmz) : si_resource(ctx->tess_rings), + unlikely(is_secure) ? si_resource(ctx->screen->tess_rings_tmz) + : si_resource(ctx->screen->tess_rings), RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3433385cf54..78be8186fbd 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -223,8 +223,6 @@ static void si_destroy_context(struct pipe_context *context) pipe_resource_reference(&sctx->esgs_ring, NULL); pipe_resource_reference(&sctx->gsvs_ring, NULL); - pipe_resource_reference(&sctx->tess_rings, NULL); - pipe_resource_reference(&sctx->tess_rings_tmz, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); pipe_resource_reference(&sctx->sample_pos_buffer, NULL); si_resource_reference(&sctx->border_color_buffer, NULL); @@ -974,6 +972,8 @@ static void si_destroy_screen(struct pipe_screen *pscreen) } si_resource_reference(&sscreen->attribute_ring, NULL); + pipe_resource_reference(&sscreen->tess_rings, NULL); + pipe_resource_reference(&sscreen->tess_rings_tmz, NULL); util_queue_destroy(&sscreen->shader_compiler_queue); util_queue_destroy(&sscreen->shader_compiler_queue_opt_variants); @@ -1031,6 +1031,7 @@ static void si_destroy_screen(struct pipe_screen *pscreen) simple_mtx_destroy(&sscreen->gpu_load_mutex); simple_mtx_destroy(&sscreen->gds_mutex); + simple_mtx_destroy(&sscreen->tess_ring_lock); radeon_bo_reference(sscreen->ws, &sscreen->gds_oa, NULL); @@ -1277,6 +1278,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, (void)simple_mtx_init(&sscreen->async_compute_context_lock, mtx_plain); (void)simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain); (void)simple_mtx_init(&sscreen->gds_mutex, mtx_plain); + (void)simple_mtx_init(&sscreen->tess_ring_lock, mtx_plain); si_init_gs_info(sscreen); if (!si_init_shader_cache(sscreen)) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8a94fe97a81..384f6de691e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -712,6 +712,10 @@ struct si_screen { struct si_resource *attribute_ring; + simple_mtx_t tess_ring_lock; + struct pipe_resource *tess_rings; + struct pipe_resource *tess_rings_tmz; + /* NGG streamout. */ simple_mtx_t gds_mutex; struct pb_buffer_lean *gds_oa; @@ -1129,6 +1133,7 @@ struct si_context { bool vs_uses_base_instance; bool vs_uses_draw_id; uint8_t patch_vertices; + bool has_tessellation; /* whether si_screen::tess_rings* are valid */ /* shader descriptors */ struct si_descriptors descriptors[SI_NUM_DESCS]; @@ -1147,8 +1152,6 @@ struct si_context { struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; - struct pipe_resource *tess_rings; - struct pipe_resource *tess_rings_tmz; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ struct si_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 95b6201c216..6660f7c6c11 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -52,9 +52,9 @@ static bool si_update_shaders(struct si_context *sctx) /* Update TCS and TES. */ if (HAS_TESS) { - if (!sctx->tess_rings) { + if (!sctx->has_tessellation) { si_init_tess_factor_ring(sctx); - if (!sctx->tess_rings) + if (!sctx->has_tessellation) return false; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 4357d3034c6..266e8b2b105 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4241,42 +4241,55 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes) void si_init_tess_factor_ring(struct si_context *sctx) { - assert(!sctx->tess_rings); + struct si_screen *sscreen = sctx->screen; + assert(!sctx->has_tessellation); - /* The address must be aligned to 2^19, because the shader only - * receives the high 13 bits. Align it to 2MB to match the GPU page size. - */ - sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen, - PIPE_RESOURCE_FLAG_UNMAPPABLE | - SI_RESOURCE_FLAG_32BIT | - SI_RESOURCE_FLAG_DRIVER_INTERNAL | - SI_RESOURCE_FLAG_DISCARDABLE, - PIPE_USAGE_DEFAULT, - sctx->screen->hs.tess_offchip_ring_size + - sctx->screen->hs.tess_factor_ring_size, - 2 * 1024 * 1024); - if (!sctx->tess_rings) + if (sctx->has_tessellation) return; - if (sctx->screen->info.has_tmz_support) { - sctx->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen, - PIPE_RESOURCE_FLAG_UNMAPPABLE | - PIPE_RESOURCE_FLAG_ENCRYPTED | - SI_RESOURCE_FLAG_32BIT | - SI_RESOURCE_FLAG_DRIVER_INTERNAL | - SI_RESOURCE_FLAG_DISCARDABLE, - PIPE_USAGE_DEFAULT, - sctx->screen->hs.tess_offchip_ring_size + - sctx->screen->hs.tess_factor_ring_size, - 2 * 1024 * 1024); + simple_mtx_lock(&sscreen->tess_ring_lock); + + if (!sscreen->tess_rings) { + /* The address must be aligned to 2^19, because the shader only + * receives the high 13 bits. Align it to 2MB to match the GPU page size. + */ + sscreen->tess_rings = pipe_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | + SI_RESOURCE_FLAG_32BIT | + SI_RESOURCE_FLAG_DRIVER_INTERNAL | + SI_RESOURCE_FLAG_DISCARDABLE, + PIPE_USAGE_DEFAULT, + sscreen->hs.tess_offchip_ring_size + + sscreen->hs.tess_factor_ring_size, + 2 * 1024 * 1024); + if (!sscreen->tess_rings) { + simple_mtx_unlock(&sscreen->tess_ring_lock); + return; + } + + if (sscreen->info.has_tmz_support) { + sscreen->tess_rings_tmz = pipe_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | + PIPE_RESOURCE_FLAG_ENCRYPTED | + SI_RESOURCE_FLAG_32BIT | + SI_RESOURCE_FLAG_DRIVER_INTERNAL | + SI_RESOURCE_FLAG_DISCARDABLE, + PIPE_USAGE_DEFAULT, + sscreen->hs.tess_offchip_ring_size + + sscreen->hs.tess_factor_ring_size, + 2 * 1024 * 1024); + } } - uint64_t factor_va = - si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size; + simple_mtx_unlock(&sscreen->tess_ring_lock); + sctx->has_tessellation = true; - unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4; + uint64_t factor_va = si_resource(sscreen->tess_rings)->gpu_address + + sscreen->hs.tess_offchip_ring_size; + + unsigned tf_ring_size_field = sscreen->hs.tess_factor_ring_size / 4; if (sctx->gfx_level >= GFX11) - tf_ring_size_field /= sctx->screen->info.max_se; + tf_ring_size_field /= sscreen->info.max_se; assert((tf_ring_size_field & C_030938_SIZE) == 0); @@ -4287,7 +4300,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) assert(sctx->gfx_level >= GFX7); - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sctx->tess_rings), + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sscreen->tess_rings), RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS); si_emit_vgt_flush(cs); @@ -4304,7 +4317,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) S_030944_BASE_HI(factor_va >> 40)); } radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM, - sctx->screen->hs.hs_offchip_param); + sscreen->hs.hs_offchip_param); radeon_end(); return; } @@ -4313,18 +4326,18 @@ void si_init_tess_factor_ring(struct si_context *sctx) /* Add these registers to cs_preamble_state. */ for (unsigned tmz = 0; tmz <= 1; tmz++) { struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state; - struct pipe_resource *tf_ring = tmz ? sctx->tess_rings_tmz : sctx->tess_rings; + struct pipe_resource *tf_ring = tmz ? sscreen->tess_rings_tmz : sscreen->tess_rings; if (!tf_ring) continue; /* TMZ not supported */ - uint64_t va = si_resource(tf_ring)->gpu_address + sctx->screen->hs.tess_offchip_ring_size; + uint64_t va = si_resource(tf_ring)->gpu_address + sscreen->hs.tess_offchip_ring_size; si_cs_preamble_add_vgt_flush(sctx, tmz); if (sctx->gfx_level >= GFX7) { si_pm4_set_reg(pm4, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size_field)); - si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param); + si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param); si_pm4_set_reg(pm4, R_030940_VGT_TF_MEMORY_BASE, va >> 8); if (sctx->gfx_level >= GFX10) si_pm4_set_reg(pm4, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(va >> 40)); @@ -4333,7 +4346,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) } else { si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field)); si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8); - si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param); + si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param); } si_pm4_finalize(pm4); } @@ -4483,7 +4496,7 @@ static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertic /* Update the io layout now if possible, * otherwise make sure it's done by si_update_shaders. */ - if (sctx->tess_rings) + if (sctx->has_tessellation) si_update_tess_io_layout_state(sctx); else sctx->do_update_shaders = true; @@ -4656,8 +4669,10 @@ void si_update_tess_io_layout_state(struct si_context *sctx) assert(num_patches <= 64); assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0); - uint64_t ring_va = (unlikely(sctx->ws->cs_is_secure(&sctx->gfx_cs)) ? - si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address; + uint64_t ring_va = + sctx->ws->cs_is_secure(&sctx->gfx_cs) ? + si_resource(sctx->screen->tess_rings_tmz)->gpu_address : + si_resource(sctx->screen->tess_rings)->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); sctx->tes_offchip_ring_va_sgpr = ring_va;