From 65b40d0b7e9edd85aefd3ae17c73ac7f84d0330f Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 17 Mar 2023 14:44:42 +0100 Subject: [PATCH] radeonsi: implement fw based mcbp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some chips support firmware based mcbp. If supported this means radeonsi needs to allocate 3 buffers and pass them to the firmware. From there, the firmware will handle mcbp and register shadowing on its own so we don't need to insert LOAD packet in the preamble. Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_shadowed_regs.c | 6 ++- .../drivers/radeonsi/si_cp_reg_shadowing.c | 52 ++++++++++++++----- src/gallium/drivers/radeonsi/si_descriptors.c | 2 +- src/gallium/drivers/radeonsi/si_gfx_cs.c | 10 ++-- src/gallium/drivers/radeonsi/si_pipe.c | 3 +- src/gallium/drivers/radeonsi/si_pipe.h | 9 +++- .../drivers/radeonsi/si_state_draw.cpp | 4 +- .../drivers/radeonsi/si_state_shaders.cpp | 6 +-- src/gallium/include/winsys/radeon_winsys.h | 6 +++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 25 ++++++++- src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 2 + 11 files changed, 97 insertions(+), 28 deletions(-) diff --git a/src/amd/common/ac_shadowed_regs.c b/src/amd/common/ac_shadowed_regs.c index f060d55cb64..834bda34bc5 100644 --- a/src/amd/common/ac_shadowed_regs.c +++ b/src/amd/common/ac_shadowed_regs.c @@ -4288,6 +4288,8 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info, CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_GLOBAL_UCONFIG(1)); - for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++) - ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address); + if (!info->has_fw_based_shadowing) { + for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++) + ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address); + } } diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index ecdbc5ec71c..e584186851a 
100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -41,22 +41,43 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) if (sctx->has_graphics && (sctx->screen->info.mid_command_buffer_preemption_enabled || sctx->screen->debug_flags & DBG(SHADOW_REGS))) { - sctx->shadowed_regs = - si_aligned_buffer_create(sctx->b.screen, - PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, - PIPE_USAGE_DEFAULT, - SI_SHADOWED_REG_BUFFER_SIZE, - 4096); - if (!sctx->shadowed_regs) - fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n"); + if (sctx->screen->info.has_fw_based_shadowing) { + sctx->shadowing.registers = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + sctx->screen->info.fw_based_mcbp.shadow_size, + sctx->screen->info.fw_based_mcbp.shadow_alignment); + sctx->shadowing.csa = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + sctx->screen->info.fw_based_mcbp.csa_size, + sctx->screen->info.fw_based_mcbp.csa_alignment); + if (!sctx->shadowing.registers || !sctx->shadowing.csa) + fprintf(stderr, "radeonsi: cannot create register shadowing buffer(s)\n"); + else + sctx->ws->cs_set_mcbp_reg_shadowing_va(&sctx->gfx_cs, + sctx->shadowing.registers->gpu_address, + sctx->shadowing.csa->gpu_address); + } else { + sctx->shadowing.registers = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + SI_SHADOWED_REG_BUFFER_SIZE, + 4096); + if (!sctx->shadowing.registers) + fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n"); + } } - si_init_cs_preamble_state(sctx, sctx->shadowed_regs != NULL); + si_init_cs_preamble_state(sctx, sctx->shadowing.registers != NULL); - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* 
We need to clear the shadowed reg buffer. */ - si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b, - 0, sctx->shadowed_regs->bo_size, 0, SI_OP_SYNC_AFTER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowing.registers->b.b, + 0, sctx->shadowing.registers->bo_size, 0, SI_OP_SYNC_AFTER, SI_COHERENCY_CP, L2_BYPASS); /* Create the shadowing preamble. */ @@ -72,11 +93,14 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) ac_create_shadowing_ib_preamble(&sctx->screen->info, (pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble, - sctx->shadowed_regs->gpu_address, sctx->screen->dpbb_allowed); + sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed); /* Initialize shadowed registers as follows. */ - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers, RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); + if (sctx->shadowing.csa) + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.csa, + RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); si_pm4_emit(sctx, shadowing_preamble); ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array); si_pm4_emit(sctx, sctx->cs_preamble_state); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8116d33c2e9..5eb216141ce 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2184,7 +2184,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); - } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) { + } else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) { /* We can't 
use the COMMON registers with register shadowing. */ radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index e6e94ce64d0..be2d353169f 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -417,9 +417,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer, RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS); } - if (ctx->shadowed_regs) { - radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs, + if (ctx->shadowing.registers) { + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.registers, RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); + + if (ctx->shadowing.csa) + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.csa, + RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); } si_add_all_descriptors_to_bo_list(ctx); @@ -484,7 +488,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) if (ctx->screen->use_ngg_culling) si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state); - if (first_cs || !ctx->shadowed_regs) { + if (first_cs || !ctx->shadowing.registers) { /* These don't add any buffers, so skip them with shadowing. */ si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs); /* CLEAR_STATE sets zeros. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 08fe4583c8e..ab42506cda4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -350,7 +350,8 @@ static void si_destroy_context(struct pipe_context *context) sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL); si_resource_reference(&sctx->eop_bug_scratch, NULL); si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL); - si_resource_reference(&sctx->shadowed_regs, NULL); + si_resource_reference(&sctx->shadowing.registers, NULL); + si_resource_reference(&sctx->shadowing.csa, NULL); si_destroy_compiler(&sctx->compiler); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 282135bad16..e127e0ac0c9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -962,7 +962,14 @@ struct si_context { struct u_log_context *log; void *query_result_shader; void *sh_query_result_shader; - struct si_resource *shadowed_regs; + struct { + /* Memory where the shadowed registers will be saved and loaded from. */ + struct si_resource *registers; + /* Context Save Area: scratch area to save other required data. Only + * used if info->has_fw_based_shadowing is true. + */ + struct si_resource *csa; + } shadowing; void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 71196c0a781..8bd203aa498 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1471,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw /* draw packet */ if (index_size) { /* Register shadowing doesn't shadow INDEX_TYPE. 
*/ - if (index_size != sctx->last_index_size || sctx->shadowed_regs || + if (index_size != sctx->last_index_size || sctx->shadowing.registers || (GFX_VERSION == GFX10_3 && disable_instance_packing != sctx->disable_instance_packing)) { unsigned index_type; @@ -1598,7 +1598,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw } } else { /* Register shadowing requires that we always emit PKT3_NUM_INSTANCES. */ - if (sctx->shadowed_regs || + if (sctx->shadowing.registers || sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || sctx->last_instance_count != instance_count) { radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, 0)); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 63f23ff092a..ec58fa28c17 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3693,7 +3693,7 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz) &sctx->cs_preamble_has_vgt_flush; /* We shouldn't get here if registers are shadowed. */ - assert(!sctx->shadowed_regs); + assert(!sctx->shadowing.registers); if (*has_vgt_flush) return; @@ -3810,7 +3810,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx) false, 0, 0, 0); } - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* These registers will be shadowed, so set them only once. */ struct radeon_cmdbuf *cs = &sctx->gfx_cs; @@ -4080,7 +4080,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) assert((tf_ring_size_field & C_030938_SIZE) == 0); - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* These registers will be shadowed, so set them only once. 
*/ /* TODO: tmz + shadowed_regs support */ struct radeon_cmdbuf *cs = &sctx->gfx_cs; diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index ad5810872d0..46b9c96d844 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -751,6 +751,12 @@ struct radeon_winsys { * Stable pstate */ bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate state); + + /** + * Pass the VAs to the buffers where various information is saved by the FW during mcbp. + */ + void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t regs_va, + uint64_t csa_va); }; static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 8916002cad4..b7e1b9c02c5 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1490,7 +1490,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) if (acs->ip_type == AMD_IP_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - struct drm_amdgpu_cs_chunk chunks[7]; + struct drm_amdgpu_cs_chunk chunks[8]; unsigned num_chunks = 0; /* BO list */ @@ -1565,6 +1565,13 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) num_chunks++; } + if (ws->info.has_fw_based_shadowing) { + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&acs->mcbp_fw_shadow_chunk; + num_chunks++; + } + /* Fence */ if (has_user_fence) { chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE; @@ -1674,6 +1681,9 @@ cleanup: if (r || noop) amdgpu_fence_signalled(cs->fence); + if (unlikely(ws->info.has_fw_based_shadowing && acs->mcbp_fw_shadow_chunk.flags && r == 0)) + acs->mcbp_fw_shadow_chunk.flags = 0; + cs->error_code = 
r; /* Only decrement num_active_ioctls for those buffers where we incremented it. */ @@ -1855,6 +1865,16 @@ static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs, return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage); } +static void amdgpu_cs_set_mcbp_reg_shadowing_va(struct radeon_cmdbuf *rcs,uint64_t regs_va, + uint64_t csa_va) +{ + struct amdgpu_cs *cs = amdgpu_cs(rcs); + cs->mcbp_fw_shadow_chunk.shadow_va = regs_va; + cs->mcbp_fw_shadow_chunk.csa_va = csa_va; + cs->mcbp_fw_shadow_chunk.gds_va = 0; + cs->mcbp_fw_shadow_chunk.flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; +} + void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) { ws->base.ctx_create = amdgpu_ctx_create; @@ -1880,4 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file; ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file; ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file; + + if (ws->aws->info.has_fw_based_shadowing) + ws->base.cs_set_mcbp_reg_shadowing_va = amdgpu_cs_set_mcbp_reg_shadowing_va; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 13b8bf73d4f..5038463db40 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -161,6 +161,8 @@ struct amdgpu_cs { struct util_queue_fence flush_completed; struct pipe_fence_handle *next_fence; struct pb_buffer *preamble_ib_bo; + + struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk; }; struct amdgpu_fence {