diff --git a/src/amd/common/ac_shadowed_regs.c b/src/amd/common/ac_shadowed_regs.c index f060d55cb64..834bda34bc5 100644 --- a/src/amd/common/ac_shadowed_regs.c +++ b/src/amd/common/ac_shadowed_regs.c @@ -4288,6 +4288,8 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info, CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_GLOBAL_UCONFIG(1)); - for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++) - ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address); + if (!info->has_fw_based_shadowing) { + for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++) + ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address); + } } diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index ecdbc5ec71c..e584186851a 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -41,22 +41,47 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) if (sctx->has_graphics && (sctx->screen->info.mid_command_buffer_preemption_enabled || sctx->screen->debug_flags & DBG(SHADOW_REGS))) { - sctx->shadowed_regs = - si_aligned_buffer_create(sctx->b.screen, - PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, - PIPE_USAGE_DEFAULT, - SI_SHADOWED_REG_BUFFER_SIZE, - 4096); - if (!sctx->shadowed_regs) - fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n"); + if (sctx->screen->info.has_fw_based_shadowing) { + sctx->shadowing.registers = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + sctx->screen->info.fw_based_mcbp.shadow_size, + sctx->screen->info.fw_based_mcbp.shadow_alignment); + sctx->shadowing.csa = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + sctx->screen->info.fw_based_mcbp.csa_size, + sctx->screen->info.fw_based_mcbp.csa_alignment); + 
if (!sctx->shadowing.registers || !sctx->shadowing.csa) { + fprintf(stderr, "radeonsi: cannot create register shadowing buffer(s)\n"); + /* FW-based shadowing needs both buffers; release any partial allocation and run unshadowed. */ + si_resource_reference(&sctx->shadowing.registers, NULL); + si_resource_reference(&sctx->shadowing.csa, NULL); + } else { + sctx->ws->cs_set_mcbp_reg_shadowing_va(&sctx->gfx_cs, + sctx->shadowing.registers->gpu_address, + sctx->shadowing.csa->gpu_address); + } + } else { + sctx->shadowing.registers = + si_aligned_buffer_create(sctx->b.screen, + PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, + SI_SHADOWED_REG_BUFFER_SIZE, + 4096); + if (!sctx->shadowing.registers) + fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n"); + } } - si_init_cs_preamble_state(sctx, sctx->shadowed_regs != NULL); + si_init_cs_preamble_state(sctx, sctx->shadowing.registers != NULL); - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* We need to clear the shadowed reg buffer. */ - si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b, - 0, sctx->shadowed_regs->bo_size, 0, SI_OP_SYNC_AFTER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowing.registers->b.b, + 0, sctx->shadowing.registers->bo_size, 0, SI_OP_SYNC_AFTER, SI_COHERENCY_CP, L2_BYPASS); /* Create the shadowing preamble. */ @@ -72,11 +97,14 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) ac_create_shadowing_ib_preamble(&sctx->screen->info, (pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble, - sctx->shadowed_regs->gpu_address, sctx->screen->dpbb_allowed); + sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed); /* Initialize shadowed registers as follows. 
*/ - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers, RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); + if (sctx->shadowing.csa) + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.csa, + RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); si_pm4_emit(sctx, shadowing_preamble); ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array); si_pm4_emit(sctx, sctx->cs_preamble_state); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8116d33c2e9..5eb216141ce 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2184,7 +2184,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); - } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) { + } else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) { /* We can't use the COMMON registers with register shadowing. 
*/ radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index e6e94ce64d0..be2d353169f 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -417,9 +417,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer, RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS); } - if (ctx->shadowed_regs) { - radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs, + if (ctx->shadowing.registers) { + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.registers, RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); + + if (ctx->shadowing.csa) + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.csa, + RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); } si_add_all_descriptors_to_bo_list(ctx); @@ -484,7 +488,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) if (ctx->screen->use_ngg_culling) si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state); - if (first_cs || !ctx->shadowed_regs) { + if (first_cs || !ctx->shadowing.registers) { /* These don't add any buffers, so skip them with shadowing. */ si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs); /* CLEAR_STATE sets zeros. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 08fe4583c8e..ab42506cda4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -350,7 +350,8 @@ static void si_destroy_context(struct pipe_context *context) sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL); si_resource_reference(&sctx->eop_bug_scratch, NULL); si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL); - si_resource_reference(&sctx->shadowed_regs, NULL); + si_resource_reference(&sctx->shadowing.registers, NULL); + si_resource_reference(&sctx->shadowing.csa, NULL); si_destroy_compiler(&sctx->compiler); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 282135bad16..e127e0ac0c9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -962,7 +962,14 @@ struct si_context { struct u_log_context *log; void *query_result_shader; void *sh_query_result_shader; - struct si_resource *shadowed_regs; + struct { + /* Memory where the shadowed registers will be saved and loaded from. */ + struct si_resource *registers; + /* Context Save Area: scratch area to save other required data. Only + * used if info->has_fw_based_shadowing is true. + */ + struct si_resource *csa; + } shadowing; void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 71196c0a781..8bd203aa498 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1471,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw /* draw packet */ if (index_size) { /* Register shadowing doesn't shadow INDEX_TYPE. 
*/ - if (index_size != sctx->last_index_size || sctx->shadowed_regs || + if (index_size != sctx->last_index_size || sctx->shadowing.registers || (GFX_VERSION == GFX10_3 && disable_instance_packing != sctx->disable_instance_packing)) { unsigned index_type; @@ -1598,7 +1598,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw } } else { /* Register shadowing requires that we always emit PKT3_NUM_INSTANCES. */ - if (sctx->shadowed_regs || + if (sctx->shadowing.registers || sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || sctx->last_instance_count != instance_count) { radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, 0)); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 63f23ff092a..ec58fa28c17 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3693,7 +3693,7 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz) &sctx->cs_preamble_has_vgt_flush; /* We shouldn't get here if registers are shadowed. */ - assert(!sctx->shadowed_regs); + assert(!sctx->shadowing.registers); if (*has_vgt_flush) return; @@ -3810,7 +3810,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx) false, 0, 0, 0); } - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* These registers will be shadowed, so set them only once. */ struct radeon_cmdbuf *cs = &sctx->gfx_cs; @@ -4080,7 +4080,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) assert((tf_ring_size_field & C_030938_SIZE) == 0); - if (sctx->shadowed_regs) { + if (sctx->shadowing.registers) { /* These registers will be shadowed, so set them only once. 
*/ /* TODO: tmz + shadowed_regs support */ struct radeon_cmdbuf *cs = &sctx->gfx_cs; diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index ad5810872d0..46b9c96d844 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -751,6 +751,12 @@ struct radeon_winsys { * Stable pstate */ bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate state); + + /** + * Pass the VAs to the buffers where various information is saved by the FW during mcbp. + */ + void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t regs_va, + uint64_t csa_va); }; static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 8916002cad4..b7e1b9c02c5 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1490,7 +1490,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) if (acs->ip_type == AMD_IP_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - struct drm_amdgpu_cs_chunk chunks[7]; + struct drm_amdgpu_cs_chunk chunks[8]; unsigned num_chunks = 0; /* BO list */ @@ -1565,6 +1565,13 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) num_chunks++; } + if (ws->info.has_fw_based_shadowing && acs->mcbp_fw_shadow_chunk.shadow_va) { /* only for a CS that was given shadow VAs */ + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&acs->mcbp_fw_shadow_chunk; + num_chunks++; + } + /* Fence */ if (has_user_fence) { chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE; @@ -1674,6 +1681,9 @@ cleanup: if (r || noop) amdgpu_fence_signalled(cs->fence); + if (unlikely(ws->info.has_fw_based_shadowing && acs->mcbp_fw_shadow_chunk.flags && r == 0)) + acs->mcbp_fw_shadow_chunk.flags = 0; + cs->error_code = 
r; /* Only decrement num_active_ioctls for those buffers where we incremented it. */ @@ -1855,6 +1865,19 @@ static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs, return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage); } +/* Record the register-shadow and CSA addresses in this CS's CP_GFX_SHADOW chunk, + * zero the GDS address and set the INIT_SHADOW flag. + */ +static void amdgpu_cs_set_mcbp_reg_shadowing_va(struct radeon_cmdbuf *rcs, uint64_t regs_va, + uint64_t csa_va) +{ + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *chunk = &amdgpu_cs(rcs)->mcbp_fw_shadow_chunk; + chunk->shadow_va = regs_va; + chunk->csa_va = csa_va; + chunk->gds_va = 0; + chunk->flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; +} + void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) { ws->base.ctx_create = amdgpu_ctx_create; @@ -1880,4 +1903,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file; ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file; ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file; + + if (ws->aws->info.has_fw_based_shadowing) + ws->base.cs_set_mcbp_reg_shadowing_va = amdgpu_cs_set_mcbp_reg_shadowing_va; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 13b8bf73d4f..5038463db40 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -161,6 +161,8 @@ struct amdgpu_cs { struct util_queue_fence flush_completed; struct pipe_fence_handle *next_fence; struct pb_buffer *preamble_ib_bo; + + struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk; }; struct amdgpu_fence {