From 14b1ed1ce105d42652f70e2fd13c90fc4f2e7ffc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 10 May 2022 11:47:33 +1000 Subject: [PATCH] radeonsi: port tess ring calcs to the common helper. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This uses the common helper code to implement the tess ring sizing. One question is whether radeonsi should be using tess_offchip_ring_offset in some places where it's using tess_factor_ring_size. Reviewed-by: Marek Olšák Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/gallium/drivers/radeonsi/si_pipe.c | 55 +------------------ src/gallium/drivers/radeonsi/si_pipe.h | 5 +- .../drivers/radeonsi/si_shader_llvm_tess.c | 2 +- .../drivers/radeonsi/si_state_draw.cpp | 2 +- .../drivers/radeonsi/si_state_shaders.cpp | 18 +++--- 5 files changed, 13 insertions(+), 69 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3c94693af53..0c46ef6b2e0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1215,60 +1215,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3; - /* Determine tessellation ring info. */ - bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 && - sscreen->info.family != CHIP_CARRIZO && - sscreen->info.family != CHIP_STONEY; - /* This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs need this. - */ - unsigned max_offchip_buffers_per_se; - - if (sscreen->info.chip_class >= GFX11) - max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */ - else if (sscreen->info.chip_class >= GFX10) - max_offchip_buffers_per_se = 128; - /* Only certain chips can use the maximum value. 
*/ - else if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20) - max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; - else - max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; - - unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se; - unsigned offchip_granularity; - - /* Hawaii has a bug with offchip buffers > 256 that can be worked - * around by setting 4K granularity. - */ - if (sscreen->info.family == CHIP_HAWAII) { - sscreen->tess_offchip_block_dw_size = 4096; - offchip_granularity = V_03093C_X_4K_DWORDS; - } else { - sscreen->tess_offchip_block_dw_size = 8192; - offchip_granularity = V_03093C_X_8K_DWORDS; - } - - sscreen->tess_factor_ring_size = 48 * 1024 * sscreen->info.max_se; - sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; - - if (sscreen->info.chip_class >= GFX11) { - /* OFFCHIP_BUFFERING is per SE. */ - sscreen->vgt_hs_offchip_param = - S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) | - S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); - } else if (sscreen->info.chip_class >= GFX10_3) { - sscreen->vgt_hs_offchip_param = - S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) | - S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); - } else if (sscreen->info.chip_class >= GFX7) { - if (sscreen->info.chip_class >= GFX8) - --max_offchip_buffers; - sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) | - S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity); - } else { - assert(offchip_granularity == V_03093C_X_8K_DWORDS); - sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); - } + ac_get_hs_info(&sscreen->info, &sscreen->hs); sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ebbee0b8aac..8b5b82dd6cc 
100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -572,10 +572,7 @@ struct si_screen { unsigned pa_sc_raster_config_1; unsigned se_tile_repeat; unsigned gs_table_depth; - unsigned tess_offchip_block_dw_size; - unsigned tess_offchip_ring_size; - unsigned tess_factor_ring_size; - unsigned vgt_hs_offchip_param; + struct ac_hs_info hs; unsigned eqaa_force_coverage_samples; unsigned eqaa_force_z_samples; unsigned eqaa_force_color_samples; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 5950e1d1d01..c61bee867d2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -350,7 +350,7 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum } if (ring == TCS_FACTOR_RING) { - unsigned tf_offset = ctx->screen->tess_offchip_ring_size; + unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size; addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), ""); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index eda67cd336b..cf053b4d2e2 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -720,7 +720,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa /* Make sure the output data fits in the offchip buffer */ *num_patches = - MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size); + MIN2(*num_patches, (sctx->screen->hs.tess_offchip_block_dw_size * 4) / output_patch_size); /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 785373d310c..33bb4923157 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4022,7 +4022,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) */ sctx->tess_rings = pipe_aligned_buffer_create( sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, - sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024); + sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024); if (!sctx->tess_rings) return; @@ -4031,13 +4031,13 @@ void si_init_tess_factor_ring(struct si_context *sctx) sctx->b.screen, PIPE_RESOURCE_FLAG_ENCRYPTED | SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, - sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024); + sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024); } uint64_t factor_va = - si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size; + si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size; - unsigned tf_ring_size_field = sctx->screen->tess_factor_ring_size / 4; + unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4; if (sctx->chip_class >= GFX11) tf_ring_size_field /= sctx->screen->info.max_se; @@ -4067,7 +4067,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) S_030944_BASE_HI(factor_va >> 40)); } radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM, - sctx->screen->vgt_hs_offchip_param); + sctx->screen->hs.hs_offchip_param); radeon_end(); return; } @@ -4087,7 +4087,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) si_pm4_set_reg(sctx->cs_preamble_state, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(factor_va >> 40)); 
si_pm4_set_reg(sctx->cs_preamble_state, R_03093C_VGT_HS_OFFCHIP_PARAM, - sctx->screen->vgt_hs_offchip_param); + sctx->screen->hs.hs_offchip_param); } else { struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); @@ -4095,18 +4095,18 @@ void si_init_tess_factor_ring(struct si_context *sctx) S_008988_SIZE(tf_ring_size_field)); si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8); si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, - sctx->screen->vgt_hs_offchip_param); + sctx->screen->hs.hs_offchip_param); sctx->cs_preamble_tess_rings = pm4; if (sctx->screen->info.has_tmz_support) { pm4 = CALLOC_STRUCT(si_pm4_state); uint64_t factor_va_tmz = - si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->tess_offchip_ring_size; + si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->hs.tess_offchip_ring_size; si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field)); si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va_tmz >> 8); si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, - sctx->screen->vgt_hs_offchip_param); + sctx->screen->hs.hs_offchip_param); sctx->cs_preamble_tess_rings_tmz = pm4; } }