radeonsi: port tess ring calcs to the common helper.

This uses the common helper code to implement the tess ring sizing.

One open question: should radeonsi be using tess_offchip_ring_offset
in some of the places where it currently uses tess_factor_ring_size?

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16415>
This commit is contained in:
Dave Airlie 2022-05-10 11:47:33 +10:00 committed by Marge Bot
parent 17fcbd8742
commit 14b1ed1ce1
5 changed files with 13 additions and 69 deletions

View file

@ -1215,60 +1215,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3;
/* Determine tessellation ring info. */
bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
sscreen->info.family != CHIP_CARRIZO &&
sscreen->info.family != CHIP_STONEY;
/* This must be one less than the maximum number due to a hw limitation.
* Various hardware bugs need this.
*/
unsigned max_offchip_buffers_per_se;
if (sscreen->info.chip_class >= GFX11)
max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */
else if (sscreen->info.chip_class >= GFX10)
max_offchip_buffers_per_se = 128;
/* Only certain chips can use the maximum value. */
else if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20)
max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
else
max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se;
unsigned offchip_granularity;
/* Hawaii has a bug with offchip buffers > 256 that can be worked
* around by setting 4K granularity.
*/
if (sscreen->info.family == CHIP_HAWAII) {
sscreen->tess_offchip_block_dw_size = 4096;
offchip_granularity = V_03093C_X_4K_DWORDS;
} else {
sscreen->tess_offchip_block_dw_size = 8192;
offchip_granularity = V_03093C_X_8K_DWORDS;
}
sscreen->tess_factor_ring_size = 48 * 1024 * sscreen->info.max_se;
sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
if (sscreen->info.chip_class >= GFX11) {
/* OFFCHIP_BUFFERING is per SE. */
sscreen->vgt_hs_offchip_param =
S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
} else if (sscreen->info.chip_class >= GFX10_3) {
sscreen->vgt_hs_offchip_param =
S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
} else if (sscreen->info.chip_class >= GFX7) {
if (sscreen->info.chip_class >= GFX8)
--max_offchip_buffers;
sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
} else {
assert(offchip_granularity == V_03093C_X_8K_DWORDS);
sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
}
ac_get_hs_info(&sscreen->info, &sscreen->hs);
sscreen->has_draw_indirect_multi =
(sscreen->info.family >= CHIP_POLARIS10) ||

View file

@ -572,10 +572,7 @@ struct si_screen {
unsigned pa_sc_raster_config_1;
unsigned se_tile_repeat;
unsigned gs_table_depth;
unsigned tess_offchip_block_dw_size;
unsigned tess_offchip_ring_size;
unsigned tess_factor_ring_size;
unsigned vgt_hs_offchip_param;
struct ac_hs_info hs;
unsigned eqaa_force_coverage_samples;
unsigned eqaa_force_z_samples;
unsigned eqaa_force_color_samples;

View file

@ -350,7 +350,7 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum
}
if (ring == TCS_FACTOR_RING) {
unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
}

View file

@ -720,7 +720,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
/* Make sure the output data fits in the offchip buffer */
*num_patches =
MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size);
MIN2(*num_patches, (sctx->screen->hs.tess_offchip_block_dw_size * 4) / output_patch_size);
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.

View file

@ -4022,7 +4022,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
*/
sctx->tess_rings = pipe_aligned_buffer_create(
sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT,
sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
if (!sctx->tess_rings)
return;
@ -4031,13 +4031,13 @@ void si_init_tess_factor_ring(struct si_context *sctx)
sctx->b.screen,
PIPE_RESOURCE_FLAG_ENCRYPTED | SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
}
uint64_t factor_va =
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size;
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
unsigned tf_ring_size_field = sctx->screen->tess_factor_ring_size / 4;
unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4;
if (sctx->chip_class >= GFX11)
tf_ring_size_field /= sctx->screen->info.max_se;
@ -4067,7 +4067,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
S_030944_BASE_HI(factor_va >> 40));
}
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM,
sctx->screen->vgt_hs_offchip_param);
sctx->screen->hs.hs_offchip_param);
radeon_end();
return;
}
@ -4087,7 +4087,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
si_pm4_set_reg(sctx->cs_preamble_state, R_030944_VGT_TF_MEMORY_BASE_HI,
S_030944_BASE_HI(factor_va >> 40));
si_pm4_set_reg(sctx->cs_preamble_state, R_03093C_VGT_HS_OFFCHIP_PARAM,
sctx->screen->vgt_hs_offchip_param);
sctx->screen->hs.hs_offchip_param);
} else {
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
@ -4095,18 +4095,18 @@ void si_init_tess_factor_ring(struct si_context *sctx)
S_008988_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
sctx->screen->vgt_hs_offchip_param);
sctx->screen->hs.hs_offchip_param);
sctx->cs_preamble_tess_rings = pm4;
if (sctx->screen->info.has_tmz_support) {
pm4 = CALLOC_STRUCT(si_pm4_state);
uint64_t factor_va_tmz =
si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->tess_offchip_ring_size;
si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va_tmz >> 8);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
sctx->screen->vgt_hs_offchip_param);
sctx->screen->hs.hs_offchip_param);
sctx->cs_preamble_tess_rings_tmz = pm4;
}
}