mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
radeonsi: port tess ring calcs to the common helper.
This uses the common helper code to implement the tess ring sizing. One open question: should radeonsi be using tess_offchip_ring_offset in some of the places where it currently uses tess_factor_ring_size?
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16415>
This commit is contained in:
parent
17fcbd8742
commit
14b1ed1ce1
5 changed files with 13 additions and 69 deletions
|
|
@ -1215,60 +1215,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
|
||||
sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3;
|
||||
|
||||
/* Determine tessellation ring info. */
|
||||
bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
|
||||
sscreen->info.family != CHIP_CARRIZO &&
|
||||
sscreen->info.family != CHIP_STONEY;
|
||||
/* This must be one less than the maximum number due to a hw limitation.
|
||||
* Various hardware bugs need this.
|
||||
*/
|
||||
unsigned max_offchip_buffers_per_se;
|
||||
|
||||
if (sscreen->info.chip_class >= GFX11)
|
||||
max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */
|
||||
else if (sscreen->info.chip_class >= GFX10)
|
||||
max_offchip_buffers_per_se = 128;
|
||||
/* Only certain chips can use the maximum value. */
|
||||
else if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20)
|
||||
max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
|
||||
else
|
||||
max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
|
||||
|
||||
unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se;
|
||||
unsigned offchip_granularity;
|
||||
|
||||
/* Hawaii has a bug with offchip buffers > 256 that can be worked
|
||||
* around by setting 4K granularity.
|
||||
*/
|
||||
if (sscreen->info.family == CHIP_HAWAII) {
|
||||
sscreen->tess_offchip_block_dw_size = 4096;
|
||||
offchip_granularity = V_03093C_X_4K_DWORDS;
|
||||
} else {
|
||||
sscreen->tess_offchip_block_dw_size = 8192;
|
||||
offchip_granularity = V_03093C_X_8K_DWORDS;
|
||||
}
|
||||
|
||||
sscreen->tess_factor_ring_size = 48 * 1024 * sscreen->info.max_se;
|
||||
sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
|
||||
|
||||
if (sscreen->info.chip_class >= GFX11) {
|
||||
/* OFFCHIP_BUFFERING is per SE. */
|
||||
sscreen->vgt_hs_offchip_param =
|
||||
S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
|
||||
} else if (sscreen->info.chip_class >= GFX10_3) {
|
||||
sscreen->vgt_hs_offchip_param =
|
||||
S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
|
||||
} else if (sscreen->info.chip_class >= GFX7) {
|
||||
if (sscreen->info.chip_class >= GFX8)
|
||||
--max_offchip_buffers;
|
||||
sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
|
||||
} else {
|
||||
assert(offchip_granularity == V_03093C_X_8K_DWORDS);
|
||||
sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
|
||||
}
|
||||
ac_get_hs_info(&sscreen->info, &sscreen->hs);
|
||||
|
||||
sscreen->has_draw_indirect_multi =
|
||||
(sscreen->info.family >= CHIP_POLARIS10) ||
|
||||
|
|
|
|||
|
|
@ -572,10 +572,7 @@ struct si_screen {
|
|||
unsigned pa_sc_raster_config_1;
|
||||
unsigned se_tile_repeat;
|
||||
unsigned gs_table_depth;
|
||||
unsigned tess_offchip_block_dw_size;
|
||||
unsigned tess_offchip_ring_size;
|
||||
unsigned tess_factor_ring_size;
|
||||
unsigned vgt_hs_offchip_param;
|
||||
struct ac_hs_info hs;
|
||||
unsigned eqaa_force_coverage_samples;
|
||||
unsigned eqaa_force_z_samples;
|
||||
unsigned eqaa_force_color_samples;
|
||||
|
|
|
|||
|
|
@ -350,7 +350,7 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum
|
|||
}
|
||||
|
||||
if (ring == TCS_FACTOR_RING) {
|
||||
unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
|
||||
unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
|
||||
addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -720,7 +720,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
|||
|
||||
/* Make sure the output data fits in the offchip buffer */
|
||||
*num_patches =
|
||||
MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size);
|
||||
MIN2(*num_patches, (sctx->screen->hs.tess_offchip_block_dw_size * 4) / output_patch_size);
|
||||
|
||||
/* Make sure that the data fits in LDS. This assumes the shaders only
|
||||
* use LDS for the inputs and outputs.
|
||||
|
|
|
|||
|
|
@ -4022,7 +4022,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
*/
|
||||
sctx->tess_rings = pipe_aligned_buffer_create(
|
||||
sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT,
|
||||
sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
|
||||
sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
|
||||
if (!sctx->tess_rings)
|
||||
return;
|
||||
|
||||
|
|
@ -4031,13 +4031,13 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
sctx->b.screen,
|
||||
PIPE_RESOURCE_FLAG_ENCRYPTED | SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
|
||||
sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
|
||||
}
|
||||
|
||||
uint64_t factor_va =
|
||||
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size;
|
||||
si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
|
||||
|
||||
unsigned tf_ring_size_field = sctx->screen->tess_factor_ring_size / 4;
|
||||
unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4;
|
||||
if (sctx->chip_class >= GFX11)
|
||||
tf_ring_size_field /= sctx->screen->info.max_se;
|
||||
|
||||
|
|
@ -4067,7 +4067,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
S_030944_BASE_HI(factor_va >> 40));
|
||||
}
|
||||
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM,
|
||||
sctx->screen->vgt_hs_offchip_param);
|
||||
sctx->screen->hs.hs_offchip_param);
|
||||
radeon_end();
|
||||
return;
|
||||
}
|
||||
|
|
@ -4087,7 +4087,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
si_pm4_set_reg(sctx->cs_preamble_state, R_030944_VGT_TF_MEMORY_BASE_HI,
|
||||
S_030944_BASE_HI(factor_va >> 40));
|
||||
si_pm4_set_reg(sctx->cs_preamble_state, R_03093C_VGT_HS_OFFCHIP_PARAM,
|
||||
sctx->screen->vgt_hs_offchip_param);
|
||||
sctx->screen->hs.hs_offchip_param);
|
||||
} else {
|
||||
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
|
|
@ -4095,18 +4095,18 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
S_008988_SIZE(tf_ring_size_field));
|
||||
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
|
||||
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
|
||||
sctx->screen->vgt_hs_offchip_param);
|
||||
sctx->screen->hs.hs_offchip_param);
|
||||
sctx->cs_preamble_tess_rings = pm4;
|
||||
|
||||
if (sctx->screen->info.has_tmz_support) {
|
||||
pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
uint64_t factor_va_tmz =
|
||||
si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->tess_offchip_ring_size;
|
||||
si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
|
||||
si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE,
|
||||
S_008988_SIZE(tf_ring_size_field));
|
||||
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va_tmz >> 8);
|
||||
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
|
||||
sctx->screen->vgt_hs_offchip_param);
|
||||
sctx->screen->hs.hs_offchip_param);
|
||||
sctx->cs_preamble_tess_rings_tmz = pm4;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue