ac/gpu_info: move HS info into radeon_info

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34544>
This commit is contained in:
Marek Olšák 2025-04-14 20:05:21 -04:00
parent ea294349bd
commit d82eda72a1
9 changed files with 86 additions and 98 deletions

View file

@ -1733,6 +1733,69 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->has_set_sh_pairs_packed = info->register_shadowing_required;
}
bool double_offchip_wg = info->gfx_level >= GFX7 &&
info->family != CHIP_CARRIZO &&
info->family != CHIP_STONEY;
/* This is the size of all TCS outputs in memory per workgroup.
* Hawaii can't handle num_workgroups > 256 with 8K per workgroup, so use 4K.
*/
unsigned wg_size_in_dwords = info->family == CHIP_HAWAII ? 4096 : 8192;
unsigned wg_size_enum;
unsigned num_workgroups_per_se;
switch (wg_size_in_dwords) {
case 8192:
wg_size_enum = V_03093C_X_8K_DWORDS;
break;
case 4096:
wg_size_enum = V_03093C_X_4K_DWORDS;
break;
case 2048:
wg_size_enum = V_03093C_X_2K_DWORDS;
break;
case 1024:
wg_size_enum = V_03093C_X_1K_DWORDS;
break;
default:
unreachable("invalid TCS workgroup size");
}
/* Vega10 should limit num_workgroups to 508 (127 per SE)
* Gfx7 should limit num_workgroups to 508 (127 per SE)
* Gfx6 should limit num_workgroups to 126 (63 per SE)
*/
if (info->gfx_level >= GFX11) {
num_workgroups_per_se = 256;
} else if (info->gfx_level >= GFX10) {
num_workgroups_per_se = 128;
} else if (info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20) {
num_workgroups_per_se = double_offchip_wg ? 128 : 64;
} else {
num_workgroups_per_se = double_offchip_wg ? 127 : 63;
}
unsigned num_workgroups = num_workgroups_per_se * info->max_se;
if (info->gfx_level >= GFX11) {
/* OFFCHIP_BUFFERING is per SE. */
info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(num_workgroups_per_se - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(wg_size_enum);
} else if (info->gfx_level >= GFX10_3) {
info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(num_workgroups - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(wg_size_enum);
} else if (info->gfx_level >= GFX7) {
info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups -
(info->gfx_level >= GFX8 ? 1 : 0)) |
S_03093C_OFFCHIP_GRANULARITY_GFX7(wg_size_enum);
} else {
info->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) |
S_0089B0_OFFCHIP_GRANULARITY(wg_size_enum);
}
info->tess_offchip_ring_size = num_workgroups * wg_size_in_dwords * 4;
info->tess_factor_ring_size = 48 * 1024 * info->max_se;
info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;
/* GFX1013 is GFX10 plus ray tracing instructions */
info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 ||
info->family == CHIP_GFX1013;
@ -2410,73 +2473,6 @@ ac_get_compute_resource_limits(const struct radeon_info *info, unsigned waves_pe
return compute_resource_limits;
}
/* Compute the tessellation (HS) off-chip settings for a GPU.
 *
 * Reads gfx_level, family and max_se from "info" and fills every field of "hs":
 * the VGT_HS_OFFCHIP_PARAM register value and the sizes of the off-chip and
 * tess-factor rings, plus their sum.
 *
 * \param info  GPU description; only read.
 * \param hs    Output; all four fields are overwritten.
 */
void ac_get_hs_info(const struct radeon_info *info,
struct ac_hs_info *hs)
{
/* True on GFX7+ except Carrizo and Stoney. It only matters for pre-GFX10 chips,
 * where it selects the larger per-SE workgroup count below.
 */
bool double_offchip_wg = info->gfx_level >= GFX7 &&
info->family != CHIP_CARRIZO &&
info->family != CHIP_STONEY;
/* This is the size of all TCS outputs in memory per workgroup.
 * Hawaii can't handle num_workgroups > 256 with 8K per workgroup, so use 4K.
 */
unsigned wg_size_in_dwords = info->family == CHIP_HAWAII ? 4096 : 8192;
unsigned wg_size_enum;
unsigned num_workgroups_per_se;
/* Translate the dword count into the register's granularity enum.
 * With the assignment above only the 8192 and 4096 cases can be taken.
 */
switch (wg_size_in_dwords) {
case 8192:
wg_size_enum = V_03093C_X_8K_DWORDS;
break;
case 4096:
wg_size_enum = V_03093C_X_4K_DWORDS;
break;
case 2048:
wg_size_enum = V_03093C_X_2K_DWORDS;
break;
case 1024:
wg_size_enum = V_03093C_X_1K_DWORDS;
break;
default:
unreachable("invalid TCS workgroup size");
}
/* Vega10 should limit num_workgroups to 508 (127 per SE)
 * Gfx7 should limit num_workgroups to 508 (127 per SE)
 * Gfx6 should limit num_workgroups to 126 (63 per SE)
 */
if (info->gfx_level >= GFX11) {
num_workgroups_per_se = 256;
} else if (info->gfx_level >= GFX10) {
num_workgroups_per_se = 128;
} else if (info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20) {
num_workgroups_per_se = double_offchip_wg ? 128 : 64;
} else {
num_workgroups_per_se = double_offchip_wg ? 127 : 63;
}
/* Total number of workgroups across all shader engines. */
unsigned num_workgroups = num_workgroups_per_se * info->max_se;
/* The OFFCHIP_BUFFERING field is encoded differently per generation:
 *  - GFX11+:          per-SE count minus 1
 *  - GFX10.3:         total count minus 1
 *  - GFX8..GFX10.1:   total count minus 1 (GFX7-style field layout)
 *  - GFX7:            total count, nothing subtracted
 *  - older (else):    total count, using the R_0089B0 register layout
 */
if (info->gfx_level >= GFX11) {
/* OFFCHIP_BUFFERING is per SE. */
hs->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(num_workgroups_per_se - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(wg_size_enum);
} else if (info->gfx_level >= GFX10_3) {
hs->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(num_workgroups - 1) |
S_03093C_OFFCHIP_GRANULARITY_GFX103(wg_size_enum);
} else if (info->gfx_level >= GFX7) {
hs->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups -
(info->gfx_level >= GFX8 ? 1 : 0)) |
S_03093C_OFFCHIP_GRANULARITY_GFX7(wg_size_enum);
} else {
hs->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) |
S_0089B0_OFFCHIP_GRANULARITY(wg_size_enum);
}
/* Off-chip ring: one wg_size_in_dwords slot per workgroup; "* 4" converts
 * dwords to bytes.
 */
hs->tess_offchip_ring_size = num_workgroups * wg_size_in_dwords * 4;
/* Tess factor ring: 48 * 1024 per shader engine (presumably bytes, same unit
 * as the off-chip ring since the two are summed below -- TODO confirm).
 */
hs->tess_factor_ring_size = 48 * 1024 * info->max_se;
/* Combined size of both rings. */
hs->total_tess_ring_size = hs->tess_offchip_ring_size + hs->tess_factor_ring_size;
}
static uint16_t get_task_num_entries(enum radeon_family fam)
{
/* Number of task shader ring entries. Needs to be a power of two.

View file

@ -293,6 +293,12 @@ struct radeon_info {
uint32_t total_attribute_pos_prim_ring_size; /* GFX11+ */
bool has_attr_ring;
/* Tessellation rings. */
uint32_t hs_offchip_param;
uint32_t tess_factor_ring_size;
uint32_t tess_offchip_ring_size;
uint32_t total_tess_ring_size;
/* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
@ -347,16 +353,6 @@ unsigned ac_get_compute_resource_limits(const struct radeon_info *info,
unsigned waves_per_threadgroup, unsigned max_waves_per_sh,
unsigned threadgroups_per_cu);
/* Tessellation (HS) ring parameters filled in by ac_get_hs_info(). */
struct ac_hs_info {
/* Value that the drivers write to the VGT_HS_OFFCHIP_PARAM register. */
uint32_t hs_offchip_param;
/* Size of the tess factor ring; the drivers divide it by 4 to program VGT_TF_RING_SIZE. */
uint32_t tess_factor_ring_size;
/* Size of the off-chip ring; the drivers also use it as the offset of the
 * tess factor ring inside the shared tess rings buffer.
 */
uint32_t tess_offchip_ring_size;
/* tess_offchip_ring_size + tess_factor_ring_size; used as the allocation size
 * of the tess rings buffer.
 */
uint32_t total_tess_ring_size;
};
/* Fill "hs" with the VGT_HS_OFFCHIP_PARAM value and the tessellation ring sizes
 * for the GPU described by "info".
 */
void ac_get_hs_info(const struct radeon_info *info,
struct ac_hs_info *hs);
/* Task rings BO layout information.
* This BO is shared between GFX and ACE queues so that the ACE and GFX
* firmware can cooperate on task->mesh dispatches and is also used to

View file

@ -2285,7 +2285,6 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
pdev->gs_table_depth = ac_get_gs_table_depth(pdev->info.gfx_level, pdev->info.family);
ac_get_hs_info(&pdev->info, &pdev->hs);
ac_get_task_info(&pdev->info, &pdev->task_info);
radv_get_binning_settings(pdev, &pdev->binning_settings);

View file

@ -158,7 +158,6 @@ struct radv_physical_device {
uint32_t gs_table_depth;
struct ac_hs_info hs;
struct ac_task_info task_info;
struct radv_binning_settings binning_settings;

View file

@ -313,10 +313,11 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc += 8;
if (tess_rings_bo) {
radv_set_ring_buffer(pdev, tess_rings_bo, pdev->hs.tess_offchip_ring_size, pdev->hs.tess_factor_ring_size, false,
false, true, 0, 0, &desc[0]);
radv_set_ring_buffer(pdev, tess_rings_bo, pdev->info.tess_offchip_ring_size, pdev->info.tess_factor_ring_size,
false, false, true, 0, 0, &desc[0]);
radv_set_ring_buffer(pdev, tess_rings_bo, 0, pdev->hs.tess_offchip_ring_size, false, false, true, 0, 0, &desc[4]);
radv_set_ring_buffer(pdev, tess_rings_bo, 0, pdev->info.tess_offchip_ring_size, false, false, true, 0, 0,
&desc[4]);
}
desc += 8;
@ -397,8 +398,8 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
if (!tess_rings_bo)
return;
tf_ring_size = pdev->hs.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo) + pdev->hs.tess_offchip_ring_size;
tf_ring_size = pdev->info.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo) + pdev->info.tess_offchip_ring_size;
radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
@ -421,11 +422,11 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_set_uconfig_reg(R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
}
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->info.hs_offchip_param);
} else {
radeon_set_config_reg(R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
radeon_set_config_reg(R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
radeon_set_config_reg(R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->info.hs_offchip_param);
}
radeon_end();
@ -997,11 +998,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.tess_rings && needs->tess_rings) {
result = radv_bo_create(device, NULL, pdev->hs.total_tess_ring_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags,
result = radv_bo_create(device, NULL, pdev->info.total_tess_ring_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH, 0, true, &tess_rings_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, tess_rings_bo, 0, 0, pdev->hs.total_tess_ring_size);
radv_rmv_log_command_buffer_bo_create(device, tess_rings_bo, 0, 0, pdev->info.total_tess_ring_size);
}
if (!queue->ring_info.task_rings && needs->task_rings) {

View file

@ -490,7 +490,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
case nir_intrinsic_load_ring_tess_factors_amd: {
assert(s->tess_offchip_ring);
nir_def *addr = nir_channel(b, s->tess_offchip_ring, 0);
addr = nir_iadd_imm(b, addr, sel->screen->hs.tess_offchip_ring_size);
addr = nir_iadd_imm(b, addr, sel->screen->info.tess_offchip_ring_size);
replacement = nir_vector_insert_imm(b, s->tess_offchip_ring, addr, 0);
break;
}

View file

@ -1461,8 +1461,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
ac_get_hs_info(&sscreen->info, &sscreen->hs);
if (sscreen->debug_flags & DBG(NO_OUT_OF_ORDER))
sscreen->info.has_out_of_order_rast = false;

View file

@ -519,7 +519,6 @@ struct si_screen {
unsigned pa_sc_raster_config_1;
unsigned se_tile_repeat;
unsigned gs_table_depth;
struct ac_hs_info hs;
unsigned eqaa_force_coverage_samples;
unsigned eqaa_force_z_samples;
unsigned eqaa_force_color_samples;

View file

@ -4544,7 +4544,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sscreen->hs.total_tess_ring_size,
sscreen->info.total_tess_ring_size,
2 * 1024 * 1024);
if (!sscreen->tess_rings) {
simple_mtx_unlock(&sscreen->tess_ring_lock);
@ -4559,7 +4559,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
SI_RESOURCE_FLAG_DRIVER_INTERNAL |
SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sscreen->hs.total_tess_ring_size,
sscreen->info.total_tess_ring_size,
2 * 1024 * 1024);
}
}
@ -5085,9 +5085,9 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
struct pipe_resource *tf_ring =
sctx->ws->cs_is_secure(&sctx->gfx_cs) ? sscreen->tess_rings_tmz : sscreen->tess_rings;
uint64_t factor_va = si_resource(tf_ring)->gpu_address +
sscreen->hs.tess_offchip_ring_size;
sscreen->info.tess_offchip_ring_size;
unsigned tf_ring_size_field = sscreen->hs.tess_factor_ring_size / 4;
unsigned tf_ring_size_field = sscreen->info.tess_factor_ring_size / 4;
if (sctx->gfx_level >= GFX11)
tf_ring_size_field /= sscreen->info.max_se;
@ -5104,7 +5104,7 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX7) {
radeon_set_uconfig_reg_seq(R_030938_VGT_TF_RING_SIZE, 3);
radeon_emit(S_030938_SIZE(tf_ring_size_field)); /* R_030938_VGT_TF_RING_SIZE */
radeon_emit(sscreen->hs.hs_offchip_param); /* R_03093C_VGT_HS_OFFCHIP_PARAM */
radeon_emit(sscreen->info.hs_offchip_param); /* R_03093C_VGT_HS_OFFCHIP_PARAM */
radeon_emit(factor_va >> 8); /* R_030940_VGT_TF_MEMORY_BASE */
if (sctx->gfx_level >= GFX12)
@ -5116,7 +5116,7 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
} else {
radeon_set_config_reg(R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field));
radeon_set_config_reg(R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
radeon_set_config_reg(R_0089B0_VGT_HS_OFFCHIP_PARAM, sscreen->hs.hs_offchip_param);
radeon_set_config_reg(R_0089B0_VGT_HS_OFFCHIP_PARAM, sscreen->info.hs_offchip_param);
}
radeon_end();
}