pvr: Feature support TPU_PARALLEL_INSTANCES

Signed-off-by: Ashish Chauhan <Ashish.Chauhan@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Ashish Chauhan 2024-09-11 12:06:30 +01:00 committed by Marge Bot
parent 82fad3f258
commit 849a331a7d
6 changed files with 23 additions and 11 deletions

View file

@ -59,6 +59,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_tpu_border_colour_enhanced = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_tpu_parallel_instances = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
@ -83,6 +84,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 16U,
.tile_size_y = 16U,
.tpu_parallel_instances = 1U,
.unified_store_depth = 64U,
.usc_min_output_registers_per_pix = 1U,
.usc_slots = 14U,

View file

@ -66,6 +66,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_tpu_dm_global_registers = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_tpu_parallel_instances = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_itrsmp = true,
@ -94,6 +95,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 16U,
.tile_size_y = 16U,
.tpu_parallel_instances = 4U,
.unified_store_depth = 256U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 64U,

View file

@ -59,6 +59,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_tpu_array_textures = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_tpu_parallel_instances = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_itrsmp = true,
@ -84,6 +85,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 32U,
.tile_size_y = 32U,
.tpu_parallel_instances = 4U,
.unified_store_depth = 256U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 32U,

View file

@ -291,6 +291,7 @@ struct pvr_device_features {
bool has_tpu_dm_global_registers : 1;
bool has_tpu_extended_integer_lookup : 1;
bool has_tpu_image_state_v2 : 1;
bool has_tpu_parallel_instances : 1;
bool has_unified_store_depth : 1;
bool has_usc_f16sop_u8 : 1;
bool has_usc_itrsmp : 1;
@ -321,6 +322,7 @@ struct pvr_device_features {
uint32_t slc_cache_line_size_bits;
uint32_t tile_size_x;
uint32_t tile_size_y;
uint32_t tpu_parallel_instances;
uint32_t unified_store_depth;
uint32_t usc_min_output_registers_per_pix;
uint32_t usc_slots;

View file

@ -267,12 +267,23 @@ rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
/* Returns the maximum number of user vertex output components for the device
 * described by dev_info: 64 when uvs_banks <= 8 and uvs_pba_entries == 160,
 * and 128 otherwise.
 *
 * On the 64-component path an assert additionally checks that
 * tpu_parallel_instances is <= 2, or that dev_info->ident.b is <= 36 or
 * equal to 46.
 *
 * The last argument of each PVR_GET_FEATURE_VALUE() call is presumably the
 * fallback used when the core does not report that feature - TODO confirm
 * against the macro definition.
 *
 * NOTE(review): this listing looks like a diff rendered without its +/-
 * markers, so removed and added lines appear interleaved below (the 0U
 * defaults next to the 160U/2U ones, and the `if` condition appearing twice).
 * Read it against the actual commit before relying on statement order.
 */
static inline uint32_t pvr_get_max_user_vertex_output_components(
const struct pvr_device_info *dev_info)
{
/* Default value based on the minimum value found in all existing cores. */
const uint32_t uvs_pba_entries =
PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160U);
/* Default value based on the minimum value found in all existing cores. */
const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2U);
if (uvs_banks <= 8U && uvs_pba_entries == 160U) {
ASSERTED const uint32_t tpu_parallel_instances =
PVR_GET_FEATURE_VALUE(dev_info, tpu_parallel_instances, 1U);
/* Cores with > 2 ppc support vertex sizes of >= 128 dwords */
assert(tpu_parallel_instances <= 2 ||
(dev_info->ident.b <= 36 || dev_info->ident.b == 46));
if (uvs_banks <= 8U && uvs_pba_entries == 160U)
return 64U;
}
return 128U;
}

View file

@ -308,13 +308,6 @@ static bool pvr_physical_device_get_properties(
const uint32_t max_multisample =
PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);
/* Default value based on the minimum value found in all existing cores. */
const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);
/* Default value based on the minimum value found in all existing cores. */
const uint32_t uvs_pba_entries =
PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);
UNUSED const uint32_t sub_pixel_precision =
PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;
@ -323,7 +316,7 @@ static bool pvr_physical_device_get_properties(
UNUSED const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
UNUSED const uint32_t max_user_vertex_components =
((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
pvr_get_max_user_vertex_output_components(dev_info);
/* The workgroup invocations are limited by the case where we have a compute
* barrier - each slot has a fixed number of invocations, the whole workgroup