pvr: add device info and functions for calculating available temps

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32258>
This commit is contained in:
Simon Perretta 2024-05-03 21:23:53 +01:00 committed by Marge Bot
parent aea38c1e47
commit 7a32dc673b
6 changed files with 144 additions and 0 deletions

View file

@ -62,6 +62,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_tpu_border_colour_enhanced = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
@ -86,6 +87,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 16U,
.tile_size_y = 16U,
.unified_store_depth = 64U,
.usc_min_output_registers_per_pix = 1U,
.usc_slots = 14U,
.uvs_banks = 2U,

View file

@ -70,6 +70,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_tpu_dm_global_registers = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
@ -96,6 +97,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 16U,
.tile_size_y = 16U,
.unified_store_depth = 256U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 64U,
.uvs_banks = 8U,

View file

@ -63,6 +63,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_tpu_array_textures = true,
.has_tpu_extended_integer_lookup = true,
.has_tpu_image_state_v2 = true,
.has_unified_store_depth = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_slots = true,
@ -87,6 +88,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.slc_cache_line_size_bits = 512U,
.tile_size_x = 32U,
.tile_size_y = 32U,
.unified_store_depth = 256U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 32U,
.uvs_banks = 8U,

View file

@ -292,6 +292,7 @@ struct pvr_device_features {
bool has_tpu_dm_global_registers : 1;
bool has_tpu_extended_integer_lookup : 1;
bool has_tpu_image_state_v2 : 1;
bool has_unified_store_depth : 1;
bool has_usc_f16sop_u8 : 1;
bool has_usc_min_output_registers_per_pix : 1;
bool has_usc_pixel_partition_mask : 1;
@ -320,6 +321,7 @@ struct pvr_device_features {
uint32_t slc_cache_line_size_bits;
uint32_t tile_size_x;
uint32_t tile_size_y;
uint32_t unified_store_depth;
uint32_t usc_min_output_registers_per_pix;
uint32_t usc_slots;
uint32_t uvs_banks;

View file

@ -142,4 +142,20 @@
#define PVR_NUM_PBE_EMIT_REGS 8U
/* Factor that rogue_max_wg_temps() applies to a USRM allocation count to
 * obtain a temporary-register count.
 */
#define ROGUE_USRM_GRANULARITY_IN_REGISTERS 4U
/* Number of USRM lines that rogue_get_min_attr_in_usrm_lines() subtracts from
 * the per-instance line count.
 */
#define ROGUE_RESERVED_USRM_LINES 2U
/* Multiplied by the unified_store_depth feature value to give the unified
 * store size per instance.
 */
#define ROGUE_USC_NUM_UNIFIED_STORE_BANKS 8U
#define ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY 8U
/* Temps use the same allocation granularity as registers. */
#define ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY \
ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY
#define ROGUE_USRM_LINE_SIZE 16U
/* Temp allocation granularity times the USRM line size (8 * 16 = 128 with the
 * values above).
 */
#define ROGUE_USRM_LINE_SIZE_PER_INSTANCE \
(ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY * ROGUE_USRM_LINE_SIZE)
#endif /* ROGUE_HW_DEFS_H */

View file

@ -347,4 +347,124 @@ __rogue_get_param_vf_max(const struct pvr_device_info *dev_info)
/* The X and Y variants both expand to the same helper,
 * __rogue_get_param_vf_max(), so they always yield the same value.
 */
#define rogue_get_param_vf_max_x(dev_info) __rogue_get_param_vf_max(dev_info)
#define rogue_get_param_vf_max_y(dev_info) __rogue_get_param_vf_max(dev_info)
/**
 * Returns the device's usc_slots feature value multiplied by
 * ROGUE_MAX_INSTANCES_PER_TASK.
 *
 * The feature lookup falls back to 0 when unavailable, which trips the
 * assertion in debug builds.
 *
 * \param[in] dev_info Device info to query.
 * \return usc_slots * ROGUE_MAX_INSTANCES_PER_TASK.
 */
static inline uint32_t
rogue_get_max_total_instances(const struct pvr_device_info *dev_info)
{
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U);

   assert(usc_slots);

   return ROGUE_MAX_INSTANCES_PER_TASK * usc_slots;
}
/**
 * Returns the unified store size per instance: the unified_store_depth
 * feature value multiplied by ROGUE_USC_NUM_UNIFIED_STORE_BANKS.
 *
 * The feature lookup falls back to 0 when unavailable, which trips the
 * assertion in debug builds.
 *
 * \param[in] dev_info Device info to query.
 * \return unified_store_depth * ROGUE_USC_NUM_UNIFIED_STORE_BANKS.
 */
static inline uint32_t
rogue_get_unified_store_size_per_instance(
   const struct pvr_device_info *dev_info)
{
   const uint32_t unified_store_depth =
      PVR_GET_FEATURE_VALUE(dev_info, unified_store_depth, 0U);

   assert(unified_store_depth);

   return ROGUE_USC_NUM_UNIFIED_STORE_BANKS * unified_store_depth;
}
/**
 * Returns the per-instance unified store size expressed in whole USRM lines
 * (size / ROGUE_USRM_LINE_SIZE_PER_INSTANCE), minus
 * ROGUE_RESERVED_USRM_LINES.
 *
 * NOTE(review): the subtraction is unsigned, so a store holding fewer than
 * ROGUE_RESERVED_USRM_LINES lines would wrap around - confirm no supported
 * core is that small.
 *
 * \param[in] dev_info Device info to query.
 * \return Number of USRM lines left after removing the reserved ones.
 */
static inline uint32_t
rogue_get_min_attr_in_usrm_lines(const struct pvr_device_info *dev_info)
{
   const uint32_t unified_store_size_per_instance =
      rogue_get_unified_store_size_per_instance(dev_info);

   assert(unified_store_size_per_instance);

   /* Integer division: any partial trailing line is discarded. */
   const uint32_t whole_lines =
      unified_store_size_per_instance / ROGUE_USRM_LINE_SIZE_PER_INSTANCE;

   return whole_lines - ROGUE_RESERVED_USRM_LINES;
}
/**
 * Returns half of ROGUE_MAX_INSTANCES_PER_TASK.
 *
 * \param[in] dev_info Device info; not read by this implementation.
 * \return ROGUE_MAX_INSTANCES_PER_TASK / 2.
 */
static inline uint32_t
rogue_get_parallel_instances(const struct pvr_device_info *dev_info)
{
   const uint32_t half_task_instances = ROGUE_MAX_INSTANCES_PER_TASK / 2;

   return half_task_instances;
}
/**
 * Returns the usable USRM line count (see
 * rogue_get_min_attr_in_usrm_lines()) scaled by
 * ROGUE_USRM_LINE_SIZE_PER_INSTANCE.
 *
 * \param[in] dev_info Device info to query.
 * \return Usable USRM lines * ROGUE_USRM_LINE_SIZE_PER_INSTANCE.
 */
static inline uint32_t
rogue_get_unified_store_temps_per_instance(
   const struct pvr_device_info *dev_info)
{
   const uint32_t usable_lines = rogue_get_min_attr_in_usrm_lines(dev_info);

   return usable_lines * ROGUE_USRM_LINE_SIZE_PER_INSTANCE;
}
/**
 * Returns the per-instance temp count multiplied by the number of parallel
 * instances.
 *
 * \param[in] dev_info Device info to query.
 * \return rogue_get_unified_store_temps_per_instance() *
 *         rogue_get_parallel_instances().
 */
static inline uint32_t
rogue_get_unified_store_total_temps(const struct pvr_device_info *dev_info)
{
   const uint32_t temps_per_instance =
      rogue_get_unified_store_temps_per_instance(dev_info);
   const uint32_t parallel_instances = rogue_get_parallel_instances(dev_info);

   return temps_per_instance * parallel_instances;
}
/**
 * Returns ROGUE_MAX_INSTANCES_PER_TASK divided by the number of parallel
 * instances.
 *
 * \param[in] dev_info Device info, forwarded to
 *                     rogue_get_parallel_instances().
 * \return ROGUE_MAX_INSTANCES_PER_TASK / rogue_get_parallel_instances().
 */
static inline uint32_t
rogue_get_instance_groups_per_slot(const struct pvr_device_info *dev_info)
{
   const uint32_t parallel_instances = rogue_get_parallel_instances(dev_info);

   return ROGUE_MAX_INSTANCES_PER_TASK / parallel_instances;
}
/**
 * Divides the per-instance temp count by (instance groups per slot *
 * usc_slots), rounds the result down to a multiple of
 * ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY and returns it, with a floor
 * of 24.
 *
 * The usc_slots lookup falls back to 0 when unavailable, which trips the
 * assertion in debug builds.
 *
 * \param[in] dev_info Device info to query.
 * \return The aligned temp count, never less than 24.
 */
static inline uint32_t
rogue_get_optimal_temps(const struct pvr_device_info *dev_info)
{
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U);

   assert(usc_slots);

   const uint32_t temps_per_instance =
      rogue_get_unified_store_temps_per_instance(dev_info);
   const uint32_t total_groups =
      rogue_get_instance_groups_per_slot(dev_info) * usc_slots;

   /* Clearing the low bits rounds down to the allocation granularity. */
   const uint32_t max_temps_full_slot_use =
      (temps_per_instance / total_groups) &
      ~(ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY - 1);

   return MAX2(max_temps_full_slot_use, 24U);
}
/**
 * Returns half of the per-instance temp count, capped at 248.
 *
 * \param[in] dev_info Device info to query.
 * \return min(rogue_get_unified_store_temps_per_instance() / 2, 248).
 */
static inline uint32_t rogue_get_temps(const struct pvr_device_info *dev_info)
{
   const uint32_t half_temps =
      rogue_get_unified_store_temps_per_instance(dev_info) / 2;

   return MIN2(half_temps, 248U);
}
/**
 * Clamps a requested temp count for a workgroup.
 *
 * The limit is only applied when the workgroup both uses a barrier and is
 * larger than ROGUE_MAX_INSTANCES_PER_TASK; otherwise \p temps is returned
 * unchanged.
 *
 * NOTE(review): for wg_size == 0 this returns the result of
 * rogue_get_compute_max_work_group_size() rather than a value derived from
 * \p temps - confirm that is intended.
 *
 * \param[in] dev_info    Device info to query.
 * \param[in] temps       Requested number of temps.
 * \param[in] wg_size     Workgroup size; asserted to be no larger than
 *                        rogue_get_max_total_instances().
 * \param[in] has_barrier Whether the workgroup uses a barrier.
 * \return The (possibly clamped) temp count.
 */
static inline uint32_t
rogue_max_wg_temps(const struct pvr_device_info *dev_info,
                   unsigned temps,
                   unsigned wg_size,
                   bool has_barrier)
{
   assert(wg_size <= rogue_get_max_total_instances(dev_info));

   if (!wg_size)
      return rogue_get_compute_max_work_group_size(dev_info);

   /* No clamping without a barrier or when the workgroup fits in one task. */
   if (!has_barrier || wg_size <= ROGUE_MAX_INSTANCES_PER_TASK)
      return temps;

   /* Number of slots allocated for each workgroup. */
   const unsigned slots_per_wg =
      DIV_ROUND_UP(wg_size, ROGUE_MAX_INSTANCES_PER_TASK);

   /* +1 for fragmentation / coarse checking. */
   const unsigned slot_divisor = slots_per_wg + 1;

   const unsigned usrm_lines = rogue_get_min_attr_in_usrm_lines(dev_info);

   /* Number of USRM lines available for each slot. */
   const unsigned lines_per_slot = usrm_lines / slot_divisor;

   /* Convert lines to USRM allocs. If a slot gets less than one whole line,
    * scale up before dividing so the result does not truncate to zero.
    */
   const unsigned max_allocs =
      lines_per_slot != 0
         ? lines_per_slot * ROGUE_USRM_LINE_SIZE
         : (usrm_lines * ROGUE_USRM_LINE_SIZE) / slot_divisor;

   /* Convert USRM allocs to temporary registers. */
   const unsigned max_temps_for_barrier =
      max_allocs * ROGUE_USRM_GRANULARITY_IN_REGISTERS;

   /* Clamp to provided limit. */
   return MIN2(temps, max_temps_for_barrier);
}
#endif /* ROGUE_HW_UTILS_H */