mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
pvr: add device info and functions for calculating available temps
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32258>
This commit is contained in:
parent
aea38c1e47
commit
7a32dc673b
6 changed files with 144 additions and 0 deletions
|
|
@ -62,6 +62,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
|
|||
.has_tpu_border_colour_enhanced = true,
|
||||
.has_tpu_extended_integer_lookup = true,
|
||||
.has_tpu_image_state_v2 = true,
|
||||
.has_unified_store_depth = true,
|
||||
.has_usc_f16sop_u8 = true,
|
||||
.has_usc_min_output_registers_per_pix = true,
|
||||
.has_usc_pixel_partition_mask = true,
|
||||
|
|
@ -86,6 +87,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = {
|
|||
.slc_cache_line_size_bits = 512U,
|
||||
.tile_size_x = 16U,
|
||||
.tile_size_y = 16U,
|
||||
.unified_store_depth = 64U,
|
||||
.usc_min_output_registers_per_pix = 1U,
|
||||
.usc_slots = 14U,
|
||||
.uvs_banks = 2U,
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
|
|||
.has_tpu_dm_global_registers = true,
|
||||
.has_tpu_extended_integer_lookup = true,
|
||||
.has_tpu_image_state_v2 = true,
|
||||
.has_unified_store_depth = true,
|
||||
.has_usc_f16sop_u8 = true,
|
||||
.has_usc_min_output_registers_per_pix = true,
|
||||
.has_usc_pixel_partition_mask = true,
|
||||
|
|
@ -96,6 +97,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = {
|
|||
.slc_cache_line_size_bits = 512U,
|
||||
.tile_size_x = 16U,
|
||||
.tile_size_y = 16U,
|
||||
.unified_store_depth = 256U,
|
||||
.usc_min_output_registers_per_pix = 2U,
|
||||
.usc_slots = 64U,
|
||||
.uvs_banks = 8U,
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
|
|||
.has_tpu_array_textures = true,
|
||||
.has_tpu_extended_integer_lookup = true,
|
||||
.has_tpu_image_state_v2 = true,
|
||||
.has_unified_store_depth = true,
|
||||
.has_usc_f16sop_u8 = true,
|
||||
.has_usc_min_output_registers_per_pix = true,
|
||||
.has_usc_slots = true,
|
||||
|
|
@ -87,6 +88,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = {
|
|||
.slc_cache_line_size_bits = 512U,
|
||||
.tile_size_x = 32U,
|
||||
.tile_size_y = 32U,
|
||||
.unified_store_depth = 256U,
|
||||
.usc_min_output_registers_per_pix = 2U,
|
||||
.usc_slots = 32U,
|
||||
.uvs_banks = 8U,
|
||||
|
|
|
|||
|
|
@ -292,6 +292,7 @@ struct pvr_device_features {
|
|||
bool has_tpu_dm_global_registers : 1;
|
||||
bool has_tpu_extended_integer_lookup : 1;
|
||||
bool has_tpu_image_state_v2 : 1;
|
||||
bool has_unified_store_depth : 1;
|
||||
bool has_usc_f16sop_u8 : 1;
|
||||
bool has_usc_min_output_registers_per_pix : 1;
|
||||
bool has_usc_pixel_partition_mask : 1;
|
||||
|
|
@ -320,6 +321,7 @@ struct pvr_device_features {
|
|||
uint32_t slc_cache_line_size_bits;
|
||||
uint32_t tile_size_x;
|
||||
uint32_t tile_size_y;
|
||||
uint32_t unified_store_depth;
|
||||
uint32_t usc_min_output_registers_per_pix;
|
||||
uint32_t usc_slots;
|
||||
uint32_t uvs_banks;
|
||||
|
|
|
|||
|
|
@ -142,4 +142,20 @@
|
|||
|
||||
#define PVR_NUM_PBE_EMIT_REGS 8U
|
||||
|
||||
/* Factor used by rogue_max_wg_temps() to convert a number of USRM allocs
 * into a number of temporary registers.
 */
#define ROGUE_USRM_GRANULARITY_IN_REGISTERS 4U

/* Number of USRM lines subtracted by rogue_get_min_attr_in_usrm_lines()
 * from the per-instance line count.
 */
#define ROGUE_RESERVED_USRM_LINES 2U

/* Multiplied by the unified_store_depth feature value in
 * rogue_get_unified_store_size_per_instance().
 */
#define ROGUE_USC_NUM_UNIFIED_STORE_BANKS 8U

#define ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY 8U

/* Temps use the same allocation granularity as registers. */
#define ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY \
   ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY

#define ROGUE_USRM_LINE_SIZE 16U

/* Product of the temp allocation granularity and the USRM line size; kept
 * parenthesized so it can be used safely inside larger expressions
 * (it is used as a divisor and as a multiplier by the hw utils helpers).
 */
#define ROGUE_USRM_LINE_SIZE_PER_INSTANCE \
   (ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY * ROGUE_USRM_LINE_SIZE)
|
||||
|
||||
#endif /* ROGUE_HW_DEFS_H */
|
||||
|
|
|
|||
|
|
@ -347,4 +347,124 @@ __rogue_get_param_vf_max(const struct pvr_device_info *dev_info)
|
|||
#define rogue_get_param_vf_max_x(dev_info) __rogue_get_param_vf_max(dev_info)
|
||||
#define rogue_get_param_vf_max_y(dev_info) __rogue_get_param_vf_max(dev_info)
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_max_total_instances(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U);
|
||||
assert(usc_slots);
|
||||
|
||||
return usc_slots * ROGUE_MAX_INSTANCES_PER_TASK;
|
||||
}
|
||||
|
||||
static inline uint32_t rogue_get_unified_store_size_per_instance(
|
||||
const struct pvr_device_info *dev_info)
|
||||
{
|
||||
const uint32_t unified_store_depth =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, unified_store_depth, 0U);
|
||||
assert(unified_store_depth);
|
||||
|
||||
return unified_store_depth * ROGUE_USC_NUM_UNIFIED_STORE_BANKS;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_min_attr_in_usrm_lines(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
const uint32_t unified_store_size_per_instance =
|
||||
rogue_get_unified_store_size_per_instance(dev_info);
|
||||
assert(unified_store_size_per_instance);
|
||||
|
||||
return (unified_store_size_per_instance /
|
||||
ROGUE_USRM_LINE_SIZE_PER_INSTANCE) -
|
||||
ROGUE_RESERVED_USRM_LINES;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_parallel_instances(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
return ROGUE_MAX_INSTANCES_PER_TASK / 2;
|
||||
}
|
||||
|
||||
static inline uint32_t rogue_get_unified_store_temps_per_instance(
|
||||
const struct pvr_device_info *dev_info)
|
||||
{
|
||||
return rogue_get_min_attr_in_usrm_lines(dev_info) *
|
||||
ROGUE_USRM_LINE_SIZE_PER_INSTANCE;
|
||||
}
|
||||
|
||||
/**
 * Compute the total unified store temps across the parallel instances.
 *
 * \param[in] dev_info Device info to query.
 * \return rogue_get_unified_store_temps_per_instance() multiplied by
 *         rogue_get_parallel_instances().
 */
static inline uint32_t
rogue_get_unified_store_total_temps(const struct pvr_device_info *dev_info)
{
   const uint32_t temps_per_instance =
      rogue_get_unified_store_temps_per_instance(dev_info);
   const uint32_t parallel_instances = rogue_get_parallel_instances(dev_info);

   return temps_per_instance * parallel_instances;
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_instance_groups_per_slot(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
return ROGUE_MAX_INSTANCES_PER_TASK / rogue_get_parallel_instances(dev_info);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_optimal_temps(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U);
|
||||
assert(usc_slots);
|
||||
|
||||
uint32_t max_temps_full_slot_use =
|
||||
rogue_get_unified_store_temps_per_instance(dev_info) /
|
||||
(rogue_get_instance_groups_per_slot(dev_info) * usc_slots);
|
||||
|
||||
max_temps_full_slot_use &= ~(ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY - 1);
|
||||
|
||||
return MAX2(max_temps_full_slot_use, 24U);
|
||||
}
|
||||
|
||||
/**
 * Compute the temp count as half the per-instance unified store temps,
 * capped at 248.
 *
 * \param[in] dev_info Device info to query.
 * \return The smaller of (temps per instance / 2) and 248.
 */
static inline uint32_t rogue_get_temps(const struct pvr_device_info *dev_info)
{
   const uint32_t half_store_temps =
      rogue_get_unified_store_temps_per_instance(dev_info) / 2;

   return MIN2(half_store_temps, 248U);
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_max_wg_temps(const struct pvr_device_info *dev_info,
|
||||
unsigned temps,
|
||||
unsigned wg_size,
|
||||
bool has_barrier)
|
||||
{
|
||||
assert(wg_size <= rogue_get_max_total_instances(dev_info));
|
||||
if (!wg_size)
|
||||
return rogue_get_compute_max_work_group_size(dev_info);
|
||||
|
||||
if (wg_size > ROGUE_MAX_INSTANCES_PER_TASK && has_barrier) {
|
||||
/* Number of slots allocated for each workgroup. */
|
||||
unsigned slots_per_wg =
|
||||
DIV_ROUND_UP(wg_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||
|
||||
/* Lines of USRM lines available for each slot
|
||||
* (+1 for fragmentation / coarse checking).
|
||||
*/
|
||||
unsigned lines_per_slot =
|
||||
rogue_get_min_attr_in_usrm_lines(dev_info) / (slots_per_wg + 1);
|
||||
|
||||
unsigned max_allocs;
|
||||
if (lines_per_slot != 0) {
|
||||
/* Convert lines to USRM allocs. */
|
||||
max_allocs = lines_per_slot * ROGUE_USRM_LINE_SIZE;
|
||||
} else {
|
||||
max_allocs = (rogue_get_min_attr_in_usrm_lines(dev_info) *
|
||||
ROGUE_USRM_LINE_SIZE) /
|
||||
(slots_per_wg + 1);
|
||||
}
|
||||
|
||||
/* Convert USRM allocs to temporary registers. */
|
||||
unsigned max_temps_for_barrier =
|
||||
max_allocs * ROGUE_USRM_GRANULARITY_IN_REGISTERS;
|
||||
|
||||
/* Clamp to provided limit */
|
||||
temps = MIN2(temps, max_temps_for_barrier);
|
||||
}
|
||||
|
||||
return temps;
|
||||
}
|
||||
#endif /* ROGUE_HW_UTILS_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue