diff --git a/src/imagination/common/device_info/axe-1-16m.h b/src/imagination/common/device_info/axe-1-16m.h index 4d79e0aa1a8..894a763fc46 100644 --- a/src/imagination/common/device_info/axe-1-16m.h +++ b/src/imagination/common/device_info/axe-1-16m.h @@ -62,6 +62,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = { .has_tpu_border_colour_enhanced = true, .has_tpu_extended_integer_lookup = true, .has_tpu_image_state_v2 = true, + .has_unified_store_depth = true, .has_usc_f16sop_u8 = true, .has_usc_min_output_registers_per_pix = true, .has_usc_pixel_partition_mask = true, @@ -86,6 +87,7 @@ static const struct pvr_device_features pvr_device_features_33_V_11_3 = { .slc_cache_line_size_bits = 512U, .tile_size_x = 16U, .tile_size_y = 16U, + .unified_store_depth = 64U, .usc_min_output_registers_per_pix = 1U, .usc_slots = 14U, .uvs_banks = 2U, diff --git a/src/imagination/common/device_info/bxs-4-64.h b/src/imagination/common/device_info/bxs-4-64.h index 61dfadc2b72..da8bfb2203f 100644 --- a/src/imagination/common/device_info/bxs-4-64.h +++ b/src/imagination/common/device_info/bxs-4-64.h @@ -70,6 +70,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = { .has_tpu_dm_global_registers = true, .has_tpu_extended_integer_lookup = true, .has_tpu_image_state_v2 = true, + .has_unified_store_depth = true, .has_usc_f16sop_u8 = true, .has_usc_min_output_registers_per_pix = true, .has_usc_pixel_partition_mask = true, @@ -96,6 +97,7 @@ static const struct pvr_device_features pvr_device_features_36_V_104_796 = { .slc_cache_line_size_bits = 512U, .tile_size_x = 16U, .tile_size_y = 16U, + .unified_store_depth = 256U, .usc_min_output_registers_per_pix = 2U, .usc_slots = 64U, .uvs_banks = 8U, diff --git a/src/imagination/common/device_info/gx6250.h b/src/imagination/common/device_info/gx6250.h index c967341640a..db3776e847a 100644 --- a/src/imagination/common/device_info/gx6250.h +++ b/src/imagination/common/device_info/gx6250.h @@ -63,6 
+63,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = { .has_tpu_array_textures = true, .has_tpu_extended_integer_lookup = true, .has_tpu_image_state_v2 = true, + .has_unified_store_depth = true, .has_usc_f16sop_u8 = true, .has_usc_min_output_registers_per_pix = true, .has_usc_slots = true, @@ -87,6 +88,7 @@ static const struct pvr_device_features pvr_device_features_4_V_2_51 = { .slc_cache_line_size_bits = 512U, .tile_size_x = 32U, .tile_size_y = 32U, + .unified_store_depth = 256U, .usc_min_output_registers_per_pix = 2U, .usc_slots = 32U, .uvs_banks = 8U, diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h index 779923f7d20..5fe43c8f72e 100644 --- a/src/imagination/common/pvr_device_info.h +++ b/src/imagination/common/pvr_device_info.h @@ -292,6 +292,7 @@ struct pvr_device_features { bool has_tpu_dm_global_registers : 1; bool has_tpu_extended_integer_lookup : 1; bool has_tpu_image_state_v2 : 1; + bool has_unified_store_depth : 1; bool has_usc_f16sop_u8 : 1; bool has_usc_min_output_registers_per_pix : 1; bool has_usc_pixel_partition_mask : 1; @@ -320,6 +321,7 @@ struct pvr_device_features { uint32_t slc_cache_line_size_bits; uint32_t tile_size_x; uint32_t tile_size_y; + uint32_t unified_store_depth; uint32_t usc_min_output_registers_per_pix; uint32_t usc_slots; uint32_t uvs_banks; diff --git a/src/imagination/include/hwdef/rogue_hw_defs.h b/src/imagination/include/hwdef/rogue_hw_defs.h index b040e01809e..20dd979b368 100644 --- a/src/imagination/include/hwdef/rogue_hw_defs.h +++ b/src/imagination/include/hwdef/rogue_hw_defs.h @@ -142,4 +142,20 @@ #define PVR_NUM_PBE_EMIT_REGS 8U +#define ROGUE_USRM_GRANULARITY_IN_REGISTERS 4U + +#define ROGUE_RESERVED_USRM_LINES 2U + +#define ROGUE_USC_NUM_UNIFIED_STORE_BANKS 8U + +#define ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY 8U + +#define ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY \ + ROGUE_PDS_US_REGISTER_ALLOCATION_GRANULARITY + +#define
ROGUE_USRM_LINE_SIZE 16U +/* Evaluates to 8U * 16U given the definitions above. */ +#define ROGUE_USRM_LINE_SIZE_PER_INSTANCE \ + (ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY * ROGUE_USRM_LINE_SIZE) + #endif /* ROGUE_HW_DEFS_H */ diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h index 8d2b936ad6f..faffa543f67 100644 --- a/src/imagination/include/hwdef/rogue_hw_utils.h +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -347,4 +347,124 @@ __rogue_get_param_vf_max(const struct pvr_device_info *dev_info) #define rogue_get_param_vf_max_x(dev_info) __rogue_get_param_vf_max(dev_info) #define rogue_get_param_vf_max_y(dev_info) __rogue_get_param_vf_max(dev_info) +static inline uint32_t +rogue_get_max_total_instances(const struct pvr_device_info *dev_info) +{ + const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U); + assert(usc_slots); +/* USC slot count times the per-task instance limit. */ + return usc_slots * ROGUE_MAX_INSTANCES_PER_TASK; +} +/* Unified store depth times the bank count; asserts the depth is non-zero. */ +static inline uint32_t rogue_get_unified_store_size_per_instance( + const struct pvr_device_info *dev_info) +{ + const uint32_t unified_store_depth = + PVR_GET_FEATURE_VALUE(dev_info, unified_store_depth, 0U); + assert(unified_store_depth); + + return unified_store_depth * ROGUE_USC_NUM_UNIFIED_STORE_BANKS; +} +/* Per-instance store size divided by ROGUE_USRM_LINE_SIZE_PER_INSTANCE, minus the reserved lines. NOTE(review): the unsigned result wraps if the quotient is below ROGUE_RESERVED_USRM_LINES; confirm for new cores. */ +static inline uint32_t +rogue_get_min_attr_in_usrm_lines(const struct pvr_device_info *dev_info) +{ + const uint32_t unified_store_size_per_instance = + rogue_get_unified_store_size_per_instance(dev_info); + assert(unified_store_size_per_instance); + + return (unified_store_size_per_instance / + ROGUE_USRM_LINE_SIZE_PER_INSTANCE) - + ROGUE_RESERVED_USRM_LINES; +} +/* Half of ROGUE_MAX_INSTANCES_PER_TASK; dev_info is not read. */ +static inline uint32_t +rogue_get_parallel_instances(const struct pvr_device_info *dev_info) +{ + return ROGUE_MAX_INSTANCES_PER_TASK / 2; +} +/* rogue_get_min_attr_in_usrm_lines() times ROGUE_USRM_LINE_SIZE_PER_INSTANCE. */ +static inline uint32_t rogue_get_unified_store_temps_per_instance( + const struct pvr_device_info *dev_info) +{ + return rogue_get_min_attr_in_usrm_lines(dev_info) * + ROGUE_USRM_LINE_SIZE_PER_INSTANCE; +} +/* Temps per instance times the parallel instance count. */ +static inline uint32_t
+rogue_get_unified_store_total_temps(const struct pvr_device_info *dev_info) +{ + return rogue_get_unified_store_temps_per_instance(dev_info) * + rogue_get_parallel_instances(dev_info); +} +/* ROGUE_MAX_INSTANCES_PER_TASK divided by the parallel instance count. */ +static inline uint32_t +rogue_get_instance_groups_per_slot(const struct pvr_device_info *dev_info) +{ + return ROGUE_MAX_INSTANCES_PER_TASK / rogue_get_parallel_instances(dev_info); +} +/* Temps per instance divided by (instance groups per slot * usc_slots), rounded down to the temp allocation granularity, never below 24. */ +static inline uint32_t +rogue_get_optimal_temps(const struct pvr_device_info *dev_info) +{ + const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 0U); + assert(usc_slots); + + uint32_t max_temps_full_slot_use = + rogue_get_unified_store_temps_per_instance(dev_info) / + (rogue_get_instance_groups_per_slot(dev_info) * usc_slots); +/* Round down to a multiple of the granularity (a power of two). */ + max_temps_full_slot_use &= ~(ROGUE_PDS_US_TEMP_ALLOCATION_GRANULARITY - 1); + + return MAX2(max_temps_full_slot_use, 24U); +} +/* Half of the temps per instance, capped at 248. */ +static inline uint32_t rogue_get_temps(const struct pvr_device_info *dev_info) +{ + uint32_t temps = rogue_get_unified_store_temps_per_instance(dev_info) / 2; + + return MIN2(temps, 248U); +} +/* Returns temps, clamped to a barrier-derived limit when wg_size exceeds ROGUE_MAX_INSTANCES_PER_TASK and has_barrier is set. NOTE(review): wg_size == 0 returns rogue_get_compute_max_work_group_size(), not a temp count; confirm intent. */ +static inline uint32_t +rogue_max_wg_temps(const struct pvr_device_info *dev_info, + unsigned temps, + unsigned wg_size, + bool has_barrier) +{ + assert(wg_size <= rogue_get_max_total_instances(dev_info)); + if (!wg_size) + return rogue_get_compute_max_work_group_size(dev_info); + + if (wg_size > ROGUE_MAX_INSTANCES_PER_TASK && has_barrier) { + /* Number of slots allocated for each workgroup. */ + unsigned slots_per_wg = + DIV_ROUND_UP(wg_size, ROGUE_MAX_INSTANCES_PER_TASK); + + /* Number of USRM lines available for each slot + * (+1 for fragmentation / coarse checking). + */ + unsigned lines_per_slot = + rogue_get_min_attr_in_usrm_lines(dev_info) / (slots_per_wg + 1); + + unsigned max_allocs; + if (lines_per_slot != 0) { + /* Convert lines to USRM allocs.
*/ + max_allocs = lines_per_slot * ROGUE_USRM_LINE_SIZE; + } else { /* Less than one whole line per slot: scale to allocs before dividing. */ + max_allocs = (rogue_get_min_attr_in_usrm_lines(dev_info) * + ROGUE_USRM_LINE_SIZE) / + (slots_per_wg + 1); + } + + /* Convert USRM allocs to temporary registers. */ + unsigned max_temps_for_barrier = + max_allocs * ROGUE_USRM_GRANULARITY_IN_REGISTERS; + + /* Clamp the requested temps to the barrier limit. */ + temps = MIN2(temps, max_temps_for_barrier); + } + + return temps; +} #endif /* ROGUE_HW_UTILS_H */