mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-18 14:30:37 +01:00
broadcom/common: Optimize CSD super-group packing
Return one work group per super group when the work group size is a multiple of 16 (elements per batch), and recalculate max_wgs_per_sg only when TSY barriers cut the available QPU threads. Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37836>
This commit is contained in:
parent
099ac5be1a
commit
1326d52d23
1 changed file with 9 additions and 1 deletion
|
|
@ -41,6 +41,12 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
|
|||
if (has_subgroups)
|
||||
return 1;
|
||||
|
||||
/* If the workgroup size is a multiple of 16 (elements per batch),
|
||||
* the lane occupancy is already maximized.
|
||||
*/
|
||||
if (wg_size % 16 == 0)
|
||||
return 1;
|
||||
|
||||
/* Compute maximum number of batches in a supergroup for this workgroup size.
|
||||
* Each batch is 16 elements, and we can have up to 16 work groups in a
|
||||
* supergroup:
|
||||
|
|
@ -56,11 +62,13 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
|
|||
* available, so we can have at least 2 supergroups executing in parallel
|
||||
* and we don't stall all our QPU threads when a supergroup hits a barrier.
|
||||
*/
|
||||
uint32_t max_wgs_per_sg = 16;
|
||||
|
||||
if (has_tsy_barrier) {
|
||||
uint32_t max_qpu_threads = devinfo->qpu_count * threads;
|
||||
max_batches_per_sg = MIN2(max_batches_per_sg, max_qpu_threads / 2);
|
||||
max_wgs_per_sg = max_batches_per_sg * 16 / wg_size;
|
||||
}
|
||||
uint32_t max_wgs_per_sg = max_batches_per_sg * 16 / wg_size;
|
||||
|
||||
uint32_t best_wgs_per_sg = 1;
|
||||
uint32_t best_unused_lanes = 16;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue