mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 17:58:26 +02:00
aux/pb: add a tolerance for reclaim failure
Originally, a slab attempted to reclaim a single bo. There are two possible
outcomes:
* the bo is reclaimed
* the bo is not reclaimed
If the bo is reclaimed, great.
If the bo is not reclaimed, it remains at the head of the list until it can
be reclaimed. This means that any bo with a "long" work queue which makes it
into a slab will effectively kill the entire slab. In a benchmarking scenario,
this can occur in rapid succession, and every slab will get 1-2 suballocations
before it reaches a bo that blocks long enough for a new slab to be needed.
The inevitable result of this scenario is that all memory is depleted almost instantly,
all because pb assumes that if the first bo in the reclaim list isn't ready, none of them
can be ready.
For drivers like radeonsi, this happens to be a fine assumption.
For drivers like zink, this is entirely unworkable and explodes the GPU.
Cc: mesa-stable
Reviewed-by: Witold Baryluk <witold.baryluk@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tested-by: Witold Baryluk <witold.baryluk@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13345>
(cherry picked from commit 3d6c8829f5)
This commit is contained in:
parent
7cddbaab2d
commit
b7942e3134
2 changed files with 18 additions and 8 deletions
|
|
@ -1219,7 +1219,7 @@
|
|||
"description": "aux/pb: add a tolerance for reclaim failure",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
|
|
|||
|
|
@ -71,17 +71,27 @@ pb_slab_reclaim(struct pb_slabs *slabs, struct pb_slab_entry *entry)
|
|||
}
|
||||
}
|
||||
|
||||
#define MAX_FAILED_RECLAIMS 2
|
||||
|
||||
static void
|
||||
pb_slabs_reclaim_locked(struct pb_slabs *slabs)
|
||||
{
|
||||
while (!list_is_empty(&slabs->reclaim)) {
|
||||
struct pb_slab_entry *entry =
|
||||
LIST_ENTRY(struct pb_slab_entry, slabs->reclaim.next, head);
|
||||
|
||||
if (!slabs->can_reclaim(slabs->priv, entry))
|
||||
struct pb_slab_entry *entry, *next;
|
||||
unsigned num_failed_reclaims = 0;
|
||||
LIST_FOR_EACH_ENTRY_SAFE(entry, next, &slabs->reclaim, head) {
|
||||
if (slabs->can_reclaim(slabs->priv, entry)) {
|
||||
pb_slab_reclaim(slabs, entry);
|
||||
/* there are typically three possible scenarios when reclaiming:
|
||||
* - all entries reclaimed
|
||||
* - no entries reclaimed
|
||||
* - all but one entry reclaimed
|
||||
* in the scenario where a slab contains many (10+) unused entries,
|
||||
* the driver should not walk the entire list, as this is likely to
|
||||
* result in zero reclaims if the first few entries fail to reclaim
|
||||
*/
|
||||
} else if (num_failed_reclaims++ > MAX_FAILED_RECLAIMS) {
|
||||
break;
|
||||
|
||||
pb_slab_reclaim(slabs, entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue