Skip to content

Commit 68ca27e

Browse files
pixelclusterptr1337
authored andcommitted
drm/ttm: Split cgroup charge and resource allocation
Coupling resource allocation and cgroup charging is racy when charging succeeds, but subsequent resource allocation fails. Certain eviction decisions are made on the basis of whether the allocating cgroup is protected, i.e. within its min/low limits, but with the charge being tied to resource allocation (and uncharged when the resource allocation fails), this check is done at a point where the allocation is not actually charged to the cgroup. This is subtly wrong if the allocation were to cause the cgroup to exceed the min/low protection, but it's even more wrong if the same cgroup tries allocating multiple buffers concurrently: In this case, the min/low protection may pass for all allocation attempts when the real min/low protection covers only some, or potentially none of the allocated buffers. Instead, charge the allocation to the cgroup once and keep the charge for as long as we try to allocate a ttm_resource, and only undo the charge if allocating the resource is ultimately unsuccessful and we move on to a different ttm_place. Signed-off-by: Natalie Vock <natalie.vock@gmx.de>
1 parent 7806d94 commit 68ca27e

3 files changed

Lines changed: 85 additions & 35 deletions

File tree

drivers/gpu/drm/ttm/ttm_bo.c

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,12 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
490490
}
491491

492492
struct ttm_bo_alloc_state {
493+
/** @charge_pool: The memory pool the resource is charged to */
494+
struct dmem_cgroup_pool_state *charge_pool;
493495
/** @limit_pool: Which pool limit we should test against */
494496
struct dmem_cgroup_pool_state *limit_pool;
497+
/** @in_evict: Whether we are currently evicting buffers */
498+
bool in_evict;
495499
};
496500

497501
/**
@@ -520,28 +524,39 @@ static int ttm_bo_alloc_at_place(struct ttm_buffer_object *bo,
520524
bool may_evict;
521525
int ret;
522526

523-
may_evict = force_space && place->mem_type != TTM_PL_SYSTEM;
524-
525-
ret = ttm_resource_alloc(bo, place, res,
526-
force_space ? &alloc_state->limit_pool : NULL);
527+
may_evict = !alloc_state->in_evict && force_space &&
528+
place->mem_type != TTM_PL_SYSTEM;
529+
if (!alloc_state->charge_pool) {
530+
ret = ttm_resource_try_charge(bo, place, &alloc_state->charge_pool,
531+
force_space ? &alloc_state->limit_pool
532+
: NULL);
533+
if (ret) {
534+
/*
535+
* -EAGAIN means the charge failed, which we treat
536+
* like an allocation failure. Therefore, return an
537+
* error code indicating the allocation failed -
538+
* either -EBUSY if the allocation should be
539+
* retried with eviction, or -ENOSPC if there should
540+
* be no second attempt.
541+
*/
542+
if (ret == -EAGAIN)
543+
ret = may_evict ? -EBUSY : -ENOSPC;
544+
return ret;
545+
}
546+
}
527547

548+
ret = ttm_resource_alloc(bo, place, res, alloc_state->charge_pool);
528549
if (ret) {
529-
/*
530-
* -EAGAIN means the charge failed, which we treat like an
531-
* allocation failure. Therefore, return an error code indicating
532-
* the allocation failed - either -EBUSY if the allocation should
533-
* be retried with eviction, or -ENOSPC if there should be no second
534-
* attempt.
535-
*/
536-
if (ret == -EAGAIN)
537-
return may_evict ? -EBUSY : -ENOSPC;
538-
539550
if (ret == -ENOSPC && may_evict)
540-
return -EBUSY;
541-
551+
ret = -EBUSY;
542552
return ret;
543553
}
544554

555+
/*
556+
* Ownership of charge_pool has been transferred to the TTM resource,
557+
* don't make the caller think we still hold a reference to it.
558+
*/
559+
alloc_state->charge_pool = NULL;
545560
return 0;
546561
}
547562

@@ -596,8 +611,9 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
596611

597612
evict_walk->evicted++;
598613
if (evict_walk->res)
599-
lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place,
600-
evict_walk->res, NULL);
614+
lret = ttm_bo_alloc_at_place(evict_walk->evictor, evict_walk->place,
615+
walk->arg.ctx, false, evict_walk->res,
616+
evict_walk->alloc_state);
601617
if (lret == 0)
602618
return 1;
603619
out:
@@ -636,6 +652,8 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
636652
};
637653
s64 lret;
638654

655+
state->in_evict = true;
656+
639657
evict_walk.walk.arg.trylock_only = true;
640658
lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
641659

@@ -666,6 +684,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
666684
goto retry;
667685
}
668686
out:
687+
state->in_evict = false;
669688
if (lret < 0)
670689
return lret;
671690
if (lret == 0)
@@ -798,6 +817,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
798817
res, &alloc_state);
799818

800819
if (ret == -ENOSPC) {
820+
dmem_cgroup_uncharge(alloc_state.charge_pool, bo->base.size);
801821
dmem_cgroup_pool_state_put(alloc_state.limit_pool);
802822
continue;
803823
} else if (ret == -EBUSY) {
@@ -806,11 +826,15 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
806826

807827
dmem_cgroup_pool_state_put(alloc_state.limit_pool);
808828

809-
if (ret == -EBUSY)
810-
continue;
811-
else if (ret)
829+
if (ret) {
830+
dmem_cgroup_uncharge(alloc_state.charge_pool,
831+
bo->base.size);
832+
if (ret == -EBUSY)
833+
continue;
812834
return ret;
835+
}
813836
} else if (ret) {
837+
dmem_cgroup_uncharge(alloc_state.charge_pool, bo->base.size);
814838
dmem_cgroup_pool_state_put(alloc_state.limit_pool);
815839
return ret;
816840
}

drivers/gpu/drm/ttm/ttm_resource.c

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -373,30 +373,52 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
373373
}
374374
EXPORT_SYMBOL(ttm_resource_fini);
375375

376+
/**
377+
* ttm_resource_try_charge - charge a resource manager's cgroup pool
378+
* @bo: buffer for which an allocation should be charged
379+
* @place: where the allocation is attempted to be placed
380+
* @ret_pool: on charge success, the pool that was charged
381+
* @ret_limit_pool: on charge failure, the pool responsible for the failure
382+
*
383+
* Should be used to charge cgroups before attempting resource allocation.
384+
* When charging succeeds, the value of ret_pool should be passed to
385+
* ttm_resource_alloc.
386+
*
387+
* Returns: 0 on charge success, negative errno on failure.
388+
*/
389+
int ttm_resource_try_charge(struct ttm_buffer_object *bo,
390+
const struct ttm_place *place,
391+
struct dmem_cgroup_pool_state **ret_pool,
392+
struct dmem_cgroup_pool_state **ret_limit_pool)
393+
{
394+
struct ttm_resource_manager *man =
395+
ttm_manager_type(bo->bdev, place->mem_type);
396+
397+
if (!man->cg) {
398+
*ret_pool = NULL;
399+
if (ret_limit_pool)
400+
*ret_limit_pool = NULL;
401+
return 0;
402+
}
403+
404+
return dmem_cgroup_try_charge(man->cg, bo->base.size, ret_pool,
405+
ret_limit_pool);
406+
}
407+
376408
int ttm_resource_alloc(struct ttm_buffer_object *bo,
377409
const struct ttm_place *place,
378410
struct ttm_resource **res_ptr,
379-
struct dmem_cgroup_pool_state **ret_limit_pool)
411+
struct dmem_cgroup_pool_state *charge_pool)
380412
{
381413
struct ttm_resource_manager *man =
382414
ttm_manager_type(bo->bdev, place->mem_type);
383-
struct dmem_cgroup_pool_state *pool = NULL;
384415
int ret;
385416

386-
if (man->cg) {
387-
ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool);
388-
if (ret)
389-
return ret;
390-
}
391-
392417
ret = man->func->alloc(man, bo, place, res_ptr);
393-
if (ret) {
394-
if (pool)
395-
dmem_cgroup_uncharge(pool, bo->base.size);
418+
if (ret)
396419
return ret;
397-
}
398420

399-
(*res_ptr)->css = pool;
421+
(*res_ptr)->css = charge_pool;
400422

401423
spin_lock(&bo->bdev->lru_lock);
402424
ttm_resource_add_bulk_move(*res_ptr, bo);

include/drm/ttm/ttm_resource.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
456456
void ttm_resource_fini(struct ttm_resource_manager *man,
457457
struct ttm_resource *res);
458458

459+
int ttm_resource_try_charge(struct ttm_buffer_object *bo,
460+
const struct ttm_place *place,
461+
struct dmem_cgroup_pool_state **ret_pool,
462+
struct dmem_cgroup_pool_state **ret_limit_pool);
459463
int ttm_resource_alloc(struct ttm_buffer_object *bo,
460464
const struct ttm_place *place,
461465
struct ttm_resource **res,
462-
struct dmem_cgroup_pool_state **ret_limit_pool);
466+
struct dmem_cgroup_pool_state *charge_pool);
463467
void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res);
464468
bool ttm_resource_intersects(struct ttm_device *bdev,
465469
struct ttm_resource *res,

0 commit comments

Comments
 (0)