@@ -489,6 +489,117 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
489489 return ret ;
490490}
491491
/**
 * struct ttm_bo_alloc_state - State shared between the attempts to allocate
 * a BO's backing store in a single place, including any eviction pass run
 * on behalf of that allocation.
 */
struct ttm_bo_alloc_state {
	/** @charge_pool: The memory pool the resource is charged to */
	struct dmem_cgroup_pool_state *charge_pool;
	/** @limit_pool: Which pool limit we should test against */
	struct dmem_cgroup_pool_state *limit_pool;
	/** @in_evict: Whether we are currently evicting buffers */
	bool in_evict;
	/**
	 * @may_try_low: Whether eviction may fall back to evicting BOs that
	 * are covered by dmem 'low' protection. If false, only unprotected
	 * BOs, i.e. BOs whose cgroup is exceeding its dmem low/min
	 * protection, are considered for eviction.
	 */
	bool may_try_low;
};
504+
505+ /**
506+ * ttm_bo_alloc_at_place - Attempt allocating a BO's backing store in a place
507+ *
508+ * @bo: The buffer to allocate the backing store of
509+ * @place: The place to attempt allocation in
510+ * @ctx: ttm_operation_ctx associated with this allocation
511+ * @force_space: If we should evict buffers to force space
512+ * @res: On allocation success, the resulting struct ttm_resource.
513+ * @alloc_state: Object holding allocation state such as charged cgroups.
514+ *
515+ * Returns:
516+ * -EBUSY: No space available, but allocation should be retried with ttm_bo_evict_alloc.
517+ * -ENOSPC: No space available, allocation should not be retried.
518+ * -ERESTARTSYS: An interruptible sleep was interrupted by a signal.
519+ *
520+ */
521+ static int ttm_bo_alloc_at_place (struct ttm_buffer_object * bo ,
522+ const struct ttm_place * place ,
523+ struct ttm_operation_ctx * ctx ,
524+ bool force_space ,
525+ struct ttm_resource * * res ,
526+ struct ttm_bo_alloc_state * alloc_state )
527+ {
528+ bool may_evict ;
529+ int ret ;
530+
531+ may_evict = !alloc_state -> in_evict && force_space &&
532+ place -> mem_type != TTM_PL_SYSTEM ;
533+ if (!alloc_state -> charge_pool ) {
534+ ret = ttm_resource_try_charge (bo , place , & alloc_state -> charge_pool ,
535+ force_space ? & alloc_state -> limit_pool
536+ : NULL );
537+ if (ret ) {
538+ /*
539+ * -EAGAIN means the charge failed, which we treat
540+ * like an allocation failure. Therefore, return an
541+ * error code indicating the allocation failed -
542+ * either -EBUSY if the allocation should be
543+ * retried with eviction, or -ENOSPC if there should
544+ * be no second attempt.
545+ */
546+ if (ret == - EAGAIN )
547+ ret = may_evict ? - EBUSY : - ENOSPC ;
548+ return ret ;
549+ }
550+ }
551+
552+ /*
553+ * cgroup protection plays a special role in eviction.
554+ * Conceptually, protection of memory via the dmem cgroup controller
555+ * entitles the protected cgroup to use a certain amount of memory.
556+ * There are two types of protection - the 'low' limit is a
557+ * "best-effort" protection, whereas the 'min' limit provides a hard
558+ * guarantee that memory within the cgroup's allowance will not be
559+ * evicted under any circumstance.
560+ *
561+ * To faithfully model this concept in TTM, we also need to take cgroup
562+ * protection into account when allocating. When allocation in one
563+ * place fails, TTM will default to trying other places first before
564+ * evicting.
565+ * If the allocation is covered by dmem cgroup protection, however,
566+ * this prevents the allocation from using the memory it is "entitled"
567+ * to. To make sure unprotected allocations cannot push new protected
568+ * allocations out of places they are "entitled" to use, we should
569+ * evict buffers not covered by any cgroup protection, if this
570+ * allocation is covered by cgroup protection.
571+ *
572+ * Buffers covered by 'min' protection are a special case - the 'min'
573+ * limit is a stronger guarantee than 'low', and thus buffers protected
574+ * by 'low' but not 'min' should also be considered for eviction.
575+ * Buffers protected by 'min' will never be considered for eviction
576+ * anyway, so the regular eviction path should be triggered here.
577+ * Buffers protected by 'low' but not 'min' will take a special
578+ * eviction path that only evicts buffers covered by neither 'low' or
579+ * 'min' protections.
580+ */
581+ if (!alloc_state -> in_evict ) {
582+ may_evict |= dmem_cgroup_below_min (NULL , alloc_state -> charge_pool );
583+ alloc_state -> may_try_low = may_evict ;
584+
585+ may_evict |= dmem_cgroup_below_low (NULL , alloc_state -> charge_pool );
586+ }
587+
588+ ret = ttm_resource_alloc (bo , place , res , alloc_state -> charge_pool );
589+ if (ret ) {
590+ if (ret == - ENOSPC && may_evict )
591+ ret = - EBUSY ;
592+ return ret ;
593+ }
594+
595+ /*
596+ * Ownership of charge_pool has been transferred to the TTM resource,
597+ * don't make the caller think we still hold a reference to it.
598+ */
599+ alloc_state -> charge_pool = NULL ;
600+ return 0 ;
601+ }
602+
492603/**
493604 * struct ttm_bo_evict_walk - Parameters for the evict walk.
494605 */
@@ -504,22 +615,61 @@ struct ttm_bo_evict_walk {
504615 /** @evicted: Number of successful evictions. */
505616 unsigned long evicted ;
506617
507- /** @limit_pool: Which pool limit we should test against */
508- struct dmem_cgroup_pool_state * limit_pool ;
509618 /** @try_low: Whether we should attempt to evict BO's with low watermark threshold */
510619 bool try_low ;
511620 /** @hit_low: If we cannot evict a bo when @try_low is false (first pass) */
512621 bool hit_low ;
622+
623+ /** @alloc_state: State associated with the allocation attempt. */
624+ struct ttm_bo_alloc_state * alloc_state ;
513625};
514626
515627static s64 ttm_bo_evict_cb (struct ttm_lru_walk * walk , struct ttm_buffer_object * bo )
516628{
517629 struct ttm_bo_evict_walk * evict_walk =
518630 container_of (walk , typeof (* evict_walk ), walk );
631+ struct dmem_cgroup_pool_state * limit_pool , * ancestor = NULL ;
632+ bool evict_valuable ;
519633 s64 lret ;
520634
521- if (!dmem_cgroup_state_evict_valuable (evict_walk -> limit_pool , bo -> resource -> css ,
522- evict_walk -> try_low , & evict_walk -> hit_low ))
635+ /*
636+ * If may_try_low is not set, then we're trying to evict unprotected
637+ * buffers in favor of a protected allocation for charge_pool. Explicitly skip
638+ * buffers belonging to the same cgroup here - that cgroup is definitely protected,
639+ * even though dmem_cgroup_state_evict_valuable would allow the eviction because a
640+ * cgroup is always allowed to evict from itself even if it is protected.
641+ */
642+ if (!evict_walk -> alloc_state -> may_try_low &&
643+ bo -> resource -> css == evict_walk -> alloc_state -> charge_pool )
644+ return 0 ;
645+
646+ limit_pool = evict_walk -> alloc_state -> limit_pool ;
647+ /*
648+ * If there is no explicit limit pool, find the root of the shared subtree between
649+ * evictor and evictee. This is important so that recursive protection rules can
650+ * apply properly: Recursive protection distributes cgroup protection afforded
651+ * to a parent cgroup but not used explicitly by a child cgroup between all child
652+ * cgroups (see docs of effective_protection in mm/page_counter.c). However, when
653+ * direct siblings compete for memory, siblings that were explicitly protected
654+ * should get prioritized over siblings that weren't. This only happens correctly
655+ * when the root of the shared subtree is passed to
656+ * dmem_cgroup_state_evict_valuable. Otherwise, the effective-protection
657+ * calculation cannot distinguish direct siblings from unrelated subtrees and the
658+ * calculated protection ends up wrong.
659+ */
660+ if (!limit_pool ) {
661+ ancestor = dmem_cgroup_get_common_ancestor (bo -> resource -> css ,
662+ evict_walk -> alloc_state -> charge_pool );
663+ limit_pool = ancestor ;
664+ }
665+
666+ evict_valuable = dmem_cgroup_state_evict_valuable (limit_pool , bo -> resource -> css ,
667+ evict_walk -> try_low ,
668+ & evict_walk -> hit_low );
669+ if (ancestor )
670+ dmem_cgroup_pool_state_put (ancestor );
671+
672+ if (!evict_valuable )
523673 return 0 ;
524674
525675 if (bo -> pin_count || !bo -> bdev -> funcs -> eviction_valuable (bo , evict_walk -> place ))
@@ -538,8 +688,9 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
538688
539689 evict_walk -> evicted ++ ;
540690 if (evict_walk -> res )
541- lret = ttm_resource_alloc (evict_walk -> evictor , evict_walk -> place ,
542- evict_walk -> res , NULL );
691+ lret = ttm_bo_alloc_at_place (evict_walk -> evictor , evict_walk -> place ,
692+ walk -> arg .ctx , false, evict_walk -> res ,
693+ evict_walk -> alloc_state );
543694 if (lret == 0 )
544695 return 1 ;
545696out :
@@ -561,7 +712,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
561712 struct ttm_operation_ctx * ctx ,
562713 struct ww_acquire_ctx * ticket ,
563714 struct ttm_resource * * res ,
564- struct dmem_cgroup_pool_state * limit_pool )
715+ struct ttm_bo_alloc_state * state )
565716{
566717 struct ttm_bo_evict_walk evict_walk = {
567718 .walk = {
@@ -574,15 +725,21 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
574725 .place = place ,
575726 .evictor = evictor ,
576727 .res = res ,
577- .limit_pool = limit_pool ,
728+ .alloc_state = state ,
578729 };
579730 s64 lret ;
580731
732+ state -> in_evict = true;
733+
581734 evict_walk .walk .arg .trylock_only = true;
582735 lret = ttm_lru_walk_for_evict (& evict_walk .walk , bdev , man , 1 );
583736
584- /* One more attempt if we hit low limit? */
585- if (!lret && evict_walk .hit_low ) {
737+ /* If we failed to find enough BOs to evict, but we skipped over
738+ * some BOs because they were covered by dmem low protection, retry
739+ * evicting these protected BOs too, except if we're told not to
740+ * consider protected BOs at all.
741+ */
742+ if (!lret && evict_walk .hit_low && state -> may_try_low ) {
586743 evict_walk .try_low = true;
587744 lret = ttm_lru_walk_for_evict (& evict_walk .walk , bdev , man , 1 );
588745 }
@@ -603,11 +760,13 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
603760 } while (!lret && evict_walk .evicted );
604761
605762 /* We hit the low limit? Try once more */
606- if (!lret && evict_walk .hit_low && !evict_walk .try_low ) {
763+ if (!lret && evict_walk .hit_low && !evict_walk .try_low &&
764+ state -> may_try_low ) {
607765 evict_walk .try_low = true;
608766 goto retry ;
609767 }
610768out :
769+ state -> in_evict = false;
611770 if (lret < 0 )
612771 return lret ;
613772 if (lret == 0 )
@@ -725,9 +884,8 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
725884
726885 for (i = 0 ; i < placement -> num_placement ; ++ i ) {
727886 const struct ttm_place * place = & placement -> placement [i ];
728- struct dmem_cgroup_pool_state * limit_pool = NULL ;
887+ struct ttm_bo_alloc_state alloc_state = {} ;
729888 struct ttm_resource_manager * man ;
730- bool may_evict ;
731889
732890 man = ttm_manager_type (bdev , place -> mem_type );
733891 if (!man || !ttm_resource_manager_used (man ))
@@ -737,25 +895,30 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
737895 TTM_PL_FLAG_FALLBACK ))
738896 continue ;
739897
740- may_evict = (force_space && place -> mem_type != TTM_PL_SYSTEM );
741- ret = ttm_resource_alloc (bo , place , res , force_space ? & limit_pool : NULL );
742- if (ret ) {
743- if (ret != - ENOSPC && ret != - EAGAIN ) {
744- dmem_cgroup_pool_state_put (limit_pool );
745- return ret ;
746- }
747- if (!may_evict ) {
748- dmem_cgroup_pool_state_put (limit_pool );
749- continue ;
750- }
898+ ret = ttm_bo_alloc_at_place (bo , place , ctx , force_space ,
899+ res , & alloc_state );
751900
901+ if (ret == - ENOSPC ) {
902+ dmem_cgroup_uncharge (alloc_state .charge_pool , bo -> base .size );
903+ dmem_cgroup_pool_state_put (alloc_state .limit_pool );
904+ continue ;
905+ } else if (ret == - EBUSY ) {
752906 ret = ttm_bo_evict_alloc (bdev , man , place , bo , ctx ,
753- ticket , res , limit_pool );
754- dmem_cgroup_pool_state_put (limit_pool );
755- if (ret == - EBUSY )
756- continue ;
757- if (ret )
907+ ticket , res , & alloc_state );
908+
909+ dmem_cgroup_pool_state_put (alloc_state .limit_pool );
910+
911+ if (ret ) {
912+ dmem_cgroup_uncharge (alloc_state .charge_pool ,
913+ bo -> base .size );
914+ if (ret == - EBUSY )
915+ continue ;
758916 return ret ;
917+ }
918+ } else if (ret ) {
919+ dmem_cgroup_uncharge (alloc_state .charge_pool , bo -> base .size );
920+ dmem_cgroup_pool_state_put (alloc_state .limit_pool );
921+ return ret ;
759922 }
760923
761924 ret = ttm_bo_add_pipelined_eviction_fences (bo , man , ctx -> no_wait_gpu );
0 commit comments