@@ -595,83 +595,106 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
595595 struct vhost_dev * dev = & v -> vdev ;
596596 struct vhost_iotlb * iotlb = dev -> iotlb ;
597597 struct page * * page_list ;
598- unsigned long list_size = PAGE_SIZE / sizeof ( struct page * ) ;
598+ struct vm_area_struct * * vmas ;
599599 unsigned int gup_flags = FOLL_LONGTERM ;
600- unsigned long npages , cur_base , map_pfn , last_pfn = 0 ;
601- unsigned long locked , lock_limit , pinned , i ;
600+ unsigned long map_pfn , last_pfn = 0 ;
601+ unsigned long npages , lock_limit ;
602+ unsigned long i , nmap = 0 ;
602603 u64 iova = msg -> iova ;
604+ long pinned ;
603605 int ret = 0 ;
604606
605607 if (vhost_iotlb_itree_first (iotlb , msg -> iova ,
606608 msg -> iova + msg -> size - 1 ))
607609 return - EEXIST ;
608610
609- page_list = (struct page * * ) __get_free_page (GFP_KERNEL );
610- if (!page_list )
611- return - ENOMEM ;
612-
613611 if (msg -> perm & VHOST_ACCESS_WO )
614612 gup_flags |= FOLL_WRITE ;
615613
616614 npages = PAGE_ALIGN (msg -> size + (iova & ~PAGE_MASK )) >> PAGE_SHIFT ;
617615 if (!npages )
618616 return - EINVAL ;
619617
618+ page_list = kvmalloc_array (npages , sizeof (struct page * ), GFP_KERNEL );
619+ vmas = kvmalloc_array (npages , sizeof (struct vm_area_struct * ),
620+ GFP_KERNEL );
621+ if (!page_list || !vmas ) {
622+ ret = - ENOMEM ;
623+ goto free ;
624+ }
625+
620626 mmap_read_lock (dev -> mm );
621627
622- locked = atomic64_add_return (npages , & dev -> mm -> pinned_vm );
623628 lock_limit = rlimit (RLIMIT_MEMLOCK ) >> PAGE_SHIFT ;
624-
625- if (locked > lock_limit ) {
629+ if (npages + atomic64_read (& dev -> mm -> pinned_vm ) > lock_limit ) {
626630 ret = - ENOMEM ;
627- goto out ;
631+ goto unlock ;
628632 }
629633
630- cur_base = msg -> uaddr & PAGE_MASK ;
631- iova &= PAGE_MASK ;
634+ pinned = pin_user_pages (msg -> uaddr & PAGE_MASK , npages , gup_flags ,
635+ page_list , vmas );
636+ if (npages != pinned ) {
637+ if (pinned < 0 ) {
638+ ret = pinned ;
639+ } else {
640+ unpin_user_pages (page_list , pinned );
641+ ret = - ENOMEM ;
642+ }
643+ goto unlock ;
644+ }
632645
633- while (npages ) {
634- pinned = min_t (unsigned long , npages , list_size );
635- ret = pin_user_pages (cur_base , pinned ,
636- gup_flags , page_list , NULL );
637- if (ret != pinned )
638- goto out ;
639-
640- if (!last_pfn )
641- map_pfn = page_to_pfn (page_list [0 ]);
642-
643- for (i = 0 ; i < ret ; i ++ ) {
644- unsigned long this_pfn = page_to_pfn (page_list [i ]);
645- u64 csize ;
646-
647- if (last_pfn && (this_pfn != last_pfn + 1 )) {
648- /* Pin a contiguous chunk of memory */
649- csize = (last_pfn - map_pfn + 1 ) << PAGE_SHIFT ;
650- if (vhost_vdpa_map (v , iova , csize ,
651- map_pfn << PAGE_SHIFT ,
652- msg -> perm ))
653- goto out ;
654- map_pfn = this_pfn ;
655- iova += csize ;
646+ iova &= PAGE_MASK ;
647+ map_pfn = page_to_pfn (page_list [0 ]);
648+
649+ /* One more iteration to avoid extra vdpa_map() call out of loop. */
650+ for (i = 0 ; i <= npages ; i ++ ) {
651+ unsigned long this_pfn ;
652+ u64 csize ;
653+
654+ /* The last chunk may have no valid PFN next to it */
655+ this_pfn = i < npages ? page_to_pfn (page_list [i ]) : -1UL ;
656+
657+ if (last_pfn && (this_pfn == -1UL ||
658+ this_pfn != last_pfn + 1 )) {
659+ /* Pin a contiguous chunk of memory */
660+ csize = last_pfn - map_pfn + 1 ;
661+ ret = vhost_vdpa_map (v , iova , csize << PAGE_SHIFT ,
662+ map_pfn << PAGE_SHIFT ,
663+ msg -> perm );
664+ if (ret ) {
665+ /*
666+ * Unpin the rest chunks of memory on the
667+ * flight with no corresponding vdpa_map()
668+ * calls having been made yet. On the other
669+ * hand, vdpa_unmap() in the failure path
670+ * is in charge of accounting the number of
671+ * pinned pages for its own.
672+ * This asymmetrical pattern of accounting
673+ * is for efficiency to pin all pages at
674+ * once, while there is no other callsite
675+ * of vdpa_map() than here above.
676+ */
677+ unpin_user_pages (& page_list [nmap ],
678+ npages - nmap );
679+ goto out ;
656680 }
657-
658- last_pfn = this_pfn ;
681+ atomic64_add (csize , & dev -> mm -> pinned_vm );
682+ nmap += csize ;
683+ iova += csize << PAGE_SHIFT ;
684+ map_pfn = this_pfn ;
659685 }
660-
661- cur_base += ret << PAGE_SHIFT ;
662- npages -= ret ;
686+ last_pfn = this_pfn ;
663687 }
664688
665- /* Pin the rest chunk */
666- ret = vhost_vdpa_map (v , iova , (last_pfn - map_pfn + 1 ) << PAGE_SHIFT ,
667- map_pfn << PAGE_SHIFT , msg -> perm );
689+ WARN_ON (nmap != npages );
668690out :
669- if (ret ) {
691+ if (ret )
670692 vhost_vdpa_unmap (v , msg -> iova , msg -> size );
671- atomic64_sub (npages , & dev -> mm -> pinned_vm );
672- }
693+ unlock :
673694 mmap_read_unlock (dev -> mm );
674- free_page ((unsigned long )page_list );
695+ free :
696+ kvfree (vmas );
697+ kvfree (page_list );
675698 return ret ;
676699}
677700
0 commit comments