index bed30efad77cde82edc22af7bb21b51fa88e88d3..a6e24e246f8688af7664966e66d99dbd038b8066 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -55,7 +55,7 @@ retry:
                 */
                if (likely(!(flags & FOLL_MIGRATION)))
                        goto no_page;
-               if (pte_none(pte) || pte_file(pte))
+               if (pte_none(pte))
                        goto no_page;
                entry = pte_to_swp_entry(pte);
                if (!is_migration_entry(entry))
@@ -64,7 +64,7 @@ retry:
                migration_entry_wait(mm, pmd, address);
                goto retry;
        }
-       if ((flags & FOLL_NUMA) && pte_numa(pte))
+       if ((flags & FOLL_NUMA) && pte_protnone(pte))
                goto no_page;
        if ((flags & FOLL_WRITE) && !pte_write(pte)) {
                pte_unmap_unlock(ptep, ptl);
@@ -167,10 +167,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        if (pud_none(*pud))
                return no_page_table(vma, flags);
        if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
-               if (flags & FOLL_GET)
-                       return NULL;
-               page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
-               return page;
+               page = follow_huge_pud(mm, address, pud, flags);
+               if (page)
+                       return page;
+               return no_page_table(vma, flags);
        }
        if (unlikely(pud_bad(*pud)))
                return no_page_table(vma, flags);
@@ -179,21 +179,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        if (pmd_none(*pmd))
                return no_page_table(vma, flags);
        if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
-               page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
-               if (flags & FOLL_GET) {
-                       /*
-                        * Refcount on tail pages are not well-defined and
-                        * shouldn't be taken. The caller should handle a NULL
-                        * return when trying to follow tail pages.
-                        */
-                       if (PageHead(page))
-                               get_page(page);
-                       else
-                               page = NULL;
-               }
-               return page;
+               page = follow_huge_pmd(mm, address, pmd, flags);
+               if (page)
+                       return page;
+               return no_page_table(vma, flags);
        }
-       if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+       if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
                return no_page_table(vma, flags);
        if (pmd_trans_huge(*pmd)) {
                if (flags & FOLL_SPLIT) {
@@ -296,7 +287,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
@@ -571,7 +562,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
@@ -584,6 +575,185 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
        return 0;
 }
 
+static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
+                                               struct mm_struct *mm,
+                                               unsigned long start,
+                                               unsigned long nr_pages,
+                                               int write, int force,
+                                               struct page **pages,
+                                               struct vm_area_struct **vmas,
+                                               int *locked, bool notify_drop,
+                                               unsigned int flags)
+{
+       long ret, pages_done;
+       bool lock_dropped;
+
+       if (locked) {
+               /* if VM_FAULT_RETRY can be returned, vmas become invalid */
+               BUG_ON(vmas);
+               /* check caller initialized locked */
+               BUG_ON(*locked != 1);
+       }
+
+       if (pages)
+               flags |= FOLL_GET;
+       if (write)
+               flags |= FOLL_WRITE;
+       if (force)
+               flags |= FOLL_FORCE;
+
+       pages_done = 0;
+       lock_dropped = false;
+       for (;;) {
+               ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
+                                      vmas, locked);
+               if (!locked)
+                       /* VM_FAULT_RETRY couldn't trigger, bypass */
+                       return ret;
+
+               /* VM_FAULT_RETRY cannot return errors */
+               if (!*locked) {
+                       BUG_ON(ret < 0);
+                       BUG_ON(ret >= nr_pages);
+               }
+
+               if (!pages)
+                       /* If it's a prefault don't insist harder */
+                       return ret;
+
+               if (ret > 0) {
+                       nr_pages -= ret;
+                       pages_done += ret;
+                       if (!nr_pages)
+                               break;
+               }
+               if (*locked) {
+                       /* VM_FAULT_RETRY didn't trigger */
+                       if (!pages_done)
+                               pages_done = ret;
+                       break;
+               }
+               /* VM_FAULT_RETRY triggered, so seek to the faulting offset */
+               pages += ret;
+               start += ret << PAGE_SHIFT;
+
+               /*
+                * Repeat on the address that fired VM_FAULT_RETRY
+                * without FAULT_FLAG_ALLOW_RETRY but with
+                * FAULT_FLAG_TRIED.
+                */
+               *locked = 1;
+               lock_dropped = true;
+               down_read(&mm->mmap_sem);
+               ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
+                                      pages, NULL, NULL);
+               if (ret != 1) {
+                       BUG_ON(ret > 1);
+                       if (!pages_done)
+                               pages_done = ret;
+                       break;
+               }
+               nr_pages--;
+               pages_done++;
+               if (!nr_pages)
+                       break;
+               pages++;
+               start += PAGE_SIZE;
+       }
+       if (notify_drop && lock_dropped && *locked) {
+               /*
+                * We must let the caller know we temporarily dropped the lock
+                * and so the critical section protected by it was lost.
+                */
+               up_read(&mm->mmap_sem);
+               *locked = 0;
+       }
+       return pages_done;
+}
+
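The helper above centralizes the VM_FAULT_RETRY handling that callers used to open-code: when handle_mm_fault() drops mmap_sem, the loop retakes it and retries only the faulting page with FOLL_TRIED. A rough, illustrative sketch of that single-page retry from a caller's point of view (tsk, mm, start, flags and pages are assumed to be set up as in the loop above; this is not a drop-in replacement for the helper):

        int locked = 1;
        long ret;

        down_read(&mm->mmap_sem);
        ret = __get_user_pages(tsk, mm, start, 1, flags, pages, NULL, &locked);
        if (!locked) {
                /* the fault handler dropped mmap_sem; retake it and retry once */
                down_read(&mm->mmap_sem);
                ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
                                       pages, NULL, NULL);
        }
        up_read(&mm->mmap_sem);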
+/*
+ * We can leverage the VM_FAULT_RETRY functionality in the page fault
+ * paths better by using either get_user_pages_locked() or
+ * get_user_pages_unlocked().
+ *
+ * get_user_pages_locked() is suitable to replace the form:
+ *
+ *      down_read(&mm->mmap_sem);
+ *      do_something()
+ *      get_user_pages(tsk, mm, ..., pages, NULL);
+ *      up_read(&mm->mmap_sem);
+ *
+ *  to:
+ *
+ *      int locked = 1;
+ *      down_read(&mm->mmap_sem);
+ *      do_something()
+ *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
+ *      if (locked)
+ *          up_read(&mm->mmap_sem);
+ */
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+                          unsigned long start, unsigned long nr_pages,
+                          int write, int force, struct page **pages,
+                          int *locked)
+{
+       return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+                                      pages, NULL, locked, true, FOLL_TOUCH);
+}
+EXPORT_SYMBOL(get_user_pages_locked);
+
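A minimal, hypothetical caller following the conversion pattern described in the comment above; user_addr and the page count are placeholders, and error handling is elided for brevity:

        struct page *pages[16];
        int locked = 1;
        long ret;

        down_read(&current->mm->mmap_sem);
        ret = get_user_pages_locked(current, current->mm, user_addr, 16,
                                    1 /* write */, 0 /* force */,
                                    pages, &locked);
        if (locked)
                up_read(&current->mm->mmap_sem);
        /* ret is the number of pages pinned (<= 16) or a negative errno */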
+/*
+ * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows
+ * passing additional gup_flags as the last parameter (like FOLL_HWPOISON).
+ *
+ * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
+ * caller if required (just like with __get_user_pages). "FOLL_GET",
+ * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
+ * according to the parameters "pages", "write", "force"
+ * respectively.
+ */
+__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+                                              unsigned long start, unsigned long nr_pages,
+                                              int write, int force, struct page **pages,
+                                              unsigned int gup_flags)
+{
+       long ret;
+       int locked = 1;
+       down_read(&mm->mmap_sem);
+       ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+                                     pages, NULL, &locked, false, gup_flags);
+       if (locked)
+               up_read(&mm->mmap_sem);
+       return ret;
+}
+EXPORT_SYMBOL(__get_user_pages_unlocked);
+
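One plausible use of the extra gup_flags argument, based on the FOLL_HWPOISON example mentioned in the comment above. This is only a sketch: user_addr is a placeholder, and FOLL_TOUCH has to be passed explicitly here, unlike with get_user_pages_unlocked():

        struct page *page;
        long ret;

        ret = __get_user_pages_unlocked(current, current->mm, user_addr, 1,
                                        0 /* write */, 0 /* force */, &page,
                                        FOLL_TOUCH | FOLL_HWPOISON);
        /* ret is 1 on success, or a negative errno (e.g. -EHWPOISON) */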
+/*
+ * get_user_pages_unlocked() is suitable to replace the form:
+ *
+ *      down_read(&mm->mmap_sem);
+ *      get_user_pages(tsk, mm, ..., pages, NULL);
+ *      up_read(&mm->mmap_sem);
+ *
+ *  with:
+ *
+ *      get_user_pages_unlocked(tsk, mm, ..., pages);
+ *
+ * It is functionally equivalent to get_user_pages_fast, so
+ * get_user_pages_fast should be used instead when the two parameters
+ * "tsk" and "mm" are respectively equal to current and current->mm,
+ * unless "force" shall be set to 1 (get_user_pages_fast lacks the
+ * "force" parameter).
+ */
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+                            unsigned long start, unsigned long nr_pages,
+                            int write, int force, struct page **pages)
+{
+       return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
+                                        force, pages, FOLL_TOUCH);
+}
+EXPORT_SYMBOL(get_user_pages_unlocked);
+
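Whichever variant pins the pages, the caller still owns the references taken via FOLL_GET and must drop them when done. A short, hypothetical sequence (user_addr, nr_pages and pages are placeholders):

        long i, ret;

        ret = get_user_pages_unlocked(current, current->mm, user_addr,
                                      nr_pages, 0 /* write */, 0 /* force */,
                                      pages);
        if (ret > 0) {
                /* ... use the pinned pages ... */
                for (i = 0; i < ret; i++)
                        put_page(pages[i]);
        }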
 /*
  * get_user_pages() - pin user pages in memory
  * @tsk:       the task_struct to use for page fault accounting, or
@@ -633,22 +803,18 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
  * use the correct cache flushing APIs.
  *
  * See also get_user_pages_fast, for performance critical applications.
+ *
+ * get_user_pages should be phased out in favor of
+ * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
+ * should use get_user_pages because it cannot pass
+ * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
  */
 long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, unsigned long nr_pages, int write,
                int force, struct page **pages, struct vm_area_struct **vmas)
 {
-       int flags = FOLL_TOUCH;
-
-       if (pages)
-               flags |= FOLL_GET;
-       if (write)
-               flags |= FOLL_WRITE;
-       if (force)
-               flags |= FOLL_FORCE;
-
-       return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
-                               NULL);
+       return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+                                      pages, vmas, NULL, false, FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
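Following the "phased out" note above: when the caller operates on its own address space and does not need "force", the same pin can usually be obtained with get_user_pages_fast(), whose prototype appears further down in this file. A hedged before/after sketch, with start, nr, write and pages as placeholders:

        long ret;

        /* old style, requiring explicit mmap_sem handling */
        down_read(&current->mm->mmap_sem);
        ret = get_user_pages(current, current->mm, start, nr, write,
                             0 /* force */, pages, NULL);
        up_read(&current->mm->mmap_sem);

        /* preferred when tsk == current and mm == current->mm */
        ret = get_user_pages_fast(start, nr, write, pages);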
 
@@ -740,10 +906,10 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
                /*
                 * Similar to the PMD case below, NUMA hinting must take slow
-                * path
+                * path using the pte_protnone check.
                 */
                if (!pte_present(pte) || pte_special(pte) ||
-                       pte_numa(pte) || (write && !pte_write(pte)))
+                       pte_protnone(pte) || (write && !pte_write(pte)))
                        goto pte_unmap;
 
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -938,7 +1104,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                         * slowpath for accounting purposes and so that they
                         * can be serialised against THP migration.
                         */
-                       if (pmd_numa(pmd))
+                       if (pmd_protnone(pmd))
                                return 0;
 
                        if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
@@ -1077,10 +1243,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                start += nr << PAGE_SHIFT;
                pages += nr;
 
-               down_read(&mm->mmap_sem);
-               ret = get_user_pages(current, mm, start,
-                                    nr_pages - nr, write, 0, pages, NULL);
-               up_read(&mm->mmap_sem);
+               ret = get_user_pages_unlocked(current, mm, start,
+                                             nr_pages - nr, write, 0, pages);
 
                /* Have to be a bit careful with return values */
                if (nr > 0) {
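The slow-path fallback above now relies on get_user_pages_unlocked() instead of open-coding the mmap_sem dance. From a caller's perspective, get_user_pages_fast() may still pin fewer pages than requested, and every page it did pin must be released with put_page(); a small, hypothetical example (user_addr is a placeholder):

        struct page *pages[4];
        int got;

        got = get_user_pages_fast(user_addr, 4, 1 /* write */, pages);
        if (got > 0) {
                /* ... access the pinned pages ... */
                while (got--)
                        put_page(pages[got]);
        }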