mm, thp: change pmd_trans_huge_lock() to return taken lock
author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Thu, 14 Nov 2013 22:30:54 +0000 (14:30 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Nov 2013 00:32:14 +0000 (16:32 -0800)
With split ptlock it's important to know which lock
pmd_trans_huge_lock() took.  This patch adds one more parameter to the
function to return the lock.
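
For callers the new pattern looks like this (a condensed sketch of the
hunks below, not a complete function):

	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		/* *pmd is a stable huge pmd; the returned ptl guards it */
		/* ... operate on the huge pmd ... */
		spin_unlock(ptl);	/* was spin_unlock(&mm->page_table_lock) */
	}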

In most places migration to the new API is trivial.  The exception is
move_huge_pmd(): we need to take two locks if the pmd tables are different.
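
Condensed from the move_huge_pmd() hunk below (mm here is vma->vm_mm;
exclusive mmap_sem means we need not worry about src/dst lock ordering):

	spinlock_t *old_ptl, *new_ptl;

	if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl) == 1) {
		new_ptl = pmd_lockptr(mm, new_pmd);
		if (new_ptl != old_ptl)
			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
		/* ... move the entry from old_pmd to new_pmd ... */
		if (new_ptl != old_ptl)
			spin_unlock(new_ptl);
		spin_unlock(old_ptl);
	}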

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/proc/task_mmu.c
include/linux/huge_mm.h
mm/huge_memory.c
mm/memcontrol.c

fs/proc/task_mmu.c
index 8faaebdc6b0245ac6993b8c5d157254480377a2d..42b5cf5d03262cc082dc6fa86a79da580ef9ea46 100644
@@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
-               spin_unlock(&walk->mm->page_table_lock);
+               spin_unlock(ptl);
                mss->anonymous_thp += HPAGE_PMD_SIZE;
                return 0;
        }
@@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
        struct vm_area_struct *vma;
        struct pagemapread *pm = walk->private;
+       spinlock_t *ptl;
        pte_t *pte;
        int err = 0;
        pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
        /* find the first VMA at or above 'addr' */
        vma = find_vma(walk->mm, addr);
-       if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                int pmd_flags2;
 
                if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        if (err)
                                break;
                }
-               spin_unlock(&walk->mm->page_table_lock);
+               spin_unlock(ptl);
                return err;
        }
 
@@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
        md = walk->private;
 
-       if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
@@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                if (page)
                        gather_stats(page, md, pte_dirty(huge_pte),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
-               spin_unlock(&walk->mm->page_table_lock);
+               spin_unlock(ptl);
                return 0;
        }
 
include/linux/huge_mm.h
index 3935428c57cff80d01236428fdea979d7a9f911b..4aca0d8da11249ff26896f43d453ef1b4ecf9d10 100644
@@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
                                    unsigned long start,
                                    unsigned long end,
                                    long adjust_next);
-extern int __pmd_trans_huge_lock(pmd_t *pmd,
-                                struct vm_area_struct *vma);
+extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+               spinlock_t **ptl);
 /* mmap_sem must be held on entry */
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-                                     struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+               spinlock_t **ptl)
 {
        VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
        if (pmd_trans_huge(*pmd))
-               return __pmd_trans_huge_lock(pmd, vma);
+               return __pmd_trans_huge_lock(pmd, vma, ptl);
        else
                return 0;
 }
@@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         long adjust_next)
 {
 }
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-                                     struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+               spinlock_t **ptl)
 {
        return 0;
 }
mm/huge_memory.c
index e5b2d316be2e81398db364c2c19593e2de462e64..471eb04066ff81a0b1107f0dc845d31d3a12a4ad 100644
@@ -1376,9 +1376,10 @@ out:
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 pmd_t *pmd, unsigned long addr)
 {
+       spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                struct page *page;
                pgtable_t pgtable;
                pmd_t orig_pmd;
@@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
                if (is_huge_zero_pmd(orig_pmd)) {
                        atomic_long_dec(&tlb->mm->nr_ptes);
-                       spin_unlock(&tlb->mm->page_table_lock);
+                       spin_unlock(ptl);
                        put_huge_zero_page();
                } else {
                        page = pmd_page(orig_pmd);
@@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                        VM_BUG_ON(!PageHead(page));
                        atomic_long_dec(&tlb->mm->nr_ptes);
-                       spin_unlock(&tlb->mm->page_table_lock);
+                       spin_unlock(ptl);
                        tlb_remove_page(tlb, page);
                }
                pte_free(tlb->mm, pgtable);
@@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end,
                unsigned char *vec)
 {
+       spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                /*
                 * All logical pages in the range are present
                 * if backed by a huge page.
                 */
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
                memset(vec, 1, (end - addr) >> PAGE_SHIFT);
                ret = 1;
        }
@@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                  unsigned long new_addr, unsigned long old_end,
                  pmd_t *old_pmd, pmd_t *new_pmd)
 {
+       spinlock_t *old_ptl, *new_ptl;
        int ret = 0;
        pmd_t pmd;
 
@@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                goto out;
        }
 
-       ret = __pmd_trans_huge_lock(old_pmd, vma);
+       /*
+        * We don't have to worry about the ordering of src and dst
+        * ptlocks because exclusive mmap_sem prevents deadlock.
+        */
+       ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
        if (ret == 1) {
+               new_ptl = pmd_lockptr(mm, new_pmd);
+               if (new_ptl != old_ptl)
+                       spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
                pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
                VM_BUG_ON(!pmd_none(*new_pmd));
                set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-               spin_unlock(&mm->page_table_lock);
+               if (new_ptl != old_ptl)
+                       spin_unlock(new_ptl);
+               spin_unlock(old_ptl);
        }
 out:
        return ret;
@@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, pgprot_t newprot, int prot_numa)
 {
        struct mm_struct *mm = vma->vm_mm;
+       spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pmd_t entry;
                ret = 1;
                if (!prot_numa) {
@@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                if (ret == HPAGE_PMD_NR)
                        set_pmd_at(mm, addr, pmd, entry);
 
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
        }
 
        return ret;
@@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns 1, this routine returns without unlocking page
  * table locks. So callers must unlock them.
  */
-int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+               spinlock_t **ptl)
 {
-       spin_lock(&vma->vm_mm->page_table_lock);
+       *ptl = pmd_lock(vma->vm_mm, pmd);
        if (likely(pmd_trans_huge(*pmd))) {
                if (unlikely(pmd_trans_splitting(*pmd))) {
-                       spin_unlock(&vma->vm_mm->page_table_lock);
+                       spin_unlock(*ptl);
                        wait_split_huge_page(vma->anon_vma, pmd);
                        return -1;
                } else {
@@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
                        return 1;
                }
        }
-       spin_unlock(&vma->vm_mm->page_table_lock);
+       spin_unlock(*ptl);
        return 0;
 }
 
mm/memcontrol.c
index e3cd40b2d5d926d45344bcdf2c513c2ee074c438..f1a0ae6e11b86b3020c90d7241ba12d47d2bbaa8 100644
@@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
                return 0;
        }
 
@@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
         *    to be unlocked in __split_huge_page_splitting(), where the main
         *    part of thp split is not executed yet.
         */
-       if (pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                if (mc.precharge < HPAGE_PMD_NR) {
-                       spin_unlock(&vma->vm_mm->page_table_lock);
+                       spin_unlock(ptl);
                        return 0;
                }
                target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                        }
                        put_page(page);
                }
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
                return 0;
        }