Merge branch 'master' into for_paulus
[linux-drm-fsl-dcu.git] / fs / dcache.c
index fc2faa44f8d185b6be9b8600a4ffb9991d5b980f..b5f613932912436da57d062d6ffbe91c2c5e139a 100644 (file)
@@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+static struct kmem_cache *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -68,15 +68,19 @@ struct dentry_stat_t dentry_stat = {
        .age_limit = 45,
 };
 
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
 {
-       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void d_callback(struct rcu_head *head)
+{
+       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
+       __d_free(dentry);
+}
+
 /*
  * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
  * inside dcache_lock.
@@ -85,7 +89,11 @@ static void d_free(struct dentry *dentry)
 {
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
-       call_rcu(&dentry->d_u.d_rcu, d_callback);
+       /* if dentry was never inserted into hash, immediate free is OK */
+       if (dentry->d_hash.pprev == NULL)
+               __d_free(dentry);
+       else
+               call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
@@ -291,9 +299,9 @@ struct dentry * dget_locked(struct dentry *dentry)
  * it can be unhashed only if it has no children, or if it is the root
  * of a filesystem.
  *
- * If the inode has a DCACHE_DISCONNECTED alias, then prefer
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
  * any other hashed alias over that one unless @want_discon is set,
- * in which case only return a DCACHE_DISCONNECTED alias.
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
  */
 
 static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
@@ -309,7 +317,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
                prefetch(next);
                alias = list_entry(tmp, struct dentry, d_alias);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
-                       if (alias->d_flags & DCACHE_DISCONNECTED)
+                       if (IS_ROOT(alias) &&
+                           (alias->d_flags & DCACHE_DISCONNECTED))
                                discon_alias = alias;
                        else if (!want_discon) {
                                __dget_locked(alias);
@@ -477,11 +486,12 @@ static void prune_dcache(int count, struct super_block *sb)
                        up_read(s_umount);
                }
                spin_unlock(&dentry->d_lock);
-               /* Cannot remove the first dentry, and it isn't appropriate
-                * to move it to the head of the list, so give up, and try
-                * later
+               /*
+                * Insert dentry at the head of the list as inserting at the
+                * tail leads to a cycle.
                 */
-               break;
+               list_add(&dentry->d_lru, &dentry_unused);
+               dentry_stat.nr_unused++;
        }
        spin_unlock(&dcache_lock);
 }
@@ -547,6 +557,142 @@ repeat:
        spin_unlock(&dcache_lock);
 }
 
+/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ *   locking
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+       struct dentry *parent;
+       unsigned detached = 0;
+
+       BUG_ON(!IS_ROOT(dentry));
+
+       /* detach this root from the system */
+       spin_lock(&dcache_lock);
+       if (!list_empty(&dentry->d_lru)) {
+               dentry_stat.nr_unused--;
+               list_del_init(&dentry->d_lru);
+       }
+       __d_drop(dentry);
+       spin_unlock(&dcache_lock);
+
+       for (;;) {
+               /* descend to the first leaf in the current subtree */
+               while (!list_empty(&dentry->d_subdirs)) {
+                       struct dentry *loop;
+
+                       /* this is a branch with children - detach all of them
+                        * from the system in one go */
+                       spin_lock(&dcache_lock);
+                       list_for_each_entry(loop, &dentry->d_subdirs,
+                                           d_u.d_child) {
+                               if (!list_empty(&loop->d_lru)) {
+                                       dentry_stat.nr_unused--;
+                                       list_del_init(&loop->d_lru);
+                               }
+
+                               __d_drop(loop);
+                               cond_resched_lock(&dcache_lock);
+                       }
+                       spin_unlock(&dcache_lock);
+
+                       /* move to the first child */
+                       dentry = list_entry(dentry->d_subdirs.next,
+                                           struct dentry, d_u.d_child);
+               }
+
+               /* consume the dentries from this leaf up through its parents
+                * until we find one with children or run out altogether */
+               do {
+                       struct inode *inode;
+
+                       if (atomic_read(&dentry->d_count) != 0) {
+                               printk(KERN_ERR
+                                      "BUG: Dentry %p{i=%lx,n=%s}"
+                                      " still in use (%d)"
+                                      " [unmount of %s %s]\n",
+                                      dentry,
+                                      dentry->d_inode ?
+                                      dentry->d_inode->i_ino : 0UL,
+                                      dentry->d_name.name,
+                                      atomic_read(&dentry->d_count),
+                                      dentry->d_sb->s_type->name,
+                                      dentry->d_sb->s_id);
+                               BUG();
+                       }
+
+                       parent = dentry->d_parent;
+                       if (parent == dentry)
+                               parent = NULL;
+                       else
+                               atomic_dec(&parent->d_count);
+
+                       list_del(&dentry->d_u.d_child);
+                       detached++;
+
+                       inode = dentry->d_inode;
+                       if (inode) {
+                               dentry->d_inode = NULL;
+                               list_del_init(&dentry->d_alias);
+                               if (dentry->d_op && dentry->d_op->d_iput)
+                                       dentry->d_op->d_iput(dentry, inode);
+                               else
+                                       iput(inode);
+                       }
+
+                       d_free(dentry);
+
+                       /* finished when we fall off the top of the tree,
+                        * otherwise we ascend to the parent and move to the
+                        * next sibling if there is one */
+                       if (!parent)
+                               goto out;
+
+                       dentry = parent;
+
+               } while (list_empty(&dentry->d_subdirs));
+
+               dentry = list_entry(dentry->d_subdirs.next,
+                                   struct dentry, d_u.d_child);
+       }
+out:
+       /* several dentries were freed, need to correct nr_dentry */
+       spin_lock(&dcache_lock);
+       dentry_stat.nr_dentry -= detached;
+       spin_unlock(&dcache_lock);
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ *   removing the dentry from the system lists and hashes because:
+ *   - the superblock is detached from all mountings and open files, so the
+ *     dentry trees will not be rearranged by the VFS
+ *   - s_umount is write-locked, so the memory pressure shrinker will ignore
+ *     any dentries belonging to this superblock that it comes across
+ *   - the filesystem itself is no longer permitted to rearrange the dentries
+ *     in this superblock
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+       struct dentry *dentry;
+
+       if (down_read_trylock(&sb->s_umount))
+               BUG();
+
+       dentry = sb->s_root;
+       sb->s_root = NULL;
+       atomic_dec(&dentry->d_count);
+       shrink_dcache_for_umount_subtree(dentry);
+
+       while (!hlist_empty(&sb->s_anon)) {
+               dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+               shrink_dcache_for_umount_subtree(dentry);
+       }
+}
+
 /*
  * Search for at least 1 mount point in the dentry's subdirs.
  * We descend to the next level whenever the d_subdirs
@@ -1004,7 +1150,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 {
        struct dentry *new = NULL;
 
-       if (inode) {
+       if (inode && S_ISDIR(inode->i_mode)) {
                spin_lock(&dcache_lock);
                new = __d_find_alias(inode, 1);
                if (new) {
@@ -1338,23 +1484,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
  * deleted it.
  */
  
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way.
  */
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
 {
        struct hlist_head *list;
 
        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
-       spin_lock(&dcache_lock);
        write_seqlock(&rename_lock);
        /*
         * XXXX: do we really need to take target->d_lock?
@@ -1405,9 +1549,83 @@ already_unhashed:
        fsnotify_d_move(dentry);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+       spin_lock(&dcache_lock);
+       d_move_locked(dentry, target);
        spin_unlock(&dcache_lock);
 }
 
+/*
+ * Helper that returns 1 if p1 is a parent of p2, else 0
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+       struct dentry *p;
+
+       for (p = p2; p->d_parent != p; p = p->d_parent) {
+               if (p->d_parent == p1)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+       struct mutex *m1 = NULL, *m2 = NULL;
+       struct dentry *ret;
+
+       /* If alias and dentry share a parent, then no extra locks required */
+       if (alias->d_parent == dentry->d_parent)
+               goto out_unalias;
+
+       /* Check for loops */
+       ret = ERR_PTR(-ELOOP);
+       if (d_isparent(alias, dentry))
+               goto out_err;
+
+       /* See lock_rename() */
+       ret = ERR_PTR(-EBUSY);
+       if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+               goto out_err;
+       m1 = &dentry->d_sb->s_vfs_rename_mutex;
+       if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+               goto out_err;
+       m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+       d_move_locked(alias, dentry);
+       ret = alias;
+out_err:
+       spin_unlock(&dcache_lock);
+       if (m2)
+               mutex_unlock(m2);
+       if (m1)
+               mutex_unlock(m1);
+       return ret;
+}
+
 /*
  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
  * named dentry in place of the dentry to be replaced.
@@ -1450,7 +1668,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
-       struct dentry *alias, *actual;
+       struct dentry *actual;
 
        BUG_ON(!d_unhashed(dentry));
 
@@ -1462,26 +1680,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
                goto found_lock;
        }
 
-       /* See if a disconnected directory already exists as an anonymous root
-        * that we should splice into the tree instead */
-       if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
-               spin_lock(&alias->d_lock);
-
-               /* Is this a mountpoint that we could splice into our tree? */
-               if (IS_ROOT(alias))
-                       goto connect_mountpoint;
-
-               if (alias->d_name.len == dentry->d_name.len &&
-                   alias->d_parent == dentry->d_parent &&
-                   memcmp(alias->d_name.name,
-                          dentry->d_name.name,
-                          dentry->d_name.len) == 0)
-                       goto replace_with_alias;
-
-               spin_unlock(&alias->d_lock);
-
-               /* Doh! Seem to be aliasing directories for some reason... */
-               dput(alias);
+       if (S_ISDIR(inode->i_mode)) {
+               struct dentry *alias;
+
+               /* Does an aliased dentry already exist? */
+               alias = __d_find_alias(inode, 0);
+               if (alias) {
+                       actual = alias;
+                       /* Is this an anonymous mountpoint that we could splice
+                        * into our tree? */
+                       if (IS_ROOT(alias)) {
+                               spin_lock(&alias->d_lock);
+                               __d_materialise_dentry(dentry, alias);
+                               __d_drop(alias);
+                               goto found;
+                       }
+                       /* Nope, but we must(!) avoid directory aliasing */
+                       actual = __d_unalias(dentry, alias);
+                       if (IS_ERR(actual))
+                               dput(alias);
+                       goto out_nolock;
+               }
        }
 
        /* Add a unique reference */
@@ -1497,7 +1716,7 @@ found:
        _d_rehash(actual);
        spin_unlock(&actual->d_lock);
        spin_unlock(&dcache_lock);
-
+out_nolock:
        if (actual == dentry) {
                security_d_instantiate(dentry, inode);
                return NULL;
@@ -1506,16 +1725,6 @@ found:
        iput(inode);
        return actual;
 
-       /* Convert the anonymous/root alias into an ordinary dentry */
-connect_mountpoint:
-       __d_materialise_dentry(dentry, alias);
-
-       /* Replace the candidate dentry with the alias in the tree */
-replace_with_alias:
-       __d_drop(alias);
-       actual = alias;
-       goto found;
-
 shouldnt_be_hashed:
        spin_unlock(&dcache_lock);
        BUG();
@@ -1530,45 +1739,41 @@ shouldnt_be_hashed:
  * @rootmnt: vfsmnt to which the root dentry belongs
  * @buffer: buffer to return value in
  * @buflen: buffer length
+ * @fail_deleted: what to return for deleted files
  *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns the buffer or an error code if the path was too long.
+ * Convert a dentry into an ASCII path name. If the entry has been deleted,
+ * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise,
+ * the the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * Returns the buffer or an error code.
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
-                       struct dentry *root, struct vfsmount *rootmnt,
-                       char *buffer, int buflen)
+static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+                     struct dentry *root, struct vfsmount *rootmnt,
+                     char *buffer, int buflen, int fail_deleted)
 {
-       char * end = buffer+buflen;
-       char * retval;
-       int namelen;
+       int namelen, is_slash;
 
-       *--end = '\0';
-       buflen--;
+       if (buflen < 2)
+               return ERR_PTR(-ENAMETOOLONG);
+       buffer += --buflen;
+       *buffer = '\0';
+
+       spin_lock(&dcache_lock);
        if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-               buflen -= 10;
-               end -= 10;
-               if (buflen < 0)
+               if (fail_deleted) {
+                       buffer = ERR_PTR(-ENOENT);
+                       goto out;
+               }
+               if (buflen < 10)
                        goto Elong;
-               memcpy(end, " (deleted)", 10);
+               buflen -= 10;
+               buffer -= 10;
+               memcpy(buffer, " (deleted)", 10);
        }
-
-       if (buflen < 1)
-               goto Elong;
-       /* Get '/' right */
-       retval = end-1;
-       *retval = '/';
-
-       for (;;) {
+       while (dentry != root || vfsmnt != rootmnt) {
                struct dentry * parent;
 
-               if (dentry == root && vfsmnt == rootmnt)
-                       break;
                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-                       /* Global root? */
                        spin_lock(&vfsmount_lock);
                        if (vfsmnt->mnt_parent == vfsmnt) {
                                spin_unlock(&vfsmount_lock);
@@ -1582,33 +1787,60 @@ static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
                parent = dentry->d_parent;
                prefetch(parent);
                namelen = dentry->d_name.len;
-               buflen -= namelen + 1;
-               if (buflen < 0)
+               if (buflen <= namelen)
                        goto Elong;
-               end -= namelen;
-               memcpy(end, dentry->d_name.name, namelen);
-               *--end = '/';
-               retval = end;
+               buflen -= namelen + 1;
+               buffer -= namelen;
+               memcpy(buffer, dentry->d_name.name, namelen);
+               *--buffer = '/';
                dentry = parent;
        }
+       /* Get '/' right */
+       if (*buffer != '/')
+               *--buffer = '/';
 
-       return retval;
+out:
+       spin_unlock(&dcache_lock);
+       return buffer;
 
 global_root:
+       /*
+        * We went past the (vfsmount, dentry) we were looking for and have
+        * either hit a root dentry, a lazily unmounted dentry, an
+        * unconnected dentry, or the file is on a pseudo filesystem.
+        */
        namelen = dentry->d_name.len;
-       buflen -= namelen;
-       if (buflen < 0)
+       is_slash = (namelen == 1 && *dentry->d_name.name == '/');
+       if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) {
+               /*
+                * Make sure we won't return a pathname starting with '/'.
+                *
+                * Historically, we also glue together the root dentry and
+                * remaining name for pseudo filesystems like pipefs, which
+                * have the MS_NOUSER flag set. This results in pathnames
+                * like "pipe:[439336]".
+                */
+               if (*buffer == '/') {
+                       buffer++;
+                       buflen++;
+               }
+               if (is_slash)
+                       goto out;
+       }
+       if (buflen < namelen)
                goto Elong;
-       retval -= namelen-1;    /* hit the slash */
-       memcpy(retval, dentry->d_name.name, namelen);
-       return retval;
+       buffer -= namelen;
+       memcpy(buffer, dentry->d_name.name, namelen);
+       goto out;
+
 Elong:
-       return ERR_PTR(-ENAMETOOLONG);
+       buffer = ERR_PTR(-ENAMETOOLONG);
+       goto out;
 }
 
 /* write full pathname into buffer and return start of pathname */
-char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
-                               char *buf, int buflen)
+char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf,
+            int buflen)
 {
        char *res;
        struct vfsmount *rootmnt;
@@ -1618,9 +1850,7 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
        rootmnt = mntget(current->fs->rootmnt);
        root = dget(current->fs->root);
        read_unlock(&current->fs->lock);
-       spin_lock(&dcache_lock);
-       res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
-       spin_unlock(&dcache_lock);
+       res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0);
        dput(root);
        mntput(rootmnt);
        return res;
@@ -1646,10 +1876,10 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
  */
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
 {
-       int error;
+       int error, len;
        struct vfsmount *pwdmnt, *rootmnt;
        struct dentry *pwd, *root;
-       char *page = (char *) __get_free_page(GFP_USER);
+       char *page = (char *) __get_free_page(GFP_USER), *cwd;
 
        if (!page)
                return -ENOMEM;
@@ -1661,29 +1891,18 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
        root = dget(current->fs->root);
        read_unlock(&current->fs->lock);
 
-       error = -ENOENT;
-       /* Has the current directory has been unlinked? */
-       spin_lock(&dcache_lock);
-       if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
-               unsigned long len;
-               char * cwd;
-
-               cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
-               spin_unlock(&dcache_lock);
-
-               error = PTR_ERR(cwd);
-               if (IS_ERR(cwd))
-                       goto out;
+       cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1);
+       error = PTR_ERR(cwd);
+       if (IS_ERR(cwd))
+               goto out;
 
-               error = -ERANGE;
-               len = PAGE_SIZE + page - cwd;
-               if (len <= size) {
-                       error = len;
-                       if (copy_to_user(buf, cwd, len))
-                               error = -EFAULT;
-               }
-       } else
-               spin_unlock(&dcache_lock);
+       error = -ERANGE;
+       len = PAGE_SIZE + page - cwd;
+       if (len <= size) {
+               error = len;
+               if (copy_to_user(buf, cwd, len))
+                       error = -EFAULT;
+       }
 
 out:
        dput(pwd);
@@ -1871,10 +2090,10 @@ static void __init dcache_init(unsigned long mempages)
 }
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep __read_mostly;
+struct kmem_cache *names_cachep __read_mostly;
 
 /* SLAB cache for file structures */
-kmem_cache_t *filp_cachep __read_mostly;
+struct kmem_cache *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);