Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 17:26:23 +0000 (10:26 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 17:26:23 +0000 (10:26 -0700)
Pull second vfs pile from Al Viro:
 "The stuff in there: fsfreeze deadlock fixes by Jan (essentially, the
  deadlock reproduced by xfstests 068), symlink and hardlink restriction
  patches, plus assorted cleanups and fixes.

  Note that another fsfreeze deadlock (emergency thaw one) is *not*
  dealt with - the series by Fernando conflicts a lot with Jan's, breaks
  userland ABI (FIFREEZE semantics gets changed) and trades the deadlock
  for massive vfsmount leak; this is going to be handled next cycle.
  There probably will be another pull request, but that stuff won't be
  in it."

Fix up trivial conflicts due to unrelated changes next to each other in
drivers/{staging/gdm72xx/usb_boot.c, usb/gadget/storage_common.c}

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (54 commits)
  delousing target_core_file a bit
  Documentation: Correct s_umount state for freeze_fs/unfreeze_fs
  fs: Remove old freezing mechanism
  ext2: Implement freezing
  btrfs: Convert to new freezing mechanism
  nilfs2: Convert to new freezing mechanism
  ntfs: Convert to new freezing mechanism
  fuse: Convert to new freezing mechanism
  gfs2: Convert to new freezing mechanism
  ocfs2: Convert to new freezing mechanism
  xfs: Convert to new freezing code
  ext4: Convert to new freezing mechanism
  fs: Protect write paths by sb_start_write - sb_end_write
  fs: Skip atime update on frozen filesystem
  fs: Add freezing handling to mnt_want_write() / mnt_drop_write()
  fs: Improve filesystem freezing handling
  switch the protection of percpu_counter list to spinlock
  nfsd: Push mnt_want_write() outside of i_mutex
  btrfs: Push mnt_want_write() outside of i_mutex
  fat: Push mnt_want_write() outside of i_mutex
  ...

84 files changed:
Documentation/filesystems/Locking
Documentation/sysctl/fs.txt
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/cell/spufs/syscalls.c
drivers/base/devtmpfs.c
drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
drivers/staging/bcm/Misc.c
drivers/staging/gdm72xx/sdio_boot.c
drivers/staging/gdm72xx/usb_boot.c
drivers/target/target_core_file.c
drivers/usb/gadget/storage_common.c
drivers/usb/gadget/u_uac1.c
drivers/video/fb_defio.c
fs/9p/vfs_file.c
fs/btrfs/disk-io.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/transaction.c
fs/buffer.c
fs/ceph/addr.c
fs/ecryptfs/inode.c
fs/exec.c
fs/ext2/inode.c
fs/ext2/super.c
fs/ext4/inode.c
fs/ext4/mmp.c
fs/ext4/super.c
fs/fat/file.c
fs/file_table.c
fs/fuse/file.c
fs/gfs2/file.c
fs/gfs2/trans.c
fs/inode.c
fs/internal.h
fs/lockd/clntproc.c
fs/lockd/svc4proc.c
fs/lockd/svclock.c
fs/lockd/svcproc.c
fs/namei.c
fs/namespace.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfsfh.c
fs/nfsd/nfsproc.c
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nilfs2/file.c
fs/nilfs2/ioctl.c
fs/nilfs2/segment.c
fs/ntfs/file.c
fs/ocfs2/file.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/mmap.c
fs/ocfs2/refcounttree.c
fs/open.c
fs/pipe.c
fs/splice.c
fs/super.c
fs/sysfs/bin.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_file.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_sync.c
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
include/linux/audit.h
include/linux/fs.h
include/linux/mm.h
include/linux/namei.h
include/linux/nfsd/nfsfh.h
include/linux/pipe_fs_i.h
kernel/audit.c
kernel/sysctl.c
lib/percpu_counter.c
mm/filemap.c
mm/filemap_xip.c
mm/memory.c
net/unix/af_unix.c
sound/sound_firmware.c

index 7f647e17830cc097023fa6fea5b14ac971da6437..0f103e39b4f6a025cb486cbdfb21126c78992738 100644 (file)
@@ -138,8 +138,8 @@ evict_inode:
 put_super:             write
 write_super:           read
 sync_fs:               read
-freeze_fs:             read
-unfreeze_fs:           read
+freeze_fs:             write
+unfreeze_fs:           write
 statfs:                        maybe(read)     (see below)
 remount_fs:            write
 umount_begin:          no
index 8c235b6e42460c5ed7eb485a1898cbcfb97793a0..88152f214f48cb69c643d4bf2ff2ac9a61ad2eb0 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
 - nr_open
 - overflowuid
 - overflowgid
+- protected_hardlinks
+- protected_symlinks
 - suid_dumpable
 - super-max
 - super-nr
@@ -157,6 +159,46 @@ The default is 65534.
 
 ==============================================================
 
+protected_hardlinks:
+
+A long-standing class of security issues is the hardlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given hardlink (i.e. a
+root process follows a hardlink created by another user). Additionally,
+on systems without separated partitions, this stops unauthorized users
+from "pinning" vulnerable setuid/setgid files against being upgraded by
+the administrator, or linking to special files.
+
+When set to "0", hardlink creation behavior is unrestricted.
+
+When set to "1" hardlinks cannot be created by users if they do not
+already own the source file, or do not have read/write access to it.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+==============================================================
+
+protected_symlinks:
+
+A long-standing class of security issues is the symlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given symlink (i.e. a
+root process follows a symlink belonging to another user). For a likely
+incomplete list of hundreds of examples across the years, please see:
+http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
+
+When set to "0", symlink following behavior is unrestricted.
+
+When set to "1" symlinks are permitted to be followed only when outside
+a sticky world-writable directory, or when the uid of the symlink and
+follower match, or when the directory owner matches the symlink's owner.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+==============================================================
+
 suid_dumpable:
 
 This value can be used to query and set the core dump mode for setuid
index d544d7816df3229780e4f2c58a1a28355c5c4aae..dba1ce235da59e23172cd5d6474d9ce4a72900ac 100644 (file)
@@ -186,10 +186,13 @@ static void spufs_prune_dir(struct dentry *dir)
 static int spufs_rmdir(struct inode *parent, struct dentry *dir)
 {
        /* remove all entries */
+       int res;
        spufs_prune_dir(dir);
        d_drop(dir);
-
-       return simple_rmdir(parent, dir);
+       res = simple_rmdir(parent, dir);
+       /* We have to give up the mm_struct */
+       spu_forget(SPUFS_I(dir->d_inode)->i_ctx);
+       return res;
 }
 
 static int spufs_fill_dir(struct dentry *dir,
@@ -245,9 +248,6 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
        mutex_unlock(&parent->i_mutex);
        WARN_ON(ret);
 
-       /* We have to give up the mm_struct */
-       spu_forget(ctx);
-
        return dcache_dir_close(inode, file);
 }
 
@@ -450,28 +450,24 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
        struct spu_context *neighbor;
        struct path path = {.mnt = mnt, .dentry = dentry};
 
-       ret = -EPERM;
        if ((flags & SPU_CREATE_NOSCHED) &&
            !capable(CAP_SYS_NICE))
-               goto out_unlock;
+               return -EPERM;
 
-       ret = -EINVAL;
        if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
            == SPU_CREATE_ISOLATE)
-               goto out_unlock;
+               return -EINVAL;
 
-       ret = -ENODEV;
        if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
-               goto out_unlock;
+               return -ENODEV;
 
        gang = NULL;
        neighbor = NULL;
        affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
        if (affinity) {
                gang = SPUFS_I(inode)->i_gang;
-               ret = -EINVAL;
                if (!gang)
-                       goto out_unlock;
+                       return -EINVAL;
                mutex_lock(&gang->aff_mutex);
                neighbor = spufs_assert_affinity(flags, gang, aff_filp);
                if (IS_ERR(neighbor)) {
@@ -492,22 +488,12 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
        }
 
        ret = spufs_context_open(&path);
-       if (ret < 0) {
+       if (ret < 0)
                WARN_ON(spufs_rmdir(inode, dentry));
-               if (affinity)
-                       mutex_unlock(&gang->aff_mutex);
-               mutex_unlock(&inode->i_mutex);
-               spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
-               goto out;
-       }
 
 out_aff_unlock:
        if (affinity)
                mutex_unlock(&gang->aff_mutex);
-out_unlock:
-       mutex_unlock(&inode->i_mutex);
-out:
-       dput(dentry);
        return ret;
 }
 
@@ -580,18 +566,13 @@ static int spufs_create_gang(struct inode *inode,
        int ret;
 
        ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
-       if (ret)
-               goto out;
-
-       ret = spufs_gang_open(&path);
-       if (ret < 0) {
-               int err = simple_rmdir(inode, dentry);
-               WARN_ON(err);
+       if (!ret) {
+               ret = spufs_gang_open(&path);
+               if (ret < 0) {
+                       int err = simple_rmdir(inode, dentry);
+                       WARN_ON(err);
+               }
        }
-
-out:
-       mutex_unlock(&inode->i_mutex);
-       dput(dentry);
        return ret;
 }
 
@@ -601,40 +582,32 @@ static struct file_system_type spufs_type;
 long spufs_create(struct path *path, struct dentry *dentry,
                unsigned int flags, umode_t mode, struct file *filp)
 {
+       struct inode *dir = path->dentry->d_inode;
        int ret;
 
-       ret = -EINVAL;
        /* check if we are on spufs */
        if (path->dentry->d_sb->s_type != &spufs_type)
-               goto out;
+               return -EINVAL;
 
        /* don't accept undefined flags */
        if (flags & (~SPU_CREATE_FLAG_ALL))
-               goto out;
+               return -EINVAL;
 
        /* only threads can be underneath a gang */
-       if (path->dentry != path->dentry->d_sb->s_root) {
-               if ((flags & SPU_CREATE_GANG) ||
-                   !SPUFS_I(path->dentry->d_inode)->i_gang)
-                       goto out;
-       }
+       if (path->dentry != path->dentry->d_sb->s_root)
+               if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang)
+                       return -EINVAL;
 
        mode &= ~current_umask();
 
        if (flags & SPU_CREATE_GANG)
-               ret = spufs_create_gang(path->dentry->d_inode,
-                                        dentry, path->mnt, mode);
+               ret = spufs_create_gang(dir, dentry, path->mnt, mode);
        else
-               ret = spufs_create_context(path->dentry->d_inode,
-                                           dentry, path->mnt, flags, mode,
+               ret = spufs_create_context(dir, dentry, path->mnt, flags, mode,
                                            filp);
        if (ret >= 0)
-               fsnotify_mkdir(path->dentry->d_inode, dentry);
-       return ret;
+               fsnotify_mkdir(dir, dentry);
 
-out:
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
-       dput(dentry);
        return ret;
 }
 
index 5665dcc382c7350208e5c2b084a0e94761ce5051..5b7d8ffbf8907933b21f9be8e4e4333c81a02e9d 100644 (file)
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
        ret = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                ret = spufs_create(&path, dentry, flags, mode, neighbor);
-               path_put(&path);
+               done_path_create(&path, dentry);
        }
 
        return ret;
index d91a3a0b23258a516f352162a5d051df17959e80..deb4a456cf8365ac3a45480135786c160473964f 100644 (file)
@@ -156,9 +156,7 @@ static int dev_mkdir(const char *name, umode_t mode)
        if (!err)
                /* mark as kernel-created inode */
                dentry->d_inode->i_private = &thread;
-       dput(dentry);
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
+       done_path_create(&path, dentry);
        return err;
 }
 
@@ -218,10 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev)
                /* mark as kernel-created inode */
                dentry->d_inode->i_private = &thread;
        }
-       dput(dentry);
-
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
+       done_path_create(&path, dentry);
        return err;
 }
 
index 57bf1d7ee80fc76591621f51ffd4fbdf56a656b7..9ab24528f9b9c0e54d40854ae6e43e8ffd9f654c 100644 (file)
@@ -1188,7 +1188,7 @@ exit:
        kfree(buf);
        /* close file before return */
        if (fp)
-               filp_close(fp, current->files);
+               filp_close(fp, NULL);
        /* restore previous address limit */
        set_fs(old_fs);
 
index 9a60d4cd2184aa188e80b946e5ba398bac04943f..f545716c666d06fac279d87bf8cfe69664eb595e 100644 (file)
@@ -157,12 +157,7 @@ static int create_worker_threads(struct bcm_mini_adapter *psAdapter)
 
 static struct file *open_firmware_file(struct bcm_mini_adapter *Adapter, const char *path)
 {
-       struct file *flp = NULL;
-       mm_segment_t oldfs;
-       oldfs = get_fs();
-       set_fs(get_ds());
-       flp = filp_open(path, O_RDONLY, S_IRWXU);
-       set_fs(oldfs);
+       struct file *flp = filp_open(path, O_RDONLY, S_IRWXU);
        if (IS_ERR(flp)) {
                pr_err(DRV_NAME "Unable To Open File %s, err %ld", path, PTR_ERR(flp));
                flp = NULL;
@@ -183,14 +178,12 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
 {
        int errorno = 0;
        struct file *flp = NULL;
-       mm_segment_t oldfs;
        struct timeval tv = {0};
 
        flp = open_firmware_file(Adapter, path);
        if (!flp) {
-               errorno = -ENOENT;
                BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Unable to Open %s\n", path);
-               goto exit_download;
+               return -ENOENT;
        }
        BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)flp->f_dentry->d_inode->i_size, loc);
        do_gettimeofday(&tv);
@@ -201,10 +194,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
                errorno = -EIO;
                goto exit_download;
        }
-       oldfs = get_fs();
-       set_fs(get_ds());
        vfs_llseek(flp, 0, 0);
-       set_fs(oldfs);
        if (Adapter->bcm_file_readback_from_chip(Adapter->pvInterfaceAdapter, flp, loc)) {
                BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Failed to read back firmware!");
                errorno = -EIO;
@@ -212,12 +202,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
        }
 
 exit_download:
-       oldfs = get_fs();
-       set_fs(get_ds());
-       if (flp && !(IS_ERR(flp)))
-               filp_close(flp, current->files);
-       set_fs(oldfs);
-
+       filp_close(flp, NULL);
        return errorno;
 }
 
@@ -1056,10 +1041,8 @@ OUT:
 static int bcm_parse_target_params(struct bcm_mini_adapter *Adapter)
 {
        struct file *flp = NULL;
-       mm_segment_t oldfs = {0};
        char *buff;
        int len = 0;
-       loff_t pos = 0;
 
        buff = kmalloc(BUFFER_1K, GFP_KERNEL);
        if (!buff)
@@ -1079,20 +1062,16 @@ static int bcm_parse_target_params(struct bcm_mini_adapter *Adapter)
                Adapter->pstargetparams = NULL;
                return -ENOENT;
        }
-       oldfs = get_fs();
-       set_fs(get_ds());
-       len = vfs_read(flp, (void __user __force *)buff, BUFFER_1K, &pos);
-       set_fs(oldfs);
+       len = kernel_read(flp, 0, buff, BUFFER_1K);
+       filp_close(flp, NULL);
 
        if (len != sizeof(STARGETPARAMS)) {
                BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Mismatch in Target Param Structure!\n");
                kfree(buff);
                kfree(Adapter->pstargetparams);
                Adapter->pstargetparams = NULL;
-               filp_close(flp, current->files);
                return -ENOENT;
        }
-       filp_close(flp, current->files);
 
        /* Check for autolink in config params */
        /*
index 760efee23d4a875ff1f3d402812af161f7f10f29..65624bca8b3ab354f0cfb312bff1f69229a59357 100644 (file)
@@ -66,9 +66,8 @@ static int download_image(struct sdio_func *func, char *img_name)
                return -ENOENT;
        }
 
-       if (filp->f_dentry)
-               inode = filp->f_dentry->d_inode;
-       if (!inode || !S_ISREG(inode->i_mode)) {
+       inode = filp->f_dentry->d_inode;
+       if (!S_ISREG(inode->i_mode)) {
                printk(KERN_ERR "Invalid file type: %s\n", img_name);
                ret = -EINVAL;
                goto out;
@@ -123,7 +122,7 @@ static int download_image(struct sdio_func *func, char *img_name)
                pno++;
        }
 out:
-       filp_close(filp, current->files);
+       filp_close(filp, NULL);
        return ret;
 }
 
index fef290c38db67ae8620a8ff92179275fb564730b..e3dbd5a552ca00c1349f2ea7f20fc629a7ddc5e5 100644 (file)
@@ -173,14 +173,12 @@ int usb_boot(struct usb_device *usbdev, u16 pid)
        filp = filp_open(img_name, O_RDONLY | O_LARGEFILE, 0);
        if (IS_ERR(filp)) {
                printk(KERN_ERR "Can't find %s.\n", img_name);
-               set_fs(fs);
                ret = PTR_ERR(filp);
                goto restore_fs;
        }
 
-       if (filp->f_dentry)
-               inode = filp->f_dentry->d_inode;
-       if (!inode || !S_ISREG(inode->i_mode)) {
+       inode = filp->f_dentry->d_inode;
+       if (!S_ISREG(inode->i_mode)) {
                printk(KERN_ERR "Invalid file type: %s\n", img_name);
                ret = -EINVAL;
                goto out;
@@ -262,7 +260,7 @@ int usb_boot(struct usb_device *usbdev, u16 pid)
                ret = -EINVAL;
        }
 out:
-       filp_close(filp, current->files);
+       filp_close(filp, NULL);
 
 restore_fs:
        set_fs(fs);
@@ -322,13 +320,11 @@ static int em_download_image(struct usb_device *usbdev, char *path,
                goto restore_fs;
        }
 
-       if (filp->f_dentry) {
-               inode = filp->f_dentry->d_inode;
-               if (!inode || !S_ISREG(inode->i_mode)) {
-                       printk(KERN_ERR "Invalid file type: %s\n", path);
-                       ret = -EINVAL;
-                       goto out;
-               }
+       inode = filp->f_dentry->d_inode;
+       if (!S_ISREG(inode->i_mode)) {
+               printk(KERN_ERR "Invalid file type: %s\n", path);
+               ret = -EINVAL;
+               goto out;
        }
 
        buf = kmalloc(DOWNLOAD_CHUCK + pad_size, GFP_KERNEL);
@@ -364,7 +360,7 @@ static int em_download_image(struct usb_device *usbdev, char *path,
                goto out;
 
 out:
-       filp_close(filp, current->files);
+       filp_close(filp, NULL);
 
 restore_fs:
        set_fs(fs);
index 9e2100551c789f70b033e420d0840b1f2e340f5d..cbb5aaf3e567f5d52f552cc940f3e9556ac8ee40 100644 (file)
@@ -109,46 +109,29 @@ static struct se_device *fd_create_virtdevice(
        struct se_subsystem_dev *se_dev,
        void *p)
 {
-       char *dev_p = NULL;
        struct se_device *dev;
        struct se_dev_limits dev_limits;
        struct queue_limits *limits;
        struct fd_dev *fd_dev = p;
        struct fd_host *fd_host = hba->hba_ptr;
-       mm_segment_t old_fs;
        struct file *file;
        struct inode *inode = NULL;
        int dev_flags = 0, flags, ret = -EINVAL;
 
        memset(&dev_limits, 0, sizeof(struct se_dev_limits));
 
-       old_fs = get_fs();
-       set_fs(get_ds());
-       dev_p = getname(fd_dev->fd_dev_name);
-       set_fs(old_fs);
-
-       if (IS_ERR(dev_p)) {
-               pr_err("getname(%s) failed: %lu\n",
-                       fd_dev->fd_dev_name, IS_ERR(dev_p));
-               ret = PTR_ERR(dev_p);
-               goto fail;
-       }
        /*
         * Use O_DSYNC by default instead of O_SYNC to forgo syncing
         * of pure timestamp updates.
         */
        flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
 
-       file = filp_open(dev_p, flags, 0600);
+       file = filp_open(fd_dev->fd_dev_name, flags, 0600);
        if (IS_ERR(file)) {
-               pr_err("filp_open(%s) failed\n", dev_p);
+               pr_err("filp_open(%s) failed\n", fd_dev->fd_dev_name);
                ret = PTR_ERR(file);
                goto fail;
        }
-       if (!file || !file->f_dentry) {
-               pr_err("filp_open(%s) failed\n", dev_p);
-               goto fail;
-       }
        fd_dev->fd_file = file;
        /*
         * If using a block backend with this struct file, we extract
@@ -212,14 +195,12 @@ static struct se_device *fd_create_virtdevice(
                " %llu total bytes\n", fd_host->fd_host_id, fd_dev->fd_dev_id,
                        fd_dev->fd_dev_name, fd_dev->fd_dev_size);
 
-       putname(dev_p);
        return dev;
 fail:
        if (fd_dev->fd_file) {
                filp_close(fd_dev->fd_file, NULL);
                fd_dev->fd_file = NULL;
        }
-       putname(dev_p);
        return ERR_PTR(ret);
 }
 
@@ -452,14 +433,11 @@ static ssize_t fd_set_configfs_dev_params(
                token = match_token(ptr, tokens, args);
                switch (token) {
                case Opt_fd_dev_name:
-                       arg_p = match_strdup(&args[0]);
-                       if (!arg_p) {
-                               ret = -ENOMEM;
+                       if (match_strlcpy(fd_dev->fd_dev_name, &args[0],
+                               FD_MAX_DEV_NAME) == 0) {
+                               ret = -EINVAL;
                                break;
                        }
-                       snprintf(fd_dev->fd_dev_name, FD_MAX_DEV_NAME,
-                                       "%s", arg_p);
-                       kfree(arg_p);
                        pr_debug("FILEIO: Referencing Path: %s\n",
                                        fd_dev->fd_dev_name);
                        fd_dev->fbd_flags |= FBDF_HAS_PATH;
index ae8b18869b8c0189b6ef2d2c68760cb12e2307e5..8d9bcd8207c8d1fee7e47813d7d6ee7f76ca0b6b 100644 (file)
@@ -656,9 +656,8 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
        if (!(filp->f_mode & FMODE_WRITE))
                ro = 1;
 
-       if (filp->f_path.dentry)
-               inode = filp->f_path.dentry->d_inode;
-       if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
+       inode = filp->f_path.dentry->d_inode;
+       if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
                LINFO(curlun, "invalid file type: %s\n", filename);
                goto out;
        }
@@ -667,7 +666,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
         * If we can't read the file, it's no good.
         * If we can't write the file, use it read-only.
         */
-       if (!filp->f_op || !(filp->f_op->read || filp->f_op->aio_read)) {
+       if (!(filp->f_op->read || filp->f_op->aio_read)) {
                LINFO(curlun, "file not readable: %s\n", filename);
                goto out;
        }
@@ -712,7 +711,6 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
        if (fsg_lun_is_open(curlun))
                fsg_lun_close(curlun);
 
-       get_file(filp);
        curlun->blksize = blksize;
        curlun->blkbits = blkbits;
        curlun->ro = ro;
@@ -720,10 +718,10 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
        curlun->file_length = size;
        curlun->num_sectors = num_sectors;
        LDBG(curlun, "open backing file: %s\n", filename);
-       rc = 0;
+       return 0;
 
 out:
-       filp_close(filp, current->files);
+       fput(filp);
        return rc;
 }
 
index af989898205989ca46e443b6e8d496d9c8ba459b..e0c5e88e03ed30dc02c533d0eae3a41b94bcb00b 100644 (file)
@@ -275,17 +275,17 @@ static int gaudio_close_snd_dev(struct gaudio *gau)
        /* Close control device */
        snd = &gau->control;
        if (snd->filp)
-               filp_close(snd->filp, current->files);
+               filp_close(snd->filp, NULL);
 
        /* Close PCM playback device and setup substream */
        snd = &gau->playback;
        if (snd->filp)
-               filp_close(snd->filp, current->files);
+               filp_close(snd->filp, NULL);
 
        /* Close PCM capture device and setup substream */
        snd = &gau->capture;
        if (snd->filp)
-               filp_close(snd->filp, current->files);
+               filp_close(snd->filp, NULL);
 
        return 0;
 }
index 1ddeb11659d4db9e0023f07d98e5c7c189e97535..64cda560c488358c9205d66e064657bd6ba7203e 100644 (file)
@@ -104,6 +104,8 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
        deferred framebuffer IO. then if userspace touches a page
        again, we repeat the same scheme */
 
+       file_update_time(vma->vm_file);
+
        /* protect against the workqueue changing the page list */
        mutex_lock(&fbdefio->lock);
 
index fc06fd27065eb3cade2f5e5f60f7155d3487ae80..dd6f7ee1e31258d94dfa1e038d2a272f17ecb541 100644 (file)
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
                 page, (unsigned long)filp->private_data);
 
+       /* Update file times before taking page lock */
+       file_update_time(filp);
+
        v9inode = V9FS_I(inode);
        /* make sure the cache has finished storing the page */
        v9fs_fscache_wait_on_page_write(inode, page);
index fadeba6a5db964c61ab7d795feb18f31347671c2..62e0cafd6e250d5d1717656d3d5821e2d1bfec42 100644 (file)
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg)
        struct btrfs_root *root = arg;
 
        do {
-               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-
                if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
                    mutex_trylock(&root->fs_info->cleaner_mutex)) {
                        btrfs_run_delayed_iputs(root);
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg)
        do {
                cannot_commit = false;
                delay = HZ * 30;
-               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
                spin_lock(&root->fs_info->trans_lock);
index 9aa01ec2138d466f3d1726ccd6291d054c5e5c98..5caf285c6e4d0f1cb7adf82d8af911ab614a807a 100644 (file)
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        ssize_t err = 0;
        size_t count, ocount;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+       sb_start_write(inode->i_sb);
 
        mutex_lock(&inode->i_mutex);
 
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                        num_written = err;
        }
 out:
+       sb_end_write(inode->i_sb);
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
 }
index 48bdfd2591c2bb4c23fd141b66dd555a37ef6cc7..83baec24946d7449194e94893f92f918b14c79b5 100644 (file)
@@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 page_start;
        u64 page_end;
 
+       sb_start_pagefault(inode->i_sb);
        ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
        if (!ret) {
                ret = file_update_time(vma->vm_file);
@@ -6718,12 +6719,15 @@ again:
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
 
 out_unlock:
-       if (!ret)
+       if (!ret) {
+               sb_end_pagefault(inode->i_sb);
                return VM_FAULT_LOCKED;
+       }
        unlock_page(page);
 out:
        btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 out_noreserve:
+       sb_end_pagefault(inode->i_sb);
        return ret;
 }
 
index 43f0012016e3e6291cb79579a60940f52ec94a9c..bc2f6ffff3cfb004099a5d4eb84f8a9aa688d7cd 100644 (file)
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (!inode_owner_or_capable(inode))
                return -EACCES;
 
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
        mutex_lock(&inode->i_mutex);
 
        ip_oldflags = ip->flags;
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                }
        }
 
-       ret = mnt_want_write_file(file);
-       if (ret)
-               goto out_unlock;
-
        if (flags & FS_SYNC_FL)
                ip->flags |= BTRFS_INODE_SYNC;
        else
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                inode->i_flags = i_oldflags;
        }
 
-       mnt_drop_write_file(file);
  out_unlock:
        mutex_unlock(&inode->i_mutex);
+       mnt_drop_write_file(file);
        return ret;
 }
 
@@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent,
        struct dentry *dentry;
        int error;
 
+       error = mnt_want_write(parent->mnt);
+       if (error)
+               return error;
+
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 
        dentry = lookup_one_len(name, parent->dentry, namelen);
@@ -699,6 +703,7 @@ out_dput:
        dput(dentry);
 out_unlock:
        mutex_unlock(&dir->i_mutex);
+       mnt_drop_write(parent->mnt);
        return error;
 }
 
index 7ac7cdcc294e5dc561bc6916efec59dacf320884..17be3dedacbab1c47084270153ed059719984470 100644 (file)
@@ -335,6 +335,8 @@ again:
        if (!h)
                return ERR_PTR(-ENOMEM);
 
+       sb_start_intwrite(root->fs_info->sb);
+
        if (may_wait_transaction(root, type))
                wait_current_trans(root);
 
@@ -345,6 +347,7 @@ again:
        } while (ret == -EBUSY);
 
        if (ret < 0) {
+               sb_end_intwrite(root->fs_info->sb);
                kmem_cache_free(btrfs_trans_handle_cachep, h);
                return ERR_PTR(ret);
        }
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;
 
+       sb_end_intwrite(root->fs_info->sb);
+
        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root)) {
                trans->transaction->blocked = 1;
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
+       sb_end_intwrite(root->fs_info->sb);
+
        trace_btrfs_transaction_commit(root);
 
        btrfs_scrub_continue(root);
index c7062c896d7c20489f41ea838640a5cfa5387ca9..9f6d2e41281d69752f77d9c68772a46c855a8453 100644 (file)
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  *
- * Direct callers of this function should call vfs_check_frozen() so that page
- * fault does not busyloop until the fs is thawed.
+ * Direct callers of this function should protect against filesystem freezing
+ * using sb_start_write() - sb_end_write() functions.
  */
 int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                         get_block_t get_block)
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        loff_t size;
        int ret;
 
+       /*
+        * Update file times before taking page lock. We may end up failing the
+        * fault so this update may be superfluous but who really cares...
+        */
+       file_update_time(vma->vm_file);
+
        lock_page(page);
        size = i_size_read(inode);
        if ((page->mapping != inode->i_mapping) ||
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 
        if (unlikely(ret < 0))
                goto out_unlock;
-       /*
-        * Freezing in progress? We check after the page is marked dirty and
-        * with page lock held so if the test here fails, we are sure freezing
-        * code will wait during syncing until the page fault is done - at that
-        * point page will be dirty and unlocked so freezing code will write it
-        * and writeprotect it again.
-        */
        set_page_dirty(page);
-       if (inode->i_sb->s_frozen != SB_UNFROZEN) {
-               ret = -EAGAIN;
-               goto out_unlock;
-       }
        wait_on_page_writeback(page);
        return 0;
 out_unlock:
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        int ret;
        struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
 
-       /*
-        * This check is racy but catches the common case. The check in
-        * __block_page_mkwrite() is reliable.
-        */
-       vfs_check_frozen(sb, SB_FREEZE_WRITE);
+       sb_start_pagefault(sb);
        ret = __block_page_mkwrite(vma, vmf, get_block);
+       sb_end_pagefault(sb);
        return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL(block_page_mkwrite);
index 8b67304e4b8079efe80eecd716fb1e98b5486842..452e71a1b75388b2dc06d3ad1665f36a1ed1176f 100644 (file)
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size, len;
        int ret;
 
+       /* Update time before taking page lock */
+       file_update_time(vma->vm_file);
+
        size = i_size_read(inode);
        if (off + PAGE_CACHE_SIZE <= size)
                len = PAGE_CACHE_SIZE;
index ffa2be57804dddcd21b89a712cc9107d027b3c3b..c3ca12c33ca2aa3c504bcc3e415595d72b96c5d4 100644 (file)
@@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
        struct vfsmount *lower_mnt;
        int rc = 0;
 
-       lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
-       fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
-       BUG_ON(!lower_dentry->d_count);
-
        dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
-       ecryptfs_set_dentry_private(dentry, dentry_info);
        if (!dentry_info) {
                printk(KERN_ERR "%s: Out of memory whilst attempting "
                       "to allocate ecryptfs_dentry_info struct\n",
                        __func__);
                dput(lower_dentry);
-               mntput(lower_mnt);
-               d_drop(dentry);
                return -ENOMEM;
        }
+
+       lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
+       fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+       BUG_ON(!lower_dentry->d_count);
+
+       ecryptfs_set_dentry_private(dentry, dentry_info);
        ecryptfs_set_dentry_lower(dentry, lower_dentry);
        ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
 
@@ -381,12 +380,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        struct dentry *lower_dir_dentry, *lower_dentry;
        int rc = 0;
 
-       if ((ecryptfs_dentry->d_name.len == 1
-            && !strcmp(ecryptfs_dentry->d_name.name, "."))
-           || (ecryptfs_dentry->d_name.len == 2
-               && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
-               goto out_d_drop;
-       }
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
@@ -397,8 +390,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
-                               encrypted_and_encoded_name);
-               goto out_d_drop;
+                               ecryptfs_dentry->d_name.name);
+               goto out;
        }
        if (lower_dentry->d_inode)
                goto interpose;
@@ -415,7 +408,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        if (rc) {
                printk(KERN_ERR "%s: Error attempting to encrypt and encode "
                       "filename; rc = [%d]\n", __func__, rc);
-               goto out_d_drop;
+               goto out;
        }
        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
@@ -427,14 +420,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
                                encrypted_and_encoded_name);
-               goto out_d_drop;
+               goto out;
        }
 interpose:
        rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
                                       ecryptfs_dir_inode);
-       goto out;
-out_d_drop:
-       d_drop(ecryptfs_dentry);
 out:
        kfree(encrypted_and_encoded_name);
        return ERR_PTR(rc);
index 3684353ebd5f612a82f7e8981fb2ce80441e8565..574cf4de4ec38ba2c64115ca85d78e80f0e31dd2 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file)
  */
 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
-       struct file *rp, *wp;
+       struct file *files[2];
        struct fdtable *fdt;
        struct coredump_params *cp = (struct coredump_params *)info->data;
        struct files_struct *cf = current->files;
+       int err = create_pipe_files(files, 0);
+       if (err)
+               return err;
 
-       wp = create_write_pipe(0);
-       if (IS_ERR(wp))
-               return PTR_ERR(wp);
-
-       rp = create_read_pipe(wp, 0);
-       if (IS_ERR(rp)) {
-               free_write_pipe(wp);
-               return PTR_ERR(rp);
-       }
-
-       cp->file = wp;
+       cp->file = files[1];
 
        sys_close(0);
-       fd_install(0, rp);
+       fd_install(0, files[0]);
        spin_lock(&cf->file_lock);
        fdt = files_fdtable(cf);
        __set_open_fd(0, fdt);
index 264d315f6c4753d6bee17f28ea86c6497b560449..6363ac66fafa48c1c697d993a948fa1a5ff7cf78 100644 (file)
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode)
        truncate_inode_pages(&inode->i_data, 0);
 
        if (want_delete) {
+               sb_start_intwrite(inode->i_sb);
                /* set dtime */
                EXT2_I(inode)->i_dtime  = get_seconds();
                mark_inode_dirty(inode);
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode)
        if (unlikely(rsv))
                kfree(rsv);
 
-       if (want_delete)
+       if (want_delete) {
                ext2_free_inode(inode);
+               sb_end_intwrite(inode->i_sb);
+       }
 }
 
 typedef struct {
index 9f311d27b16f5a804150e5cce66ce0b1724e7cb8..af74d9e27b71b0cf569cac645e362c174c44389e 100644 (file)
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb,
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
 static int ext2_sync_fs(struct super_block *sb, int wait);
+static int ext2_freeze(struct super_block *sb);
+static int ext2_unfreeze(struct super_block *sb);
 
 void ext2_error(struct super_block *sb, const char *function,
                const char *fmt, ...)
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = {
        .evict_inode    = ext2_evict_inode,
        .put_super      = ext2_put_super,
        .sync_fs        = ext2_sync_fs,
+       .freeze_fs      = ext2_freeze,
+       .unfreeze_fs    = ext2_unfreeze,
        .statfs         = ext2_statfs,
        .remount_fs     = ext2_remount,
        .show_options   = ext2_show_options,
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
        return 0;
 }
 
+static int ext2_freeze(struct super_block *sb)
+{
+       struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+       /*
+        * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
+        * because we have unattached inodes and thus filesystem is not fully
+        * consistent.
+        */
+       if (atomic_long_read(&sb->s_remove_count)) {
+               ext2_sync_fs(sb, 1);
+               return 0;
+       }
+       /* Set EXT2_FS_VALID flag */
+       spin_lock(&sbi->s_lock);
+       sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
+       spin_unlock(&sbi->s_lock);
+       ext2_sync_super(sb, sbi->s_es, 1);
+
+       return 0;
+}
+
+static int ext2_unfreeze(struct super_block *sb)
+{
+       /* Just write sb to clear EXT2_VALID_FS flag */
+       ext2_write_super(sb);
+
+       return 0;
+}
 
 void ext2_write_super(struct super_block *sb)
 {
index 89b59cb7f9b89cc47003295db7c2d69766e62b97..6324f74e03424e9ac21ceb01f6fbee07caa546df 100644 (file)
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
        if (is_bad_inode(inode))
                goto no_delete;
 
+       /*
+        * Protect us against freezing - iput() caller didn't have to have any
+        * protection against it
+        */
+       sb_start_intwrite(inode->i_sb);
        handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
        if (IS_ERR(handle)) {
                ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
                 * cleaned up.
                 */
                ext4_orphan_del(NULL, inode);
+               sb_end_intwrite(inode->i_sb);
                goto no_delete;
        }
 
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
                stop_handle:
                        ext4_journal_stop(handle);
                        ext4_orphan_del(NULL, inode);
+                       sb_end_intwrite(inode->i_sb);
                        goto no_delete;
                }
        }
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
        else
                ext4_free_inode(handle, inode);
        ext4_journal_stop(handle);
+       sb_end_intwrite(inode->i_sb);
        return;
 no_delete:
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
@@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        get_block_t *get_block;
        int retries = 0;
 
-       /*
-        * This check is racy but catches the common case. We rely on
-        * __block_page_mkwrite() to do a reliable check.
-        */
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+       sb_start_pagefault(inode->i_sb);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -4851,5 +4855,6 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       sb_end_pagefault(inode->i_sb);
        return ret;
 }
index f99a1311e84765296b0a0a04534e0be0536915bc..fe7c63f4717e09616b92f7d3b4850ab99671cff6 100644 (file)
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
 {
        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
 
+       /*
+        * We protect against freezing so that we don't create dirty buffers
+        * on frozen filesystem.
+        */
+       sb_start_write(sb);
        ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
        get_bh(bh);
        submit_bh(WRITE_SYNC, bh);
        wait_on_buffer(bh);
+       sb_end_write(sb);
        if (unlikely(!buffer_uptodate(bh)))
                return 1;
 
index 2d51cd9af22559394e1aa9ce303922dd64ccc083..d76ec8277d3fca660e619d097711cc0057349823 100644 (file)
@@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle)
  * journal_end calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
  */
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
        journal_t *journal;
-       handle_t  *handle;
 
        trace_ext4_journal_start(sb, nblocks, _RET_IP_);
        if (sb->s_flags & MS_RDONLY)
                return ERR_PTR(-EROFS);
 
+       WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
        journal = EXT4_SB(sb)->s_journal;
-       handle = ext4_journal_current_handle();
-
-       /*
-        * If a handle has been started, it should be allowed to
-        * finish, otherwise deadlock could happen between freeze
-        * and others(e.g. truncate) due to the restart of the
-        * journal handle if the filesystem is forzen and active
-        * handles are not stopped.
-        */
-       if (!handle)
-               vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
        if (!journal)
                return ext4_get_nojournal();
        /*
@@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
        sb = elr->lr_super;
        ngroups = EXT4_SB(sb)->s_groups_count;
 
+       sb_start_write(sb);
        for (group = elr->lr_next_group; group < ngroups; group++) {
                gdp = ext4_get_group_desc(sb, group, NULL);
                if (!gdp) {
@@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
                elr->lr_next_sched = jiffies + elr->lr_timeout;
                elr->lr_next_group = group + 1;
        }
+       sb_end_write(sb);
 
        return ret;
 }
@@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb)
                return 0;
 
        journal = EXT4_SB(sb)->s_journal;
-       if (journal) {
-               vfs_check_frozen(sb, SB_FREEZE_TRANS);
+       if (journal)
                ret = ext4_journal_force_commit(journal);
-       }
 
        return ret;
 }
@@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
  * gives us a chance to flush the journal completely and mark the fs clean.
  *
  * Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
- * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from
- * the upper layer.
+ * state independently. It relies on upper layer to stop all data & metadata
+ * modifications.
  */
 static int ext4_freeze(struct super_block *sb)
 {
@@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb)
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
 out:
-       /* we rely on s_frozen to stop further updates */
+       /* we rely on upper layer to stop further updates */
        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        return error;
 }
index a71fe3715ee818b016fdc956e061399d734725d0..e007b8bd8e5ec1d93f29c65f877e083d3ec10b6e 100644 (file)
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        if (err)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
        err = mnt_want_write_file(file);
        if (err)
-               goto out_unlock_inode;
+               goto out;
+       mutex_lock(&inode->i_mutex);
 
        /*
         * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        /* The root directory has no attributes */
        if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
                err = -EINVAL;
-               goto out_drop_write;
+               goto out_unlock_inode;
        }
 
        if (sbi->options.sys_immutable &&
            ((attr | oldattr) & ATTR_SYS) &&
            !capable(CAP_LINUX_IMMUTABLE)) {
                err = -EPERM;
-               goto out_drop_write;
+               goto out_unlock_inode;
        }
 
        /*
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
         */
        err = security_inode_setattr(file->f_path.dentry, &ia);
        if (err)
-               goto out_drop_write;
+               goto out_unlock_inode;
 
        /* This MUST be done before doing anything irreversible... */
        err = fat_setattr(file->f_path.dentry, &ia);
        if (err)
-               goto out_drop_write;
+               goto out_unlock_inode;
 
        fsnotify_change(file->f_path.dentry, ia.ia_valid);
        if (sbi->options.sys_immutable) {
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
 
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
-out_drop_write:
-       mnt_drop_write_file(file);
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
+       mnt_drop_write_file(file);
 out:
        return err;
 }
index b3fc4d67a26b31243474520bfbcddacce3733ded..701985e4ccda4fc5afc05977f511bd6ca11ba3af 100644 (file)
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
-static inline void file_free_rcu(struct rcu_head *head)
+static void file_free_rcu(struct rcu_head *head)
 {
        struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
 
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file)
                return;
        if (file_check_writeable(file) != 0)
                return;
-       mnt_drop_write(mnt);
+       __mnt_drop_write(mnt);
        file_release_write(file);
 }
 
index b321a688cde79aa0f2923f3440330d03f0e5ee1e..93d8d6c9494dbd5737b0842a3ebb55ef972e0d87 100644 (file)
@@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                return err;
 
        count = ocount;
-
+       sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
@@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 out:
        current->backing_dev_info = NULL;
        mutex_unlock(&inode->i_mutex);
+       sb_end_write(inode->i_sb);
 
        return written ? written : err;
 }
index 9aa6af13823c5b75d3e8d2723d281dc79cee7a47..d1d791ef38de2188852551254a63f974a605feff 100644 (file)
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size;
        int ret;
 
-       /* Wait if fs is frozen. This is racy so we check again later on
-        * and retry if the fs has been frozen after the page lock has
-        * been acquired
-        */
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+       sb_start_pagefault(inode->i_sb);
+
+       /* Update file times before taking page lock */
+       file_update_time(vma->vm_file);
 
        ret = gfs2_rs_alloc(ip);
        if (ret)
@@ -462,14 +461,9 @@ out:
        gfs2_holder_uninit(&gh);
        if (ret == 0) {
                set_page_dirty(page);
-               /* This check must be post dropping of transaction lock */
-               if (inode->i_sb->s_frozen == SB_UNFROZEN) {
-                       wait_on_page_writeback(page);
-               } else {
-                       ret = -EAGAIN;
-                       unlock_page(page);
-               }
+               wait_on_page_writeback(page);
        }
+       sb_end_pagefault(inode->i_sb);
        return block_page_mkwrite_return(ret);
 }
 
index ad3e2fb763d73792d048ac0c86915be90653e1f3..adbd27875ef957e5b54846d8494865d51ff156be 100644 (file)
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
        if (revokes)
                tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
                                                   sizeof(u64));
+       sb_start_intwrite(sdp->sd_vfs);
        gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
 
        error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -68,6 +69,7 @@ fail_gunlock:
        gfs2_glock_dq(&tr->tr_t_gh);
 
 fail_holder_uninit:
+       sb_end_intwrite(sdp->sd_vfs);
        gfs2_holder_uninit(&tr->tr_t_gh);
        kfree(tr);
 
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
                        gfs2_holder_uninit(&tr->tr_t_gh);
                        kfree(tr);
                }
+               sb_end_intwrite(sdp->sd_vfs);
                return;
        }
 
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 
        if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
                gfs2_log_flush(sdp, NULL);
+       sb_end_intwrite(sdp->sd_vfs);
 }
 
 /**
index 3cc50432046796df30834106966c4a295294b7fb..ac8d904b3f1624bfa945ba088d2909e740e16a6c 100644 (file)
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path)
        if (timespec_equal(&inode->i_atime, &now))
                return;
 
-       if (mnt_want_write(mnt))
+       if (!sb_start_write_trylock(inode->i_sb))
                return;
 
+       if (__mnt_want_write(mnt))
+               goto skip_update;
        /*
         * File systems can error out when updating inodes if they need to
         * allocate new space to modify an inode (such is the case for
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path)
         * of the fs read only, e.g. subvolumes in Btrfs.
         */
        update_time(inode, &now, S_ATIME);
-       mnt_drop_write(mnt);
+       __mnt_drop_write(mnt);
+skip_update:
+       sb_end_write(inode->i_sb);
 }
 EXPORT_SYMBOL(touch_atime);
 
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file)
                return 0;
 
        /* Finally allowed to write? Takes lock. */
-       if (mnt_want_write_file(file))
+       if (__mnt_want_write_file(file))
                return 0;
 
        ret = update_time(inode, &now, sync_it);
-       mnt_drop_write_file(file);
+       __mnt_drop_write_file(file);
 
        return ret;
 }
index a6fd56c68b1164928d380fdc787a988107c15a55..371bcc4b1697df808e506a6f88735209dec17dd8 100644 (file)
@@ -61,6 +61,10 @@ extern void __init mnt_init(void);
 
 extern struct lglock vfsmount_lock;
 
+extern int __mnt_want_write(struct vfsmount *);
+extern int __mnt_want_write_file(struct file *);
+extern void __mnt_drop_write(struct vfsmount *);
+extern void __mnt_drop_write_file(struct file *);
 
 /*
  * fs_struct.c
index 8392cb85bd54401d27028f0ed2932dceb4dd98bf..05d29124c6ab4150c9a5381cbc98b9a472c65b25 100644 (file)
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
        struct nlm_rqst         *call;
        int                     status;
 
-       nlm_get_host(host);
        call = nlm_alloc_call(host);
        if (call == NULL)
                return -ENOMEM;
 
        nlmclnt_locks_init_private(fl, host);
+       if (!fl->fl_u.nfs_fl.owner) {
+               /* lockowner allocation has failed */
+               nlmclnt_release_call(call);
+               return -ENOMEM;
+       }
        /* Set up the argument struct */
        nlmclnt_setlockargs(call, fl);
 
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
- *
- * Note: the caller must hold a reference to host. In case of failure,
- * this reference will be released.
  */
 struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
                        atomic_set(&call->a_count, 1);
                        locks_init_lock(&call->a_args.lock.fl);
                        locks_init_lock(&call->a_res.lock.fl);
-                       call->a_host = host;
+                       call->a_host = nlm_get_host(host);
                        return call;
                }
                if (signalled())
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
                printk("nlm_alloc_call: failed, waiting for memory\n");
                schedule_timeout_interruptible(5*HZ);
        }
-       nlmclnt_release_host(host);
        return NULL;
 }
 
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
        dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
                "       Attempting to cancel lock.\n");
 
-       req = nlm_alloc_call(nlm_get_host(host));
+       req = nlm_alloc_call(host);
        if (!req)
                return -ENOMEM;
        req->a_flags = RPC_TASK_ASYNC;
index 4a43d253c045d0c50f1ddf63eabffc133774c314..b147d1ae71fd9ec3c21ea69f847be1b2b047896d 100644 (file)
@@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
                return rpc_system_err;
 
        call = nlm_alloc_call(host);
+       nlmsvc_release_host(host);
        if (call == NULL)
                return rpc_system_err;
 
index afe4488c33d886a3c1bf58c1b827c05f09c3aa77..fb1a2bedbe9789a8fcca5618b4637d70b25fb8ab 100644 (file)
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
        struct nlm_block        *block;
        struct nlm_rqst         *call = NULL;
 
-       nlm_get_host(host);
        call = nlm_alloc_call(host);
        if (call == NULL)
                return NULL;
index de8f2caa22359f8fa11dbd178fc1e78c5f1cc7ef..3009a365e082e37764ea817d070165c3b828146b 100644 (file)
@@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
                return rpc_system_err;
 
        call = nlm_alloc_call(host);
+       nlmsvc_release_host(host);
        if (call == NULL)
                return rpc_system_err;
 
index 2ccc35c4dc24d95e2a5c1bf5528528fa6f22827d..1b464390dde85ecd783eb45f64aee1f89f6c3ff1 100644 (file)
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
        path_put(link);
 }
 
+int sysctl_protected_symlinks __read_mostly = 1;
+int sysctl_protected_hardlinks __read_mostly = 1;
+
+/**
+ * may_follow_link - Check symlink following for unsafe situations
+ * @link: The path of the symlink
+ *
+ * In the case of the sysctl_protected_symlinks sysctl being enabled,
+ * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
+ * in a sticky world-writable directory. This is to protect privileged
+ * processes from failing races against path names that may change out
+ * from under them by way of other users creating malicious symlinks.
+ * It will permit symlinks to be followed only when outside a sticky
+ * world-writable directory, or when the uid of the symlink and follower
+ * match, or when the directory owner matches the symlink's owner.
+ *
+ * Returns 0 if following the symlink is allowed, -ve on error.
+ */
+static inline int may_follow_link(struct path *link, struct nameidata *nd)
+{
+       const struct inode *inode;
+       const struct inode *parent;
+
+       if (!sysctl_protected_symlinks)
+               return 0;
+
+       /* Allowed if owner and follower match. */
+       inode = link->dentry->d_inode;
+       if (current_cred()->fsuid == inode->i_uid)
+               return 0;
+
+       /* Allowed if parent directory not sticky and world-writable. */
+       parent = nd->path.dentry->d_inode;
+       if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
+               return 0;
+
+       /* Allowed if parent directory and link owner match. */
+       if (parent->i_uid == inode->i_uid)
+               return 0;
+
+       path_put_conditional(link, nd);
+       path_put(&nd->path);
+       audit_log_link_denied("follow_link", link);
+       return -EACCES;
+}
+
+/**
+ * safe_hardlink_source - Check for safe hardlink conditions
+ * @inode: the source inode to hardlink from
+ *
+ * Return false if at least one of the following conditions:
+ *    - inode is not a regular file
+ *    - inode is setuid
+ *    - inode is setgid and group-exec
+ *    - access failure for read and write
+ *
+ * Otherwise returns true.
+ */
+static bool safe_hardlink_source(struct inode *inode)
+{
+       umode_t mode = inode->i_mode;
+
+       /* Special files should not get pinned to the filesystem. */
+       if (!S_ISREG(mode))
+               return false;
+
+       /* Setuid files should not get pinned to the filesystem. */
+       if (mode & S_ISUID)
+               return false;
+
+       /* Executable setgid files should not get pinned to the filesystem. */
+       if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
+               return false;
+
+       /* Hardlinking to unreadable or unwritable sources is dangerous. */
+       if (inode_permission(inode, MAY_READ | MAY_WRITE))
+               return false;
+
+       return true;
+}
+
+/**
+ * may_linkat - Check permissions for creating a hardlink
+ * @link: the source to hardlink from
+ *
+ * Block hardlink when all of:
+ *  - sysctl_protected_hardlinks enabled
+ *  - fsuid does not match inode
+ *  - hardlink source is unsafe (see safe_hardlink_source() above)
+ *  - not CAP_FOWNER
+ *
+ * Returns 0 if successful, -ve on error.
+ */
+static int may_linkat(struct path *link)
+{
+       const struct cred *cred;
+       struct inode *inode;
+
+       if (!sysctl_protected_hardlinks)
+               return 0;
+
+       cred = current_cred();
+       inode = link->dentry->d_inode;
+
+       /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
+        * otherwise, it must be a safe source.
+        */
+       if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
+           capable(CAP_FOWNER))
+               return 0;
+
+       audit_log_link_denied("linkat", link);
+       return -EPERM;
+}
+
 static __always_inline int
 follow_link(struct path *link, struct nameidata *nd, void **p)
 {
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name,
                while (err > 0) {
                        void *cookie;
                        struct path link = path;
+                       err = may_follow_link(&link, nd);
+                       if (unlikely(err))
+                               break;
                        nd->flags |= LOOKUP_PARENT;
                        err = follow_link(&link, nd, &cookie);
                        if (err)
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 static int atomic_open(struct nameidata *nd, struct dentry *dentry,
                        struct path *path, struct file *file,
                        const struct open_flags *op,
-                       bool *want_write, bool need_lookup,
+                       bool got_write, bool need_lookup,
                        int *opened)
 {
        struct inode *dir =  nd->path.dentry->d_inode;
@@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
        if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
                mode &= ~current_umask();
 
-       if (open_flag & O_EXCL) {
+       if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
                open_flag &= ~O_TRUNC;
                *opened |= FILE_CREATED;
        }
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
         * Another problem is returing the "right" error value (e.g. for an
         * O_EXCL open we want to return EEXIST not EROFS).
         */
-       if ((open_flag & (O_CREAT | O_TRUNC)) ||
-           (open_flag & O_ACCMODE) != O_RDONLY) {
-               error = mnt_want_write(nd->path.mnt);
-               if (!error) {
-                       *want_write = true;
-               } else if (!(open_flag & O_CREAT)) {
+       if (((open_flag & (O_CREAT | O_TRUNC)) ||
+           (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
+               if (!(open_flag & O_CREAT)) {
                        /*
                         * No O_CREATE -> atomicity not a requirement -> fall
                         * back to lookup + open
@@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
                        goto no_open;
                } else if (open_flag & (O_EXCL | O_TRUNC)) {
                        /* Fall back and fail with the right error */
-                       create_error = error;
+                       create_error = -EROFS;
                        goto no_open;
                } else {
                        /* No side effects, safe to clear O_CREAT */
-                       create_error = error;
+                       create_error = -EROFS;
                        open_flag &= ~O_CREAT;
                }
        }
@@ -2438,7 +2553,7 @@ looked_up:
 static int lookup_open(struct nameidata *nd, struct path *path,
                        struct file *file,
                        const struct open_flags *op,
-                       bool *want_write, int *opened)
+                       bool got_write, int *opened)
 {
        struct dentry *dir = nd->path.dentry;
        struct inode *dir_inode = dir->d_inode;
@@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
                goto out_no_open;
 
        if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
-               return atomic_open(nd, dentry, path, file, op, want_write,
+               return atomic_open(nd, dentry, path, file, op, got_write,
                                   need_lookup, opened);
        }
 
@@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path,
                 * a permanent write count is taken through
                 * the 'struct file' in finish_open().
                 */
-               error = mnt_want_write(nd->path.mnt);
-               if (error)
+               if (!got_write) {
+                       error = -EROFS;
                        goto out_dput;
-               *want_write = true;
+               }
                *opened |= FILE_CREATED;
                error = security_path_mknod(&nd->path, dentry, mode, 0);
                if (error)
@@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path,
        struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
        bool will_truncate = (open_flag & O_TRUNC) != 0;
-       bool want_write = false;
+       bool got_write = false;
        int acc_mode = op->acc_mode;
        struct inode *inode;
        bool symlink_ok = false;
@@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path,
        }
 
 retry_lookup:
+       if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
+               error = mnt_want_write(nd->path.mnt);
+               if (!error)
+                       got_write = true;
+               /*
+                * do _not_ fail yet - we might not need that or fail with
+                * a different error; let lookup_open() decide; we'll be
+                * dropping this one anyway.
+                */
+       }
        mutex_lock(&dir->d_inode->i_mutex);
-       error = lookup_open(nd, path, file, op, &want_write, opened);
+       error = lookup_open(nd, path, file, op, got_write, opened);
        mutex_unlock(&dir->d_inode->i_mutex);
 
        if (error <= 0) {
@@ -2608,22 +2733,23 @@ retry_lookup:
        }
 
        /*
-        * It already exists.
+        * create/update audit record if it already exists.
         */
-       audit_inode(pathname, path->dentry);
+       if (path->dentry->d_inode)
+               audit_inode(pathname, path->dentry);
 
        /*
         * If atomic_open() acquired write access it is dropped now due to
         * possible mount and symlink following (this might be optimized away if
         * necessary...)
         */
-       if (want_write) {
+       if (got_write) {
                mnt_drop_write(nd->path.mnt);
-               want_write = false;
+               got_write = false;
        }
 
        error = -EEXIST;
-       if (open_flag & O_EXCL)
+       if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
                goto exit_dput;
 
        error = follow_managed(path, nd->flags);
@@ -2684,7 +2810,7 @@ finish_open:
                error = mnt_want_write(nd->path.mnt);
                if (error)
                        goto out;
-               want_write = true;
+               got_write = true;
        }
 finish_open_created:
        error = may_open(&nd->path, acc_mode, open_flag);
@@ -2711,7 +2837,7 @@ opened:
                        goto exit_fput;
        }
 out:
-       if (want_write)
+       if (got_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
        terminate_walk(nd);
@@ -2735,9 +2861,9 @@ stale_open:
        nd->inode = dir->d_inode;
        save_parent.mnt = NULL;
        save_parent.dentry = NULL;
-       if (want_write) {
+       if (got_write) {
                mnt_drop_write(nd->path.mnt);
-               want_write = false;
+               got_write = false;
        }
        retried = true;
        goto retry_lookup;
@@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname,
                        error = -ELOOP;
                        break;
                }
+               error = may_follow_link(&link, nd);
+               if (unlikely(error))
+                       break;
                nd->flags |= LOOKUP_PARENT;
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
                error = follow_link(&link, nd, &cookie);
@@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
 {
        struct dentry *dentry = ERR_PTR(-EEXIST);
        struct nameidata nd;
+       int err2;
        int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
        if (error)
                return ERR_PTR(error);
@@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
        nd.flags &= ~LOOKUP_PARENT;
        nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
 
+       /* don't fail immediately if it's r/o, at least try to report other errors */
+       err2 = mnt_want_write(nd.path.mnt);
        /*
         * Do the final lookup.
         */
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
        if (IS_ERR(dentry))
-               goto fail;
+               goto unlock;
 
+       error = -EEXIST;
        if (dentry->d_inode)
-               goto eexist;
+               goto fail;
        /*
         * Special case - lookup gave negative, but... we had foo/bar/
         * From the vfs_mknod() POV we just have a negative dentry -
@@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
         * been asking for (non-existent) directory. -ENOENT for you.
         */
        if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
-               dput(dentry);
-               dentry = ERR_PTR(-ENOENT);
+               error = -ENOENT;
+               goto fail;
+       }
+       if (unlikely(err2)) {
+               error = err2;
                goto fail;
        }
        *path = nd.path;
        return dentry;
-eexist:
-       dput(dentry);
-       dentry = ERR_PTR(-EEXIST);
 fail:
+       dput(dentry);
+       dentry = ERR_PTR(error);
+unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+       if (!err2)
+               mnt_drop_write(nd.path.mnt);
 out:
        path_put(&nd.path);
        return dentry;
 }
 EXPORT_SYMBOL(kern_path_create);
 
+void done_path_create(struct path *path, struct dentry *dentry)
+{
+       dput(dentry);
+       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       mnt_drop_write(path->mnt);
+       path_put(path);
+}
+EXPORT_SYMBOL(done_path_create);
+
 struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
 {
        char *tmp = getname(pathname);
@@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
        struct path path;
        int error;
 
-       if (S_ISDIR(mode))
-               return -EPERM;
+       error = may_mknod(mode);
+       if (error)
+               return error;
 
        dentry = user_path_create(dfd, filename, &path, 0);
        if (IS_ERR(dentry))
@@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
 
        if (!IS_POSIXACL(path.dentry->d_inode))
                mode &= ~current_umask();
-       error = may_mknod(mode);
-       if (error)
-               goto out_dput;
-       error = mnt_want_write(path.mnt);
-       if (error)
-               goto out_dput;
        error = security_path_mknod(&path, dentry, mode, dev);
        if (error)
-               goto out_drop_write;
+               goto out;
        switch (mode & S_IFMT) {
                case 0: case S_IFREG:
                        error = vfs_create(path.dentry->d_inode,dentry,mode,true);
@@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
                        error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
                        break;
        }
-out_drop_write:
-       mnt_drop_write(path.mnt);
-out_dput:
-       dput(dentry);
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
-
+out:
+       done_path_create(&path, dentry);
        return error;
 }
 
@@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 
        if (!IS_POSIXACL(path.dentry->d_inode))
                mode &= ~current_umask();
-       error = mnt_want_write(path.mnt);
-       if (error)
-               goto out_dput;
        error = security_path_mkdir(&path, dentry, mode);
-       if (error)
-               goto out_drop_write;
-       error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
-out_drop_write:
-       mnt_drop_write(path.mnt);
-out_dput:
-       dput(dentry);
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
+       if (!error)
+               error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+       done_path_create(&path, dentry);
        return error;
 }
 
@@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
        }
 
        nd.flags &= ~LOOKUP_PARENT;
+       error = mnt_want_write(nd.path.mnt);
+       if (error)
+               goto exit1;
 
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
@@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
                error = -ENOENT;
                goto exit3;
        }
-       error = mnt_want_write(nd.path.mnt);
-       if (error)
-               goto exit3;
        error = security_path_rmdir(&nd.path, dentry);
        if (error)
-               goto exit4;
+               goto exit3;
        error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
-exit4:
-       mnt_drop_write(nd.path.mnt);
 exit3:
        dput(dentry);
 exit2:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+       mnt_drop_write(nd.path.mnt);
 exit1:
        path_put(&nd.path);
        putname(name);
@@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                goto exit1;
 
        nd.flags &= ~LOOKUP_PARENT;
+       error = mnt_want_write(nd.path.mnt);
+       if (error)
+               goto exit1;
 
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
@@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                if (!inode)
                        goto slashes;
                ihold(inode);
-               error = mnt_want_write(nd.path.mnt);
-               if (error)
-                       goto exit2;
                error = security_path_unlink(&nd.path, dentry);
                if (error)
-                       goto exit3;
+                       goto exit2;
                error = vfs_unlink(nd.path.dentry->d_inode, dentry);
-exit3:
-               mnt_drop_write(nd.path.mnt);
-       exit2:
+exit2:
                dput(dentry);
        }
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        if (inode)
                iput(inode);    /* truncate the inode here */
+       mnt_drop_write(nd.path.mnt);
 exit1:
        path_put(&nd.path);
        putname(name);
@@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
        if (IS_ERR(dentry))
                goto out_putname;
 
-       error = mnt_want_write(path.mnt);
-       if (error)
-               goto out_dput;
        error = security_path_symlink(&path, dentry, from);
-       if (error)
-               goto out_drop_write;
-       error = vfs_symlink(path.dentry->d_inode, dentry, from);
-out_drop_write:
-       mnt_drop_write(path.mnt);
-out_dput:
-       dput(dentry);
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
+       if (!error)
+               error = vfs_symlink(path.dentry->d_inode, dentry, from);
+       done_path_create(&path, dentry);
 out_putname:
        putname(from);
        return error;
@@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
        error = -EXDEV;
        if (old_path.mnt != new_path.mnt)
                goto out_dput;
-       error = mnt_want_write(new_path.mnt);
-       if (error)
+       error = may_linkat(&old_path);
+       if (unlikely(error))
                goto out_dput;
        error = security_path_link(old_path.dentry, &new_path, new_dentry);
        if (error)
-               goto out_drop_write;
+               goto out_dput;
        error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
-out_drop_write:
-       mnt_drop_write(new_path.mnt);
 out_dput:
-       dput(new_dentry);
-       mutex_unlock(&new_path.dentry->d_inode->i_mutex);
-       path_put(&new_path);
+       done_path_create(&new_path, new_dentry);
 out:
        path_put(&old_path);
 
@@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
        if (newnd.last_type != LAST_NORM)
                goto exit2;
 
+       error = mnt_want_write(oldnd.path.mnt);
+       if (error)
+               goto exit2;
+
        oldnd.flags &= ~LOOKUP_PARENT;
        newnd.flags &= ~LOOKUP_PARENT;
        newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
        if (new_dentry == trap)
                goto exit5;
 
-       error = mnt_want_write(oldnd.path.mnt);
-       if (error)
-               goto exit5;
        error = security_path_rename(&oldnd.path, old_dentry,
                                     &newnd.path, new_dentry);
        if (error)
-               goto exit6;
+               goto exit5;
        error = vfs_rename(old_dir->d_inode, old_dentry,
                                   new_dir->d_inode, new_dentry);
-exit6:
-       mnt_drop_write(oldnd.path.mnt);
 exit5:
        dput(new_dentry);
 exit4:
        dput(old_dentry);
 exit3:
        unlock_rename(new_dir, old_dir);
+       mnt_drop_write(oldnd.path.mnt);
 exit2:
        path_put(&newnd.path);
        putname(to);
index c53d3381b0d0043d91e2f7c47e4cafbdaaaf13d6..4d31f73e2561d4d0e19d85becc0223dda27b4710 100644 (file)
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
 }
 
 /*
- * Most r/o checks on a fs are for operations that take
- * discrete amounts of time, like a write() or unlink().
- * We must keep track of when those operations start
- * (for permission checks) and when they end, so that
- * we can determine when writes are able to occur to
- * a filesystem.
+ * Most r/o & frozen checks on a fs are for operations that take discrete
+ * amounts of time, like a write() or unlink().  We must keep track of when
+ * those operations start (for permission checks) and when they end, so that we
+ * can determine when writes are able to occur to a filesystem.
  */
 /**
- * mnt_want_write - get write access to a mount
+ * __mnt_want_write - get write access to a mount without freeze protection
  * @m: the mount on which to take a write
  *
- * This tells the low-level filesystem that a write is
- * about to be performed to it, and makes sure that
- * writes are allowed before returning success.  When
- * the write operation is finished, mnt_drop_write()
- * must be called.  This is effectively a refcount.
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mnt it read-write) before
+ * returning success. This operation does not protect against filesystem being
+ * frozen. When the write operation is finished, __mnt_drop_write() must be
+ * called. This is effectively a refcount.
  */
-int mnt_want_write(struct vfsmount *m)
+int __mnt_want_write(struct vfsmount *m)
 {
        struct mount *mnt = real_mount(m);
        int ret = 0;
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m)
                ret = -EROFS;
        }
        preempt_enable();
+
+       return ret;
+}
+
+/**
+ * mnt_want_write - get write access to a mount
+ * @m: the mount on which to take a write
+ *
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mount is read-write, filesystem
+ * is not frozen) before returning success.  When the write operation is
+ * finished, mnt_drop_write() must be called.  This is effectively a refcount.
+ */
+int mnt_want_write(struct vfsmount *m)
+{
+       int ret;
+
+       sb_start_write(m->mnt_sb);
+       ret = __mnt_want_write(m);
+       if (ret)
+               sb_end_write(m->mnt_sb);
        return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt)
 EXPORT_SYMBOL_GPL(mnt_clone_write);
 
 /**
- * mnt_want_write_file - get write access to a file's mount
+ * __mnt_want_write_file - get write access to a file's mount
  * @file: the file who's mount on which to take a write
  *
- * This is like mnt_want_write, but it takes a file and can
+ * This is like __mnt_want_write, but it takes a file and can
  * do some optimisations if the file is open for write already
  */
-int mnt_want_write_file(struct file *file)
+int __mnt_want_write_file(struct file *file)
 {
        struct inode *inode = file->f_dentry->d_inode;
+
        if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
-               return mnt_want_write(file->f_path.mnt);
+               return __mnt_want_write(file->f_path.mnt);
        else
                return mnt_clone_write(file->f_path.mnt);
 }
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+       int ret;
+
+       sb_start_write(file->f_path.mnt->mnt_sb);
+       ret = __mnt_want_write_file(file);
+       if (ret)
+               sb_end_write(file->f_path.mnt->mnt_sb);
+       return ret;
+}
 EXPORT_SYMBOL_GPL(mnt_want_write_file);
 
 /**
- * mnt_drop_write - give up write access to a mount
+ * __mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
  *
  * Tells the low-level filesystem that we are done
  * performing writes to it.  Must be matched with
- * mnt_want_write() call above.
+ * __mnt_want_write() call above.
  */
-void mnt_drop_write(struct vfsmount *mnt)
+void __mnt_drop_write(struct vfsmount *mnt)
 {
        preempt_disable();
        mnt_dec_writers(real_mount(mnt));
        preempt_enable();
 }
+
+/**
+ * mnt_drop_write - give up write access to a mount
+ * @mnt: the mount on which to give up write access
+ *
+ * Tells the low-level filesystem that we are done performing writes to it and
+ * also allows filesystem to be frozen again.  Must be matched with
+ * mnt_want_write() call above.
+ */
+void mnt_drop_write(struct vfsmount *mnt)
+{
+       __mnt_drop_write(mnt);
+       sb_end_write(mnt->mnt_sb);
+}
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
+void __mnt_drop_write_file(struct file *file)
+{
+       __mnt_drop_write(file->f_path.mnt);
+}
+
 void mnt_drop_write_file(struct file *file)
 {
        mnt_drop_write(file->f_path.mnt);
index 5ff0b7b9fc08f22f39cc1f2d83062baceb773bdc..43295d45cc2b553e2afda4d11d7b2d429b8676ef 100644 (file)
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
        if (status < 0)
                return;
 
+       status = mnt_want_write_file(rec_file);
+       if (status)
+               return;
+
        dir = rec_file->f_path.dentry;
        /* lock the parent */
        mutex_lock(&dir->d_inode->i_mutex);
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                 * as well be forgiving and just succeed silently.
                 */
                goto out_put;
-       status = mnt_want_write_file(rec_file);
-       if (status)
-               goto out_put;
        status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
-       mnt_drop_write_file(rec_file);
 out_put:
        dput(dentry);
 out_unlock:
@@ -189,6 +189,7 @@ out_unlock:
                                " (err %d); please check that %s exists"
                                " and is writeable", status,
                                user_recovery_dirname);
+       mnt_drop_write_file(rec_file);
        nfs4_reset_creds(original_cred);
 }
 
index cc793005a87cb4b5a79b7074c4861ca651085d1c..032af381b3aa4bd1298b084c57d3c4022293668c 100644 (file)
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp)
                fhp->fh_post_saved = 0;
 #endif
        }
+       fh_drop_write(fhp);
        if (exp) {
                exp_put(exp);
                fhp->fh_export = NULL;
index e15dc45fc5ec01c09a05d42b8ac600ed39e3892f..aad6d457b9e8ef75d410d01de3dd6bac692c96fd 100644 (file)
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        struct dentry   *dchild;
        int             type, mode;
        __be32          nfserr;
+       int             hosterr;
        dev_t           rdev = 0, wanted = new_decode_dev(attr->ia_size);
 
        dprintk("nfsd: CREATE   %s %.*s\n",
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        nfserr = nfserr_exist;
        if (isdotent(argp->name, argp->len))
                goto done;
+       hosterr = fh_want_write(dirfhp);
+       if (hosterr) {
+               nfserr = nfserrno(hosterr);
+               goto done;
+       }
+
        fh_lock_nested(dirfhp, I_MUTEX_PARENT);
        dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
        if (IS_ERR(dchild)) {
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 out_unlock:
        /* We don't really need to unlock, as fh_put does it. */
        fh_unlock(dirfhp);
-
+       fh_drop_write(dirfhp);
 done:
        fh_put(dirfhp);
        return nfsd_return_dirop(nfserr, resp);
index 702f64e820c301992afda8adecf6d06dc4746c22..a9269f142cc481ec451c681397f520c1a1cb97f7 100644 (file)
@@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         * If it has, the parent directory should already be locked.
         */
        if (!resfhp->fh_dentry) {
+               host_err = fh_want_write(fhp);
+               if (host_err)
+                       goto out_nfserr;
+
                /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
                fh_lock_nested(fhp, I_MUTEX_PARENT);
                dchild = lookup_one_len(fname, dentry, flen);
@@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
        }
 
-       host_err = fh_want_write(fhp);
-       if (host_err)
-               goto out_nfserr;
-
        /*
         * Get the dir op function pointer.
         */
        err = 0;
+       host_err = 0;
        switch (type) {
        case S_IFREG:
                host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
                break;
        }
-       if (host_err < 0) {
-               fh_drop_write(fhp);
+       if (host_err < 0)
                goto out_nfserr;
-       }
 
        err = nfsd_create_setattr(rqstp, resfhp, iap);
 
@@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err2 = nfserrno(commit_metadata(fhp));
        if (err2)
                err = err2;
-       fh_drop_write(fhp);
        /*
         * Update the file handle to get the new inode info.
         */
@@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = nfserr_notdir;
        if (!dirp->i_op->lookup)
                goto out;
+
+       host_err = fh_want_write(fhp);
+       if (host_err)
+               goto out_nfserr;
+
        fh_lock_nested(fhp, I_MUTEX_PARENT);
 
        /*
@@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                v_atime = verifier[1]&0x7fffffff;
        }
        
-       host_err = fh_want_write(fhp);
-       if (host_err)
-               goto out_nfserr;
        if (dchild->d_inode) {
                err = 0;
 
@@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (!err)
                err = nfserrno(commit_metadata(fhp));
 
-       fh_drop_write(fhp);
        /*
         * Update the filehandle to get the new inode info.
         */
@@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        fh_unlock(fhp);
        if (dchild && !IS_ERR(dchild))
                dput(dchild);
+       fh_drop_write(fhp);
        return err;
  
  out_nfserr:
@@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
+
+       host_err = fh_want_write(fhp);
+       if (host_err)
+               goto out_nfserr;
+
        fh_lock(fhp);
        dentry = fhp->fh_dentry;
        dnew = lookup_one_len(fname, dentry, flen);
@@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
 
-       host_err = fh_want_write(fhp);
-       if (host_err)
-               goto out_nfserr;
-
        if (unlikely(path[plen] != 0)) {
                char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
                if (path_alloced == NULL)
@@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        if (isdotent(name, len))
                goto out;
 
+       host_err = fh_want_write(tfhp);
+       if (host_err) {
+               err = nfserrno(host_err);
+               goto out;
+       }
+
        fh_lock_nested(ffhp, I_MUTEX_PARENT);
        ddir = ffhp->fh_dentry;
        dirp = ddir->d_inode;
@@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 
        dold = tfhp->fh_dentry;
 
-       host_err = fh_want_write(tfhp);
-       if (host_err) {
-               err = nfserrno(host_err);
-               goto out_dput;
-       }
        err = nfserr_noent;
        if (!dold->d_inode)
-               goto out_drop_write;
+               goto out_dput;
        host_err = nfsd_break_lease(dold->d_inode);
        if (host_err) {
                err = nfserrno(host_err);
-               goto out_drop_write;
+               goto out_dput;
        }
        host_err = vfs_link(dold, dirp, dnew);
        if (!host_err) {
@@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                else
                        err = nfserrno(host_err);
        }
-out_drop_write:
-       fh_drop_write(tfhp);
 out_dput:
        dput(dnew);
 out_unlock:
        fh_unlock(ffhp);
+       fh_drop_write(tfhp);
 out:
        return err;
 
@@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
 
+       host_err = fh_want_write(ffhp);
+       if (host_err) {
+               err = nfserrno(host_err);
+               goto out;
+       }
+
        /* cannot use fh_lock as we need deadlock protective ordering
         * so do it by hand */
        trap = lock_rename(tdentry, fdentry);
@@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        host_err = -EXDEV;
        if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                goto out_dput_new;
-       host_err = fh_want_write(ffhp);
-       if (host_err)
-               goto out_dput_new;
 
        host_err = nfsd_break_lease(odentry->d_inode);
        if (host_err)
-               goto out_drop_write;
+               goto out_dput_new;
        if (ndentry->d_inode) {
                host_err = nfsd_break_lease(ndentry->d_inode);
                if (host_err)
-                       goto out_drop_write;
+                       goto out_dput_new;
        }
        host_err = vfs_rename(fdir, odentry, tdir, ndentry);
        if (!host_err) {
@@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                if (!host_err)
                        host_err = commit_metadata(ffhp);
        }
-out_drop_write:
-       fh_drop_write(ffhp);
  out_dput_new:
        dput(ndentry);
  out_dput_old:
@@ -1839,6 +1841,7 @@ out_drop_write:
        fill_post_wcc(tfhp);
        unlock_rename(tdentry, fdentry);
        ffhp->fh_locked = tfhp->fh_locked = 0;
+       fh_drop_write(ffhp);
 
 out:
        return err;
@@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (err)
                goto out;
 
+       host_err = fh_want_write(fhp);
+       if (host_err)
+               goto out_nfserr;
+
        fh_lock_nested(fhp, I_MUTEX_PARENT);
        dentry = fhp->fh_dentry;
        dirp = dentry->d_inode;
@@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = rdentry->d_inode->i_mode & S_IFMT;
 
-       host_err = fh_want_write(fhp);
-       if (host_err)
-               goto out_put;
-
        host_err = nfsd_break_lease(rdentry->d_inode);
        if (host_err)
-               goto out_drop_write;
+               goto out_put;
        if (type != S_IFDIR)
                host_err = vfs_unlink(dirp, rdentry);
        else
                host_err = vfs_rmdir(dirp, rdentry);
        if (!host_err)
                host_err = commit_metadata(fhp);
-out_drop_write:
-       fh_drop_write(fhp);
 out_put:
        dput(rdentry);
 
index ec0611b2b738468fbc261b35f2190048680e67dd..359594c393d273f20b5fe3ef0a9a75379ab1edc2 100644 (file)
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 
 static inline int fh_want_write(struct svc_fh *fh)
 {
-       return mnt_want_write(fh->fh_export->ex_path.mnt);
+       int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+
+       if (!ret)
+               fh->fh_want_write = 1;
+       return ret;
 }
 
 static inline void fh_drop_write(struct svc_fh *fh)
 {
-       mnt_drop_write(fh->fh_export->ex_path.mnt);
+       if (fh->fh_want_write) {
+               fh->fh_want_write = 0;
+               mnt_drop_write(fh->fh_export->ex_path.mnt);
+       }
 }
 
 #endif /* LINUX_NFSD_VFS_H */
index 62cebc8e1a1fd49ceec5684de547bd8115eedac2..a4d56ac02e6cf075b81dfaee284175d86776219d 100644 (file)
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_dentry->d_inode;
        struct nilfs_transaction_info ti;
-       int ret;
+       int ret = 0;
 
        if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
                return VM_FAULT_SIGBUS; /* -ENOSPC */
 
+       sb_start_pagefault(inode->i_sb);
        lock_page(page);
        if (page->mapping != inode->i_mapping ||
            page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
                unlock_page(page);
-               return VM_FAULT_NOPAGE; /* make the VM retry the fault */
+               ret = -EFAULT;  /* make the VM retry the fault */
+               goto out;
        }
 
        /*
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
        /* never returns -ENOMEM, but may return -ENOSPC */
        if (unlikely(ret))
-               return VM_FAULT_SIGBUS;
+               goto out;
 
-       ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
-       if (ret != VM_FAULT_LOCKED) {
+       ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
+       if (ret) {
                nilfs_transaction_abort(inode->i_sb);
-               return ret;
+               goto out;
        }
        nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
        nilfs_transaction_commit(inode->i_sb);
 
  mapped:
        wait_on_page_writeback(page);
-       return VM_FAULT_LOCKED;
+ out:
+       sb_end_pagefault(inode->i_sb);
+       return block_page_mkwrite_return(ret);
 }
 
 static const struct vm_operations_struct nilfs_file_vm_ops = {
index 0b6387c67e6ca601d91fe75684fede7826c6f11f..fdb180769485f95c78a0a5b5f33d5752baf0590f 100644 (file)
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
                goto out_free;
        }
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
        ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
        if (ret < 0)
                printk(KERN_ERR "NILFS: GC failed during preparation: "
index 88e11fb346b6d0fd6e81fb03994fe18cac514bb7..a5752a589932d936b12cfdda27a6fb718e6cfb51 100644 (file)
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb,
        if (ret > 0)
                return 0;
 
-       vfs_check_frozen(sb, SB_FREEZE_WRITE);
+       sb_start_intwrite(sb);
 
        nilfs = sb->s_fs_info;
        down_read(&nilfs->ns_segctor_sem);
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb,
        current->journal_info = ti->ti_save;
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+       sb_end_intwrite(sb);
        return ret;
 }
 
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb)
                err = nilfs_construct_segment(sb);
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+       sb_end_intwrite(sb);
        return err;
 }
 
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb)
        current->journal_info = ti->ti_save;
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+       sb_end_intwrite(sb);
 }
 
 void nilfs_relax_pressure_in_lock(struct super_block *sb)
index 7389d2d5e51d257c72f9fb0c1468c38a28b309e4..1ecf46448f858d477c040e313989cc9116315124 100644 (file)
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
        if (err)
                return err;
        pos = *ppos;
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = mapping->backing_dev_info;
        written = 0;
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        BUG_ON(iocb->ki_pos != pos);
 
+       sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
        ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
        mutex_unlock(&inode->i_mutex);
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                if (err < 0)
                        ret = err;
        }
+       sb_end_write(inode->i_sb);
        return ret;
 }
 
index 7602783d7f41c9a01827f16446da43e036e6a7cb..46a1f6d7510405bf14a0dbc837d2585cfd64e75d 100644 (file)
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       int ret;
 
        if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
            !ocfs2_writes_unwritten_extents(osb))
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
 
-       return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+       ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+       mnt_drop_write_file(file);
+       return ret;
 }
 
 static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
        if (iocb->ki_left == 0)
                return 0;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+       sb_start_write(inode->i_sb);
 
        appending = file->f_flags & O_APPEND ? 1 : 0;
        direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2436,6 +2442,7 @@ out_sems:
                ocfs2_iocb_clear_sem_locked(iocb);
 
        mutex_unlock(&inode->i_mutex);
+       sb_end_write(inode->i_sb);
 
        if (written)
                ret = written;
index d96f7f81d8dd3257f49bb02885db296af22881cb..f20edcbfe700c83a12bb90b697535ea7a4435e98 100644 (file)
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(new_clusters, (int __user *)arg))
                        return -EFAULT;
 
-               return ocfs2_group_extend(inode, new_clusters);
+               status = mnt_want_write_file(filp);
+               if (status)
+                       return status;
+               status = ocfs2_group_extend(inode, new_clusters);
+               mnt_drop_write_file(filp);
+               return status;
        case OCFS2_IOC_GROUP_ADD:
        case OCFS2_IOC_GROUP_ADD64:
                if (!capable(CAP_SYS_RESOURCE))
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
                        return -EFAULT;
 
-               return ocfs2_group_add(inode, &input);
+               status = mnt_want_write_file(filp);
+               if (status)
+                       return status;
+               status = ocfs2_group_add(inode, &input);
+               mnt_drop_write_file(filp);
+               return status;
        case OCFS2_IOC_REFLINK:
                if (copy_from_user(&args, argp, sizeof(args)))
                        return -EFAULT;
index 0a42ae96dca7d4a0f662505e0e51206895ce4156..2dd36af79e262f8de2051525e1baf8585786cba8 100644 (file)
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
        if (journal_current_handle())
                return jbd2_journal_start(journal, max_buffs);
 
+       sb_start_intwrite(osb->sb);
+
        down_read(&osb->journal->j_trans_barrier);
 
        handle = jbd2_journal_start(journal, max_buffs);
        if (IS_ERR(handle)) {
                up_read(&osb->journal->j_trans_barrier);
+               sb_end_intwrite(osb->sb);
 
                mlog_errno(PTR_ERR(handle));
 
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
        if (ret < 0)
                mlog_errno(ret);
 
-       if (!nested)
+       if (!nested) {
                up_read(&journal->j_trans_barrier);
+               sb_end_intwrite(osb->sb);
+       }
 
        return ret;
 }
index 9cd41083e99123eca1c48085fb39809e6b906b40..d150372fd81da7967eb3a8f3d2a3b0d9c8c41c82 100644 (file)
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        sigset_t oldset;
        int ret;
 
+       sb_start_pagefault(inode->i_sb);
        ocfs2_block_signals(&oldset);
 
        /*
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 out:
        ocfs2_unblock_signals(&oldset);
+       sb_end_pagefault(inode->i_sb);
        return ret;
 }
 
index 9f32d7cbb7a3701f1f28d938b17ae4d057dc4e84..30a055049e1662eb382e3ec328918e2cd143c98f 100644 (file)
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
                goto out_dput;
        }
 
-       error = mnt_want_write(new_path.mnt);
-       if (error) {
-               mlog_errno(error);
-               goto out_dput;
-       }
-
        error = ocfs2_vfs_reflink(old_path.dentry,
                                  new_path.dentry->d_inode,
                                  new_dentry, preserve);
-       mnt_drop_write(new_path.mnt);
 out_dput:
-       dput(new_dentry);
-       mutex_unlock(&new_path.dentry->d_inode->i_mutex);
-       path_put(&new_path);
+       done_path_create(&new_path, new_dentry);
 out:
        path_put(&old_path);
 
index 1e914b397e129f311875eb15d0c7679ace93aa3e..f3d96e7e7b19d4254924a59dfc4121aa5d8635b4 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
        if (IS_APPEND(inode))
                goto out_putf;
 
+       sb_start_write(inode->i_sb);
        error = locks_verify_truncate(inode, file, length);
        if (!error)
                error = security_path_truncate(&file->f_path);
        if (!error)
                error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+       sb_end_write(inode->i_sb);
 out_putf:
        fput(file);
 out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
 
-       return file->f_op->fallocate(file, mode, offset, len);
+       sb_start_write(inode->i_sb);
+       ret = file->f_op->fallocate(file, mode, offset, len);
+       sb_end_write(inode->i_sb);
+       return ret;
 }
 
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode,
                /*
                 * Balanced in __fput()
                 */
-               error = mnt_want_write(mnt);
+               error = __mnt_want_write(mnt);
                if (error)
                        put_write_access(inode);
        }
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f,
        if (unlikely(f->f_flags & O_PATH))
                f->f_mode = FMODE_PATH;
 
+       path_get(&f->f_path);
        inode = f->f_path.dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = __get_file_write_access(inode, f->f_path.mnt);
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry,
        int error;
        BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
-       mntget(file->f_path.mnt);
-       file->f_path.dentry = dget(dentry);
-
+       file->f_path.dentry = dentry;
        error = do_dentry_open(file, open, current_cred());
        if (!error)
                *opened |= FILE_OPENED;
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags,
 
        f->f_flags = flags;
        f->f_path = *path;
-       path_get(&f->f_path);
        error = do_dentry_open(f, NULL, cred);
        if (!error) {
                error = open_check_o_direct(f);
index 95cbd6b227e6fb05735126a00296bb7230224429..8d85d7068c1e8a0f028a7a72dba9c04c24e3ee91 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,18 +1016,16 @@ fail_inode:
        return NULL;
 }
 
-struct file *create_write_pipe(int flags)
+int create_pipe_files(struct file **res, int flags)
 {
        int err;
-       struct inode *inode;
+       struct inode *inode = get_pipe_inode();
        struct file *f;
        struct path path;
-       struct qstr name = { .name = "" };
+       static struct qstr name = { .name = "" };
 
-       err = -ENFILE;
-       inode = get_pipe_inode();
        if (!inode)
-               goto err;
+               return -ENFILE;
 
        err = -ENOMEM;
        path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags)
        f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
        if (!f)
                goto err_dentry;
-       f->f_mapping = inode->i_mapping;
 
        f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
-       f->f_version = 0;
 
-       return f;
+       res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
+       if (!res[0])
+               goto err_file;
+
+       path_get(&path);
+       res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+       res[1] = f;
+       return 0;
 
- err_dentry:
+err_file:
+       put_filp(f);
+err_dentry:
        free_pipe_info(inode);
        path_put(&path);
-       return ERR_PTR(err);
+       return err;
 
- err_inode:
+err_inode:
        free_pipe_info(inode);
        iput(inode);
- err:
-       return ERR_PTR(err);
-}
-
-void free_write_pipe(struct file *f)
-{
-       free_pipe_info(f->f_dentry->d_inode);
-       path_put(&f->f_path);
-       put_filp(f);
-}
-
-struct file *create_read_pipe(struct file *wrf, int flags)
-{
-       /* Grab pipe from the writer */
-       struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
-                                   &read_pipefifo_fops);
-       if (!f)
-               return ERR_PTR(-ENFILE);
-
-       path_get(&wrf->f_path);
-       f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-
-       return f;
+       return err;
 }
 
 int do_pipe_flags(int *fd, int flags)
 {
-       struct file *fw, *fr;
+       struct file *files[2];
        int error;
        int fdw, fdr;
 
        if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
                return -EINVAL;
 
-       fw = create_write_pipe(flags);
-       if (IS_ERR(fw))
-               return PTR_ERR(fw);
-       fr = create_read_pipe(fw, flags);
-       error = PTR_ERR(fr);
-       if (IS_ERR(fr))
-               goto err_write_pipe;
+       error = create_pipe_files(files, flags);
+       if (error)
+               return error;
 
        error = get_unused_fd_flags(flags);
        if (error < 0)
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags)
        fdw = error;
 
        audit_fd_pair(fdr, fdw);
-       fd_install(fdr, fr);
-       fd_install(fdw, fw);
+       fd_install(fdr, files[0]);
+       fd_install(fdw, files[1]);
        fd[0] = fdr;
        fd[1] = fdw;
 
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags)
  err_fdr:
        put_unused_fd(fdr);
  err_read_pipe:
-       path_put(&fr->f_path);
-       put_filp(fr);
- err_write_pipe:
-       free_write_pipe(fw);
+       fput(files[0]);
+       fput(files[1]);
        return error;
 }
 
index 7bf08fa22ec9ab122bad5438a02bd78db6f1e885..41514dd89462d73d4d1c7b8a68e431d5cabc08a9 100644 (file)
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
        };
        ssize_t ret;
 
+       sb_start_write(inode->i_sb);
+
        pipe_lock(pipe);
 
        splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        *ppos += ret;
                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }
+       sb_end_write(inode->i_sb);
 
        return ret;
 }
index 4bf714459a4b4f589fe8b8e6a423bc755c0e5ac2..b05cf47463d0c2347e8eca1597001f96dd2a97fc 100644 (file)
 #include <linux/rculist_bl.h>
 #include <linux/cleancache.h>
 #include <linux/fsnotify.h>
+#include <linux/lockdep.h>
 #include "internal.h"
 
 
 LIST_HEAD(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
 
+static char *sb_writers_name[SB_FREEZE_LEVELS] = {
+       "sb_writers",
+       "sb_pagefaults",
+       "sb_internal",
+};
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
        return total_objects;
 }
 
+static int init_sb_writers(struct super_block *s, struct file_system_type *type)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+               err = percpu_counter_init(&s->s_writers.counter[i], 0);
+               if (err < 0)
+                       goto err_out;
+               lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+                                &type->s_writers_key[i], 0);
+       }
+       init_waitqueue_head(&s->s_writers.wait);
+       init_waitqueue_head(&s->s_writers.wait_unfrozen);
+       return 0;
+err_out:
+       while (--i >= 0)
+               percpu_counter_destroy(&s->s_writers.counter[i]);
+       return err;
+}
+
+static void destroy_sb_writers(struct super_block *s)
+{
+       int i;
+
+       for (i = 0; i < SB_FREEZE_LEVELS; i++)
+               percpu_counter_destroy(&s->s_writers.counter[i]);
+}
+
 /**
  *     alloc_super     -       create new superblock
  *     @type:  filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 
        if (s) {
                if (security_sb_alloc(s)) {
+                       /*
+                        * We cannot call security_sb_free() without
+                        * security_sb_alloc() succeeding. So bail out manually
+                        */
                        kfree(s);
                        s = NULL;
                        goto out;
                }
 #ifdef CONFIG_SMP
                s->s_files = alloc_percpu(struct list_head);
-               if (!s->s_files) {
-                       security_sb_free(s);
-                       kfree(s);
-                       s = NULL;
-                       goto out;
-               } else {
+               if (!s->s_files)
+                       goto err_out;
+               else {
                        int i;
 
                        for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 #else
                INIT_LIST_HEAD(&s->s_files);
 #endif
+               if (init_sb_writers(s, type))
+                       goto err_out;
                s->s_flags = flags;
                s->s_bdi = &default_backing_dev_info;
                INIT_HLIST_NODE(&s->s_instances);
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                mutex_init(&s->s_dquot.dqio_mutex);
                mutex_init(&s->s_dquot.dqonoff_mutex);
                init_rwsem(&s->s_dquot.dqptr_sem);
-               init_waitqueue_head(&s->s_wait_unfrozen);
                s->s_maxbytes = MAX_NON_LFS;
                s->s_op = &default_op;
                s->s_time_gran = 1000000000;
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
        }
 out:
        return s;
+err_out:
+       security_sb_free(s);
+#ifdef CONFIG_SMP
+       if (s->s_files)
+               free_percpu(s->s_files);
+#endif
+       destroy_sb_writers(s);
+       kfree(s);
+       s = NULL;
+       goto out;
 }
 
 /**
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s)
 #ifdef CONFIG_SMP
        free_percpu(s->s_files);
 #endif
+       destroy_sb_writers(s);
        security_sb_free(s);
        WARN_ON(!list_empty(&s->s_mounts));
        kfree(s->s_subtype);
@@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
 {
        while (1) {
                struct super_block *s = get_super(bdev);
-               if (!s || s->s_frozen == SB_UNFROZEN)
+               if (!s || s->s_writers.frozen == SB_UNFROZEN)
                        return s;
                up_read(&s->s_umount);
-               vfs_check_frozen(s, SB_FREEZE_WRITE);
+               wait_event(s->s_writers.wait_unfrozen,
+                          s->s_writers.frozen == SB_UNFROZEN);
                put_super(s);
        }
 }
@@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
        int retval;
        int remount_ro;
 
-       if (sb->s_frozen != SB_UNFROZEN)
+       if (sb->s_writers.frozen != SB_UNFROZEN)
                return -EBUSY;
 
 #ifdef CONFIG_BLOCK
@@ -1163,6 +1213,120 @@ out:
        return ERR_PTR(error);
 }
 
+/*
+ * This is an internal function, please use sb_end_{write,pagefault,intwrite}
+ * instead.
+ */
+void __sb_end_write(struct super_block *sb, int level)
+{
+       percpu_counter_dec(&sb->s_writers.counter[level-1]);
+       /*
+        * Make sure s_writers are updated before we wake up waiters in
+        * freeze_super().
+        */
+       smp_mb();
+       if (waitqueue_active(&sb->s_writers.wait))
+               wake_up(&sb->s_writers.wait);
+       rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+}
+EXPORT_SYMBOL(__sb_end_write);
+
+#ifdef CONFIG_LOCKDEP
+/*
+ * We want lockdep to tell us about possible deadlocks with freezing but
+ * it's it bit tricky to properly instrument it. Getting a freeze protection
+ * works as getting a read lock but there are subtle problems. XFS for example
+ * gets freeze protection on internal level twice in some cases, which is OK
+ * only because we already hold a freeze protection also on higher level. Due
+ * to these cases we have to tell lockdep we are doing trylock when we
+ * already hold a freeze protection for a higher freeze level.
+ */
+static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
+                               unsigned long ip)
+{
+       int i;
+
+       if (!trylock) {
+               for (i = 0; i < level - 1; i++)
+                       if (lock_is_held(&sb->s_writers.lock_map[i])) {
+                               trylock = true;
+                               break;
+                       }
+       }
+       rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
+}
+#endif
+
+/*
+ * This is an internal function, please use sb_start_{write,pagefault,intwrite}
+ * instead.
+ */
+int __sb_start_write(struct super_block *sb, int level, bool wait)
+{
+retry:
+       if (unlikely(sb->s_writers.frozen >= level)) {
+               if (!wait)
+                       return 0;
+               wait_event(sb->s_writers.wait_unfrozen,
+                          sb->s_writers.frozen < level);
+       }
+
+#ifdef CONFIG_LOCKDEP
+       acquire_freeze_lock(sb, level, !wait, _RET_IP_);
+#endif
+       percpu_counter_inc(&sb->s_writers.counter[level-1]);
+       /*
+        * Make sure counter is updated before we check for frozen.
+        * freeze_super() first sets frozen and then checks the counter.
+        */
+       smp_mb();
+       if (unlikely(sb->s_writers.frozen >= level)) {
+               __sb_end_write(sb, level);
+               goto retry;
+       }
+       return 1;
+}
+EXPORT_SYMBOL(__sb_start_write);
+
+/**
+ * sb_wait_write - wait until all writers to given file system finish
+ * @sb: the super for which we wait
+ * @level: type of writers we wait for (normal vs page fault)
+ *
+ * This function waits until there are no writers of given type to given file
+ * system. Caller of this function should make sure there can be no new writers
+ * of type @level before calling this function. Otherwise this function can
+ * livelock.
+ */
+static void sb_wait_write(struct super_block *sb, int level)
+{
+       s64 writers;
+
+       /*
+        * We just cycle-through lockdep here so that it does not complain
+        * about returning with lock to userspace
+        */
+       rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
+       rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+
+       do {
+               DEFINE_WAIT(wait);
+
+               /*
+                * We use a barrier in prepare_to_wait() to separate setting
+                * of frozen and checking of the counter
+                */
+               prepare_to_wait(&sb->s_writers.wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+
+               writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
+               if (writers)
+                       schedule();
+
+               finish_wait(&sb->s_writers.wait, &wait);
+       } while (writers);
+}
+
 /**
  * freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
@@ -1170,6 +1334,31 @@ out:
  * Syncs the super to make sure the filesystem is consistent and calls the fs's
  * freeze_fs.  Subsequent calls to this without first thawing the fs will return
  * -EBUSY.
+ *
+ * During this function, sb->s_writers.frozen goes through these values:
+ *
+ * SB_UNFROZEN: File system is normal, all writes progress as usual.
+ *
+ * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
+ * writes should be blocked, though page faults are still allowed. We wait for
+ * all writes to complete and then proceed to the next stage.
+ *
+ * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
+ * but internal fs threads can still modify the filesystem (although they
+ * should not dirty new pages or inodes), writeback can run etc. After waiting
+ * for all running page faults we sync the filesystem which will clean all
+ * dirty pages and inodes (no new dirty pages or inodes can be created when
+ * sync is running).
+ *
+ * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
+ * modification are blocked (e.g. XFS preallocation truncation on inode
+ * reclaim). This is usually implemented by blocking new transactions for
+ * filesystems that have them and need this additional guard. After all
+ * internal writers are finished we call ->freeze_fs() to finish filesystem
+ * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
+ * mostly auxiliary for filesystems to verify they do not modify frozen fs.
+ *
+ * sb->s_writers.frozen is protected by sb->s_umount.
  */
 int freeze_super(struct super_block *sb)
 {
@@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb)
 
        atomic_inc(&sb->s_active);
        down_write(&sb->s_umount);
-       if (sb->s_frozen) {
+       if (sb->s_writers.frozen != SB_UNFROZEN) {
                deactivate_locked_super(sb);
                return -EBUSY;
        }
@@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb)
        }
 
        if (sb->s_flags & MS_RDONLY) {
-               sb->s_frozen = SB_FREEZE_TRANS;
-               smp_wmb();
+               /* Nothing to do really... */
+               sb->s_writers.frozen = SB_FREEZE_COMPLETE;
                up_write(&sb->s_umount);
                return 0;
        }
 
-       sb->s_frozen = SB_FREEZE_WRITE;
+       /* From now on, no new normal writers can start */
+       sb->s_writers.frozen = SB_FREEZE_WRITE;
+       smp_wmb();
+
+       /* Release s_umount to preserve sb_start_write -> s_umount ordering */
+       up_write(&sb->s_umount);
+
+       sb_wait_write(sb, SB_FREEZE_WRITE);
+
+       /* Now we go and block page faults... */
+       down_write(&sb->s_umount);
+       sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
        smp_wmb();
 
+       sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
+
+       /* All writers are done so after syncing there won't be dirty data */
        sync_filesystem(sb);
 
-       sb->s_frozen = SB_FREEZE_TRANS;
+       /* Now wait for internal filesystem counter */
+       sb->s_writers.frozen = SB_FREEZE_FS;
        smp_wmb();
+       sb_wait_write(sb, SB_FREEZE_FS);
 
-       sync_blockdev(sb->s_bdev);
        if (sb->s_op->freeze_fs) {
                ret = sb->s_op->freeze_fs(sb);
                if (ret) {
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
-                       sb->s_frozen = SB_UNFROZEN;
+                       sb->s_writers.frozen = SB_UNFROZEN;
                        smp_wmb();
-                       wake_up(&sb->s_wait_unfrozen);
+                       wake_up(&sb->s_writers.wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
                }
        }
+       /*
+        * This is just for debugging purposes so that fs can warn if it
+        * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+        */
+       sb->s_writers.frozen = SB_FREEZE_COMPLETE;
        up_write(&sb->s_umount);
        return 0;
 }
@@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb)
        int error;
 
        down_write(&sb->s_umount);
-       if (sb->s_frozen == SB_UNFROZEN) {
+       if (sb->s_writers.frozen == SB_UNFROZEN) {
                up_write(&sb->s_umount);
                return -EINVAL;
        }
@@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb)
                if (error) {
                        printk(KERN_ERR
                                "VFS:Filesystem thaw failed\n");
-                       sb->s_frozen = SB_FREEZE_TRANS;
                        up_write(&sb->s_umount);
                        return error;
                }
        }
 
 out:
-       sb->s_frozen = SB_UNFROZEN;
+       sb->s_writers.frozen = SB_UNFROZEN;
        smp_wmb();
-       wake_up(&sb->s_wait_unfrozen);
+       wake_up(&sb->s_writers.wait_unfrozen);
        deactivate_locked_super(sb);
 
        return 0;
index a4759833d62d3be4067c816c32ef32e434cad5bd..614b2b544880c002cf650b9510302ede56939921 100644 (file)
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = 0;
        if (bb->vm_ops->page_mkwrite)
                ret = bb->vm_ops->page_mkwrite(vma, vmf);
+       else
+               file_update_time(file);
 
        sysfs_put_active(attr_sd);
        return ret;
index 15052ff916ec4fbef6059486f251a07ed3105b71..e562dd43f41fe762eb4d49c7814f1f002fe965c9 100644 (file)
@@ -123,6 +123,12 @@ xfs_setfilesize_trans_alloc(
 
        ioend->io_append_trans = tp;
 
+       /*
+        * We will pass freeze protection with a transaction.  So tell lockdep
+        * we released it.
+        */
+       rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                     1, _THIS_IP_);
        /*
         * We hand off the transaction to the completion thread now, so
         * clear the flag here.
@@ -199,6 +205,15 @@ xfs_end_io(
        struct xfs_inode *ip = XFS_I(ioend->io_inode);
        int             error = 0;
 
+       if (ioend->io_append_trans) {
+               /*
+                * We've got freeze protection passed with the transaction.
+                * Tell lockdep about it.
+                */
+               rwsem_acquire_read(
+                       &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                       0, 1, _THIS_IP_);
+       }
        if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                ioend->io_error = -EIO;
                goto done;
@@ -1425,6 +1440,9 @@ out_trans_cancel:
        if (ioend->io_append_trans) {
                current_set_flags_nested(&ioend->io_append_trans->t_pflags,
                                         PF_FSTRANS);
+               rwsem_acquire_read(
+                       &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                       0, 1, _THIS_IP_);
                xfs_trans_cancel(ioend->io_append_trans, 0);
        }
 out_destroy_ioend:
index c4559c6e6f2cffbf9783f035dbcdd6e1b39fdcdf..56afcdb2377d57c03bce9dc40b78c991184735a1 100644 (file)
@@ -770,10 +770,12 @@ xfs_file_aio_write(
        if (ocount == 0)
                return 0;
 
-       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+       sb_start_write(inode->i_sb);
 
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               ret = -EIO;
+               goto out;
+       }
 
        if (unlikely(file->f_flags & O_DIRECT))
                ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
@@ -792,6 +794,8 @@ xfs_file_aio_write(
                        ret = err;
        }
 
+out:
+       sb_end_write(inode->i_sb);
        return ret;
 }
 
index 1f1535d25a9bb726b96742fa82c59dec74183f51..0e0232c3b6d98039eb2b73fb556faec1b9069016 100644 (file)
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle(
        if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
                return -XFS_ERROR(EFAULT);
 
+       error = mnt_want_write_file(parfilp);
+       if (error)
+               return error;
+
        dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
+       if (IS_ERR(dentry)) {
+               mnt_drop_write_file(parfilp);
                return PTR_ERR(dentry);
+       }
 
        if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
                error = -XFS_ERROR(EPERM);
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle(
                                 fsd.fsd_dmstate);
 
  out:
+       mnt_drop_write_file(parfilp);
        dput(dentry);
        return error;
 }
@@ -634,7 +641,11 @@ xfs_ioc_space(
        if (ioflags & IO_INVIS)
                attr_flags |= XFS_ATTR_DMI;
 
+       error = mnt_want_write_file(filp);
+       if (error)
+               return error;
        error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+       mnt_drop_write_file(filp);
        return -error;
 }
 
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr(
 {
        struct fsxattr          fa;
        unsigned int            mask;
+       int error;
 
        if (copy_from_user(&fa, arg, sizeof(fa)))
                return -EFAULT;
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr(
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                mask |= FSX_NONBLOCK;
 
-       return -xfs_ioctl_setattr(ip, &fa, mask);
+       error = mnt_want_write_file(filp);
+       if (error)
+               return error;
+       error = xfs_ioctl_setattr(ip, &fa, mask);
+       mnt_drop_write_file(filp);
+       return -error;
 }
 
 STATIC int
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags(
        struct fsxattr          fa;
        unsigned int            flags;
        unsigned int            mask;
+       int error;
 
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags(
                mask |= FSX_NONBLOCK;
        fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
 
-       return -xfs_ioctl_setattr(ip, &fa, mask);
+       error = mnt_want_write_file(filp);
+       if (error)
+               return error;
+       error = xfs_ioctl_setattr(ip, &fa, mask);
+       mnt_drop_write_file(filp);
+       return -error;
 }
 
 STATIC int
@@ -1385,8 +1408,13 @@ xfs_file_ioctl(
                if (copy_from_user(&dmi, arg, sizeof(dmi)))
                        return -XFS_ERROR(EFAULT);
 
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
+
                error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
                                dmi.fsd_dmstate);
+               mnt_drop_write_file(filp);
                return -error;
        }
 
@@ -1434,7 +1462,11 @@ xfs_file_ioctl(
 
                if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
                        return -XFS_ERROR(EFAULT);
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_swapext(&sxp);
+               mnt_drop_write_file(filp);
                return -error;
        }
 
@@ -1463,9 +1495,14 @@ xfs_file_ioctl(
                if (copy_from_user(&inout, arg, sizeof(inout)))
                        return -XFS_ERROR(EFAULT);
 
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
+
                /* input parameter is passed in resblks field of structure */
                in = inout.resblks;
                error = xfs_reserve_blocks(mp, &in, &inout);
+               mnt_drop_write_file(filp);
                if (error)
                        return -error;
 
@@ -1496,7 +1533,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
 
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_growfs_data(mp, &in);
+               mnt_drop_write_file(filp);
                return -error;
        }
 
@@ -1506,7 +1547,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
 
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_growfs_log(mp, &in);
+               mnt_drop_write_file(filp);
                return -error;
        }
 
@@ -1516,7 +1561,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
 
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_growfs_rt(mp, &in);
+               mnt_drop_write_file(filp);
                return -error;
        }
 
index c4f2da0d2bf5ef1ec4f04aafe1a58034d5255375..1244274a56741404b36e0afc4cfc7d5d7f043ac7 100644 (file)
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl(
 
                if (xfs_compat_growfs_data_copyin(&in, arg))
                        return -XFS_ERROR(EFAULT);
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_growfs_data(mp, &in);
+               mnt_drop_write_file(filp);
                return -error;
        }
        case XFS_IOC_FSGROWFSRT_32: {
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl(
 
                if (xfs_compat_growfs_rt_copyin(&in, arg))
                        return -XFS_ERROR(EFAULT);
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_growfs_rt(mp, &in);
+               mnt_drop_write_file(filp);
                return -error;
        }
 #endif
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl(
                                   offsetof(struct xfs_swapext, sx_stat)) ||
                    xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
                        return -XFS_ERROR(EFAULT);
+               error = mnt_want_write_file(filp);
+               if (error)
+                       return error;
                error = xfs_swapext(&sxp);
+               mnt_drop_write_file(filp);
                return -error;
        }
        case XFS_IOC_FSBULKSTAT_32:
index 915edf6639f0267a1a34b145c868d2023e111daf..973dff6ad93526292871d3452d758e86b3f077ac 100644 (file)
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten(
                 * the same inode that we complete here and might deadlock
                 * on the iolock.
                 */
-               xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+               sb_start_intwrite(mp->m_super);
                tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
-               tp->t_flags |= XFS_TRANS_RESERVE;
+               tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
index 711ca51ca3d70b61d2abae52abcc4b4a29fdde02..29c2f83d4147c6824f8e8b67f5a9b4968c80c92b 100644 (file)
@@ -1551,7 +1551,7 @@ xfs_unmountfs(
 int
 xfs_fs_writable(xfs_mount_t *mp)
 {
-       return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) ||
+       return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) ||
                (mp->m_flags & XFS_MOUNT_RDONLY));
 }
 
index 8724336a9a08ce2528ffd3bb521f89ace2c97428..05a05a7b611963f03d0249a97cb5edbc50c152a6 100644 (file)
@@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 #define SHUTDOWN_REMOTE_REQ    0x0010  /* shutdown came from remote cell */
 #define SHUTDOWN_DEVICE_REQ    0x0020  /* failed all paths to the device */
 
-#define xfs_test_for_freeze(mp)                ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l)      vfs_check_frozen((mp)->m_super, (l))
-
 /*
  * Flags for xfs_mountfs
  */
index 97304f10e78a4970516e3d235ab8d1585c94d40b..96548176db80c1ee4557190e5d21d32d8c347de3 100644 (file)
@@ -403,7 +403,7 @@ xfs_sync_worker(
        if (!(mp->m_super->s_flags & MS_ACTIVE) &&
            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
                /* dgc: errors ignored here */
-               if (mp->m_super->s_frozen == SB_UNFROZEN &&
+               if (mp->m_super->s_writers.frozen == SB_UNFROZEN &&
                    xfs_log_need_covered(mp))
                        error = xfs_fs_log_dummy(mp);
                else
index fdf324508c5ee467c6055f0866e1be88c387942d..06ed520a767f99f06c43cd7cb30055d009b87ebd 100644 (file)
@@ -576,8 +576,12 @@ xfs_trans_alloc(
        xfs_mount_t     *mp,
        uint            type)
 {
-       xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
-       return _xfs_trans_alloc(mp, type, KM_SLEEP);
+       xfs_trans_t     *tp;
+
+       sb_start_intwrite(mp->m_super);
+       tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
+       tp->t_flags |= XFS_TRANS_FREEZE_PROT;
+       return tp;
 }
 
 xfs_trans_t *
@@ -588,6 +592,7 @@ _xfs_trans_alloc(
 {
        xfs_trans_t     *tp;
 
+       WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
        atomic_inc(&mp->m_active_trans);
 
        tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
@@ -611,6 +616,8 @@ xfs_trans_free(
        xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
        atomic_dec(&tp->t_mountp->m_active_trans);
+       if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
+               sb_end_intwrite(tp->t_mountp->m_super);
        xfs_trans_free_dqinfo(tp);
        kmem_zone_free(xfs_trans_zone, tp);
 }
@@ -643,7 +650,11 @@ xfs_trans_dup(
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
        ASSERT(tp->t_ticket != NULL);
 
-       ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
+       ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
+                      (tp->t_flags & XFS_TRANS_RESERVE) |
+                      (tp->t_flags & XFS_TRANS_FREEZE_PROT);
+       /* We gave our writer reference to the new transaction */
+       tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
        ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
        ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
        tp->t_blk_res = tp->t_blk_res_used;
index bc2afd52a0b7eed5d3e039c7474f9dcac3ed41cb..db056544cbb5ecaa2360d9ac1319f171fbbe1e27 100644 (file)
@@ -179,6 +179,8 @@ struct xfs_log_item_desc {
 #define        XFS_TRANS_SYNC          0x08    /* make commit synchronous */
 #define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
 #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT  0x40    /* Transaction has elevated writer
+                                          count in superblock */
 
 /*
  * Values for call flags parameter.
index 22f292a917a3f6c13eb4353c50ec9cf95acffa3f..36abf2aa7e680e24afe8c52b87ccf8f88707ee17 100644 (file)
 #define AUDIT_LAST_KERN_ANOM_MSG    1799
 #define AUDIT_ANOM_PROMISCUOUS      1700 /* Device changed promiscuous mode */
 #define AUDIT_ANOM_ABEND            1701 /* Process ended abnormally */
+#define AUDIT_ANOM_LINK                    1702 /* Suspicious use of file links */
 #define AUDIT_INTEGRITY_DATA       1800 /* Data integrity verification */
 #define AUDIT_INTEGRITY_METADATA    1801 /* Metadata integrity verification */
 #define AUDIT_INTEGRITY_STATUS     1802 /* Integrity enable status */
@@ -687,6 +688,8 @@ extern void             audit_log_d_path(struct audit_buffer *ab,
                                             const struct path *path);
 extern void                audit_log_key(struct audit_buffer *ab,
                                          char *key);
+extern void                audit_log_link_denied(const char *operation,
+                                                 struct path *link);
 extern void                audit_log_lost(const char *message);
 #ifdef CONFIG_SECURITY
 extern void                audit_log_secctx(struct audit_buffer *ab, u32 secid);
@@ -716,6 +719,7 @@ extern int audit_enabled;
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_d_path(b, p, d) do { ; } while (0)
 #define audit_log_key(b, k) do { ; } while (0)
+#define audit_log_link_denied(o, l) do { ; } while (0)
 #define audit_log_secctx(b,s) do { ; } while (0)
 #define audit_enabled 0
 #endif
index 4ba5c871552352deb7644c5607287d632c49fbe1..38dba16c417646ff28f9ea78dea28a268d65d125 100644 (file)
@@ -414,6 +414,7 @@ struct inodes_stat_t {
 #include <linux/shrinker.h>
 #include <linux/migrate_mode.h>
 #include <linux/uidgid.h>
+#include <linux/lockdep.h>
 
 #include <asm/byteorder.h>
 
@@ -440,6 +441,8 @@ extern unsigned long get_max_files(void);
 extern int sysctl_nr_open;
 extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
+extern int sysctl_protected_symlinks;
+extern int sysctl_protected_hardlinks;
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -1445,6 +1448,8 @@ extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
 
+struct mm_struct;
+
 /*
  *     Umount options
  */
@@ -1458,6 +1463,31 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
+/* Possible states of 'frozen' field */
+enum {
+       SB_UNFROZEN = 0,                /* FS is unfrozen */
+       SB_FREEZE_WRITE = 1,            /* Writes, dir ops, ioctls frozen */
+       SB_FREEZE_PAGEFAULT = 2,        /* Page faults stopped as well */
+       SB_FREEZE_FS = 3,               /* For internal FS use (e.g. to stop
+                                        * internal threads if needed) */
+       SB_FREEZE_COMPLETE = 4,         /* ->freeze_fs finished successfully */
+};
+
+#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
+
+struct sb_writers {
+       /* Counters for counting writers at each level */
+       struct percpu_counter   counter[SB_FREEZE_LEVELS];
+       wait_queue_head_t       wait;           /* queue for waiting for
+                                                  writers / faults to finish */
+       int                     frozen;         /* Is sb frozen? */
+       wait_queue_head_t       wait_unfrozen;  /* queue for waiting for
+                                                  sb to be thawed */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map      lock_map[SB_FREEZE_LEVELS];
+#endif
+};
+
 struct super_block {
        struct list_head        s_list;         /* Keep this first */
        dev_t                   s_dev;          /* search index; _not_ kdev_t */
@@ -1505,8 +1535,7 @@ struct super_block {
        struct hlist_node       s_instances;
        struct quota_info       s_dquot;        /* Diskquota specific options */
 
-       int                     s_frozen;
-       wait_queue_head_t       s_wait_unfrozen;
+       struct sb_writers       s_writers;
 
        char s_id[32];                          /* Informational name */
        u8 s_uuid[16];                          /* UUID */
@@ -1561,14 +1590,117 @@ extern struct timespec current_fs_time(struct super_block *sb);
 /*
  * Snapshotting support.
  */
-enum {
-       SB_UNFROZEN = 0,
-       SB_FREEZE_WRITE = 1,
-       SB_FREEZE_TRANS = 2,
-};
 
-#define vfs_check_frozen(sb, level) \
-       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
+void __sb_end_write(struct super_block *sb, int level);
+int __sb_start_write(struct super_block *sb, int level, bool wait);
+
+/**
+ * sb_end_write - drop write access to a superblock
+ * @sb: the super we wrote to
+ *
+ * Decrement number of writers to the filesystem. Wake up possible waiters
+ * wanting to freeze the filesystem.
+ */
+static inline void sb_end_write(struct super_block *sb)
+{
+       __sb_end_write(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_end_pagefault - drop write access to a superblock from a page fault
+ * @sb: the super we wrote to
+ *
+ * Decrement number of processes handling write page fault to the filesystem.
+ * Wake up possible waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_pagefault(struct super_block *sb)
+{
+       __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
+}
+
+/**
+ * sb_end_intwrite - drop write access to a superblock for internal fs purposes
+ * @sb: the super we wrote to
+ *
+ * Decrement fs-internal number of writers to the filesystem.  Wake up possible
+ * waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_intwrite(struct super_block *sb)
+{
+       __sb_end_write(sb, SB_FREEZE_FS);
+}
+
+/**
+ * sb_start_write - get write access to a superblock
+ * @sb: the super we write to
+ *
+ * When a process wants to write data or metadata to a file system (i.e. dirty
+ * a page or an inode), it should embed the operation in a sb_start_write() -
+ * sb_end_write() pair to get exclusion against file system freezing. This
+ * function increments number of writers preventing freezing. If the file
+ * system is already frozen, the function waits until the file system is
+ * thawed.
+ *
+ * Since freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. Generally,
+ * freeze protection should be the outermost lock. In particular, we have:
+ *
+ * sb_start_write
+ *   -> i_mutex                        (write path, truncate, directory ops, ...)
+ *   -> s_umount               (freeze_super, thaw_super)
+ */
+static inline void sb_start_write(struct super_block *sb)
+{
+       __sb_start_write(sb, SB_FREEZE_WRITE, true);
+}
+
+static inline int sb_start_write_trylock(struct super_block *sb)
+{
+       return __sb_start_write(sb, SB_FREEZE_WRITE, false);
+}
+
+/**
+ * sb_start_pagefault - get write access to a superblock from a page fault
+ * @sb: the super we write to
+ *
+ * When a process starts handling write page fault, it should embed the
+ * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
+ * exclusion against file system freezing. This is needed since the page fault
+ * is going to dirty a page. This function increments number of running page
+ * faults preventing freezing. If the file system is already frozen, the
+ * function waits until the file system is thawed.
+ *
+ * Since page fault freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. It is advised to
+ * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
+ * handling code implies lock dependency:
+ *
+ * mmap_sem
+ *   -> sb_start_pagefault
+ */
+static inline void sb_start_pagefault(struct super_block *sb)
+{
+       __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
+}
+
+/*
+ * sb_start_intwrite - get write access to a superblock for internal fs purposes
+ * @sb: the super we write to
+ *
+ * This is the third level of protection against filesystem freezing. It is
+ * free for use by a filesystem. The only requirement is that it must rank
+ * below sb_start_pagefault.
+ *
+ * For example filesystem can call sb_start_intwrite() when starting a
+ * transaction which somewhat eases handling of freezing for internal sources
+ * of filesystem changes (internal fs threads, discarding preallocation on file
+ * close, etc.).
+ */
+static inline void sb_start_intwrite(struct super_block *sb)
+{
+       __sb_start_write(sb, SB_FREEZE_FS, true);
+}
+
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
@@ -1892,6 +2024,7 @@ struct file_system_type {
        struct lock_class_key s_lock_key;
        struct lock_class_key s_umount_key;
        struct lock_class_key s_vfs_rename_key;
+       struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
 
        struct lock_class_key i_lock_key;
        struct lock_class_key i_mutex_key;
@@ -2334,9 +2467,6 @@ static inline void i_readcount_inc(struct inode *inode)
 }
 #endif
 extern int do_pipe_flags(int *, int);
-extern struct file *create_read_pipe(struct file *f, int flags);
-extern struct file *create_write_pipe(int flags);
-extern void free_write_pipe(struct file *);
 
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
 extern struct file * open_exec(const char *);
index bd079a1b0fdca27cf896e86f7ffb212302f6be73..311be906b57d8498d5c3529d0e42de0996a0c1ec 100644 (file)
@@ -1441,6 +1441,7 @@ extern void truncate_inode_pages_range(struct address_space *,
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
index d2ef8b34b96722078c5ab7a3914579d5b50a8ca6..4bf19d8174ed49301470eff1c6593ad1192a6e31 100644 (file)
@@ -67,6 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *);
 
 extern struct dentry *kern_path_create(int, const char *, struct path *, int);
 extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
                           const char *, unsigned int, struct path *);
index ce4743a260152724e8609fb96883177ad92c4875..fa63048fecff82e8e5849c1487d583c49194313c 100644 (file)
@@ -143,6 +143,7 @@ typedef struct svc_fh {
        int                     fh_maxsize;     /* max size for fh_handle */
 
        unsigned char           fh_locked;      /* inode locked by us */
+       unsigned char           fh_want_write;  /* remount protection taken */
 
 #ifdef CONFIG_NFSD_V3
        unsigned char           fh_post_saved;  /* post-op attrs saved */
index e11d1c0fc60fc582ef2fd988642d61796566df4c..ad1a427b5267b3faef96444289eacbaf62880f75 100644 (file)
@@ -160,4 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
 struct pipe_inode_info *get_pipe_info(struct file *file);
 
+int create_pipe_files(struct file **, int);
+
 #endif
index 4a3f28d2ca65239db79eee58b0e85366aa98669f..ea3b7b6191c7af3347dce055a88af200c55d1f5d 100644 (file)
@@ -1455,6 +1455,27 @@ void audit_log_key(struct audit_buffer *ab, char *key)
                audit_log_format(ab, "(null)");
 }
 
+/**
+ * audit_log_link_denied - report a link restriction denial
+ * @operation: specific link opreation
+ * @link: the path that triggered the restriction
+ */
+void audit_log_link_denied(const char *operation, struct path *link)
+{
+       struct audit_buffer *ab;
+
+       ab = audit_log_start(current->audit_context, GFP_KERNEL,
+                            AUDIT_ANOM_LINK);
+       audit_log_format(ab, "op=%s action=denied", operation);
+       audit_log_format(ab, " pid=%d comm=", current->pid);
+       audit_log_untrustedstring(ab, current->comm);
+       audit_log_d_path(ab, " path=", link);
+       audit_log_format(ab, " dev=");
+       audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id);
+       audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino);
+       audit_log_end(ab);
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
index 6502d35a25bafb9de258d5f9946c77648c55f368..87174ef59161eb32ce298f5ef9c2b0e9feb1b3f1 100644 (file)
@@ -1497,6 +1497,24 @@ static struct ctl_table fs_table[] = {
        },
 #endif
 #endif
+       {
+               .procname       = "protected_symlinks",
+               .data           = &sysctl_protected_symlinks,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .procname       = "protected_hardlinks",
+               .data           = &sysctl_protected_hardlinks,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        {
                .procname       = "suid_dumpable",
                .data           = &suid_dumpable,
index f8a3f1a829b8a9d767bed39c5138c7015380196b..ba6085d9c7411f33bbb6aa0077423e822ad1c3dc 100644 (file)
@@ -12,7 +12,7 @@
 
 #ifdef CONFIG_HOTPLUG_CPU
 static LIST_HEAD(percpu_counters);
-static DEFINE_MUTEX(percpu_counters_lock);
+static DEFINE_SPINLOCK(percpu_counters_lock);
 #endif
 
 #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
@@ -123,9 +123,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 
 #ifdef CONFIG_HOTPLUG_CPU
        INIT_LIST_HEAD(&fbc->list);
-       mutex_lock(&percpu_counters_lock);
+       spin_lock(&percpu_counters_lock);
        list_add(&fbc->list, &percpu_counters);
-       mutex_unlock(&percpu_counters_lock);
+       spin_unlock(&percpu_counters_lock);
 #endif
        return 0;
 }
@@ -139,9 +139,9 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
        debug_percpu_counter_deactivate(fbc);
 
 #ifdef CONFIG_HOTPLUG_CPU
-       mutex_lock(&percpu_counters_lock);
+       spin_lock(&percpu_counters_lock);
        list_del(&fbc->list);
-       mutex_unlock(&percpu_counters_lock);
+       spin_unlock(&percpu_counters_lock);
 #endif
        free_percpu(fbc->counters);
        fbc->counters = NULL;
@@ -170,7 +170,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
                return NOTIFY_OK;
 
        cpu = (unsigned long)hcpu;
-       mutex_lock(&percpu_counters_lock);
+       spin_lock(&percpu_counters_lock);
        list_for_each_entry(fbc, &percpu_counters, list) {
                s32 *pcount;
                unsigned long flags;
@@ -181,7 +181,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
                *pcount = 0;
                raw_spin_unlock_irqrestore(&fbc->lock, flags);
        }
-       mutex_unlock(&percpu_counters_lock);
+       spin_unlock(&percpu_counters_lock);
 #endif
        return NOTIFY_OK;
 }
index a4a5260b0279b77b37738540b1e8c24fb446a3e5..fa5ca304148e7b4756bf2e52cd6afde1dba2b5c3 100644 (file)
@@ -1712,8 +1712,35 @@ page_not_uptodate:
 }
 EXPORT_SYMBOL(filemap_fault);
 
+int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct page *page = vmf->page;
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       int ret = VM_FAULT_LOCKED;
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       lock_page(page);
+       if (page->mapping != inode->i_mapping) {
+               unlock_page(page);
+               ret = VM_FAULT_NOPAGE;
+               goto out;
+       }
+       /*
+        * We mark the page dirty already here so that when freeze is in
+        * progress, we are guaranteed that writeback during freezing will
+        * see the dirty page and writeprotect it again.
+        */
+       set_page_dirty(page);
+out:
+       sb_end_pagefault(inode->i_sb);
+       return ret;
+}
+EXPORT_SYMBOL(filemap_page_mkwrite);
+
 const struct vm_operations_struct generic_file_vm_ops = {
        .fault          = filemap_fault,
+       .page_mkwrite   = filemap_page_mkwrite,
 };
 
 /* This is used for a general mmap of a disk file */
@@ -2407,8 +2434,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        count = ocount;
        pos = *ppos;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
        written = 0;
@@ -2507,6 +2532,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        BUG_ON(iocb->ki_pos != pos);
 
+       sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
        blk_start_plug(&plug);
        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
@@ -2520,6 +2546,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                        ret = err;
        }
        blk_finish_plug(&plug);
+       sb_end_write(inode->i_sb);
        return ret;
 }
 EXPORT_SYMBOL(generic_file_aio_write);
index 213ca1f5340980e1ce6fad8d4f12e50858d61397..13e013b1270c6c240ba19eadb6e091e8c6afb13d 100644 (file)
@@ -304,6 +304,7 @@ out:
 
 static const struct vm_operations_struct xip_file_vm_ops = {
        .fault  = xip_file_fault,
+       .page_mkwrite   = filemap_page_mkwrite,
 };
 
 int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -401,6 +402,8 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
        loff_t pos;
        ssize_t ret;
 
+       sb_start_write(inode->i_sb);
+
        mutex_lock(&inode->i_mutex);
 
        if (!access_ok(VERIFY_READ, buf, len)) {
@@ -411,8 +414,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
        pos = *ppos;
        count = len;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 
@@ -436,6 +437,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
        current->backing_dev_info = NULL;
  out_up:
        mutex_unlock(&inode->i_mutex);
+       sb_end_write(inode->i_sb);
        return ret;
 }
 EXPORT_SYMBOL_GPL(xip_file_write);
index 482f089765ff3a5fae87e8491c5361e893fb688a..57361708d1a57d7bc11c8f34d269a35c50317dbb 100644 (file)
@@ -2650,6 +2650,9 @@ reuse:
                if (!page_mkwrite) {
                        wait_on_page_locked(dirty_page);
                        set_page_dirty_balance(dirty_page, page_mkwrite);
+                       /* file_update_time outside page_lock */
+                       if (vma->vm_file)
+                               file_update_time(vma->vm_file);
                }
                put_page(dirty_page);
                if (page_mkwrite) {
@@ -2667,10 +2670,6 @@ reuse:
                        }
                }
 
-               /* file_update_time outside page_lock */
-               if (vma->vm_file)
-                       file_update_time(vma->vm_file);
-
                return ret;
        }
 
@@ -3339,12 +3338,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
        if (dirty_page) {
                struct address_space *mapping = page->mapping;
+               int dirtied = 0;
 
                if (set_page_dirty(dirty_page))
-                       page_mkwrite = 1;
+                       dirtied = 1;
                unlock_page(dirty_page);
                put_page(dirty_page);
-               if (page_mkwrite && mapping) {
+               if ((dirtied || page_mkwrite) && mapping) {
                        /*
                         * Some device drivers do not set page.mapping but still
                         * dirty their pages
@@ -3353,7 +3353,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                }
 
                /* file_update_time outside page_lock */
-               if (vma->vm_file)
+               if (vma->vm_file && !page_mkwrite)
                        file_update_time(vma->vm_file);
        } else {
                unlock_page(vmf.page);
index 79981d97bc9c013904062b1bedfd1ebcae97f845..e4768c180da237c176e2e9491091a94420f4febe 100644 (file)
@@ -823,6 +823,34 @@ fail:
        return NULL;
 }
 
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+{
+       struct dentry *dentry;
+       struct path path;
+       int err = 0;
+       /*
+        * Get the parent directory, calculate the hash for last
+        * component.
+        */
+       dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+       err = PTR_ERR(dentry);
+       if (IS_ERR(dentry))
+               return err;
+
+       /*
+        * All right, let's create it.
+        */
+       err = security_path_mknod(&path, dentry, mode, 0);
+       if (!err) {
+               err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+               if (!err) {
+                       res->mnt = mntget(path.mnt);
+                       res->dentry = dget(dentry);
+               }
+       }
+       done_path_create(&path, dentry);
+       return err;
+}
 
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -831,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
-       struct dentry *dentry = NULL;
-       struct path path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
@@ -869,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        atomic_set(&addr->refcnt, 1);
 
        if (sun_path[0]) {
-               umode_t mode;
-               err = 0;
-               /*
-                * Get the parent directory, calculate the hash for last
-                * component.
-                */
-               dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-               err = PTR_ERR(dentry);
-               if (IS_ERR(dentry))
-                       goto out_mknod_parent;
-
-               /*
-                * All right, let's create it.
-                */
-               mode = S_IFSOCK |
+               struct path path;
+               umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
-               err = mnt_want_write(path.mnt);
-               if (err)
-                       goto out_mknod_dput;
-               err = security_path_mknod(&path, dentry, mode, 0);
-               if (err)
-                       goto out_mknod_drop_write;
-               err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
-out_mknod_drop_write:
-               mnt_drop_write(path.mnt);
-               if (err)
-                       goto out_mknod_dput;
-               mutex_unlock(&path.dentry->d_inode->i_mutex);
-               dput(path.dentry);
-               path.dentry = dentry;
-
+               err = unix_mknod(sun_path, mode, &path);
+               if (err) {
+                       if (err == -EEXIST)
+                               err = -EADDRINUSE;
+                       unix_release_addr(addr);
+                       goto out_up;
+               }
                addr->hash = UNIX_HASH_SIZE;
-       }
-
-       spin_lock(&unix_table_lock);
-
-       if (!sun_path[0]) {
+               hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+               spin_lock(&unix_table_lock);
+               u->path = path;
+               list = &unix_socket_table[hash];
+       } else {
+               spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
@@ -914,9 +920,6 @@ out_mknod_drop_write:
                }
 
                list = &unix_socket_table[addr->hash];
-       } else {
-               list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
-               u->path = path;
        }
 
        err = 0;
@@ -930,16 +933,6 @@ out_up:
        mutex_unlock(&u->readlock);
 out:
        return err;
-
-out_mknod_dput:
-       dput(dentry);
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
-       path_put(&path);
-out_mknod_parent:
-       if (err == -EEXIST)
-               err = -EADDRINUSE;
-       unix_release_addr(addr);
-       goto out_up;
 }
 
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
index 7e96249536b4e092ac52115efa547bacc8f86cc7..37711a5d0d6b174b015e646effad1082adea481a 100644 (file)
@@ -23,14 +23,14 @@ static int do_mod_firmware_load(const char *fn, char **fp)
        if (l <= 0 || l > 131072)
        {
                printk(KERN_INFO "Invalid firmware '%s'\n", fn);
-               filp_close(filp, current->files);
+               filp_close(filp, NULL);
                return 0;
        }
        dp = vmalloc(l);
        if (dp == NULL)
        {
                printk(KERN_INFO "Out of memory loading '%s'.\n", fn);
-               filp_close(filp, current->files);
+               filp_close(filp, NULL);
                return 0;
        }
        pos = 0;
@@ -38,10 +38,10 @@ static int do_mod_firmware_load(const char *fn, char **fp)
        {
                printk(KERN_INFO "Failed to read '%s'.\n", fn);
                vfree(dp);
-               filp_close(filp, current->files);
+               filp_close(filp, NULL);
                return 0;
        }
-       filp_close(filp, current->files);
+       filp_close(filp, NULL);
        *fp = dp;
        return (int) l;
 }