Merge tag 'v3.12'
author Eric Paris <eparis@redhat.com>
Fri, 22 Nov 2013 23:57:08 +0000 (18:57 -0500)
committer Eric Paris <eparis@redhat.com>
Fri, 22 Nov 2013 23:57:54 +0000 (18:57 -0500)
Linux 3.12

Conflicts:
fs/exec.c

fs/exec.c
fs/namei.c
init/Kconfig
kernel/audit.c

diff --combined fs/exec.c
index c5c24f2fc44ad604b1ada2438fa1fa2158384321,8875dd10ae7ac77444db95e33c9fde83dde67512..47d7edb9bf029441c9653f41be56cf67fd16d99b
+++ b/fs/exec.c
@@@ -74,6 -74,8 +74,8 @@@ static DEFINE_RWLOCK(binfmt_lock)
  void __register_binfmt(struct linux_binfmt * fmt, int insert)
  {
        BUG_ON(!fmt);
+       if (WARN_ON(!fmt->load_binary))
+               return;
        write_lock(&binfmt_lock);
        insert ? list_add(&fmt->lh, &formats) :
                 list_add_tail(&fmt->lh, &formats);
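
The WARN_ON() added above rejects formats registered without a ->load_binary hook, which the reworked search_binary_handler() below now calls unconditionally. A minimal sketch of a conforming registration follows; the handler name and its body are hypothetical, only register_binfmt(), unregister_binfmt() and the struct linux_binfmt fields are taken from the existing kernel API:

#include <linux/binfmts.h>
#include <linux/errno.h>
#include <linux/module.h>

/* Hypothetical format that recognizes nothing; a real handler would
 * inspect bprm->buf and set up the new mm on success. */
static int example_load_binary(struct linux_binprm *bprm)
{
	return -ENOEXEC;
}

static struct linux_binfmt example_format = {
	.module      = THIS_MODULE,
	.load_binary = example_load_binary,	/* must be non-NULL now */
};

static int __init example_binfmt_init(void)
{
	register_binfmt(&example_format);	/* tail of the formats list */
	return 0;
}

static void __exit example_binfmt_exit(void)
{
	unregister_binfmt(&example_format);
}

module_init(example_binfmt_init);
module_exit(example_binfmt_exit);
MODULE_LICENSE("GPL");
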
@@@ -266,7 -268,7 +268,7 @@@ static int __bprm_mm_init(struct linux_
        BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
        vma->vm_end = STACK_TOP_MAX;
        vma->vm_start = vma->vm_end - PAGE_SIZE;
-       vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
+       vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        INIT_LIST_HEAD(&vma->anon_vma_chain);
  
  }
  EXPORT_SYMBOL(remove_arg_zero);
  
+ #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
  /*
   * cycle through the list of binary format handlers, until one recognizes the image
   */
  int search_binary_handler(struct linux_binprm *bprm)
  {
-       unsigned int depth = bprm->recursion_depth;
-       int try,retval;
+       bool need_retry = IS_ENABLED(CONFIG_MODULES);
        struct linux_binfmt *fmt;
-       pid_t old_pid, old_vpid;
+       int retval;
  
        /* This allows 4 levels of binfmt rewrites before failing hard. */
-       if (depth > 5)
+       if (bprm->recursion_depth > 5)
                return -ELOOP;
  
        retval = security_bprm_check(bprm);
        if (retval)
                return retval;
  
 -      retval = audit_bprm(bprm);
 -      if (retval)
 -              return retval;
 -
+       retval = -ENOENT;
+  retry:
+       read_lock(&binfmt_lock);
+       list_for_each_entry(fmt, &formats, lh) {
+               if (!try_module_get(fmt->module))
+                       continue;
+               read_unlock(&binfmt_lock);
+               bprm->recursion_depth++;
+               retval = fmt->load_binary(bprm);
+               bprm->recursion_depth--;
+               if (retval >= 0 || retval != -ENOEXEC ||
+                   bprm->mm == NULL || bprm->file == NULL) {
+                       put_binfmt(fmt);
+                       return retval;
+               }
+               read_lock(&binfmt_lock);
+               put_binfmt(fmt);
+       }
+       read_unlock(&binfmt_lock);
+       if (need_retry && retval == -ENOEXEC) {
+               if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
+                   printable(bprm->buf[2]) && printable(bprm->buf[3]))
+                       return retval;
+               if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
+                       return retval;
+               need_retry = false;
+               goto retry;
+       }
+       return retval;
+ }
+ EXPORT_SYMBOL(search_binary_handler);
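
When every handler returns -ENOEXEC, the retry path above only asks for a module if the image does not look like plain text, and the alias it requests is derived from bytes 2 and 3 of the header in host byte order. A hypothetical illustration of that derivation (the buffer below stands in for bprm->buf and mirrors the kernel's own cast; nothing here is part of the merge):

#include <stdio.h>

int main(void)
{
	/* Stand-in for the first bytes of bprm->buf (here: the ELF magic). */
	unsigned char buf[4] = { 0x7f, 'E', 'L', 'F' };
	char alias[sizeof("binfmt-ffff")];

	/* 0x7f is not printable, so the plain-text short-circuit does not
	 * apply.  On a little-endian machine *(unsigned short *)(buf + 2)
	 * reads 'L' (0x4c) and 'F' (0x46) as 0x464c, so the kernel would
	 * request the module alias "binfmt-464c". */
	snprintf(alias, sizeof(alias), "binfmt-%04x",
		 *(unsigned short *)(buf + 2));
	printf("%s\n", alias);
	return 0;
}
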
+ static int exec_binprm(struct linux_binprm *bprm)
+ {
+       pid_t old_pid, old_vpid;
+       int ret;
        /* Need to fetch pid before load_binary changes it */
        old_pid = current->pid;
        rcu_read_lock();
        old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
        rcu_read_unlock();
  
-       retval = -ENOENT;
-       for (try=0; try<2; try++) {
-               read_lock(&binfmt_lock);
-               list_for_each_entry(fmt, &formats, lh) {
-                       int (*fn)(struct linux_binprm *) = fmt->load_binary;
-                       if (!fn)
-                               continue;
-                       if (!try_module_get(fmt->module))
-                               continue;
-                       read_unlock(&binfmt_lock);
-                       bprm->recursion_depth = depth + 1;
-                       retval = fn(bprm);
-                       bprm->recursion_depth = depth;
-                       if (retval >= 0) {
-                               if (depth == 0) {
-                                       audit_bprm(bprm);
-                                       trace_sched_process_exec(current, old_pid, bprm);
-                                       ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
-                               }
-                               put_binfmt(fmt);
-                               allow_write_access(bprm->file);
-                               if (bprm->file)
-                                       fput(bprm->file);
-                               bprm->file = NULL;
-                               current->did_exec = 1;
-                               proc_exec_connector(current);
-                               return retval;
-                       }
-                       read_lock(&binfmt_lock);
-                       put_binfmt(fmt);
-                       if (retval != -ENOEXEC || bprm->mm == NULL)
-                               break;
-                       if (!bprm->file) {
-                               read_unlock(&binfmt_lock);
-                               return retval;
-                       }
+       ret = search_binary_handler(bprm);
+       if (ret >= 0) {
++              audit_bprm(bprm);
+               trace_sched_process_exec(current, old_pid, bprm);
+               ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
+               current->did_exec = 1;
+               proc_exec_connector(current);
+               if (bprm->file) {
+                       allow_write_access(bprm->file);
+                       fput(bprm->file);
+                       bprm->file = NULL; /* to catch use-after-free */
                }
-               read_unlock(&binfmt_lock);
- #ifdef CONFIG_MODULES
-               if (retval != -ENOEXEC || bprm->mm == NULL) {
-                       break;
-               } else {
- #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
-                       if (printable(bprm->buf[0]) &&
-                           printable(bprm->buf[1]) &&
-                           printable(bprm->buf[2]) &&
-                           printable(bprm->buf[3]))
-                               break; /* -ENOEXEC */
-                       if (try)
-                               break; /* -ENOEXEC */
-                       request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-               }
- #else
-               break;
- #endif
        }
-       return retval;
- }
  
- EXPORT_SYMBOL(search_binary_handler);
+       return ret;
+ }
  
  /*
   * sys_execve() executes a new program.
@@@ -1538,7 -1539,7 +1536,7 @@@ static int do_execve_common(const char 
        if (retval < 0)
                goto out;
  
-       retval = search_binary_handler(bprm);
+       retval = exec_binprm(bprm);
        if (retval < 0)
                goto out;
  
diff --combined fs/namei.c
index df9946e83db44caab4126059c679d2dad8434f44,caa28051e197e898e3c2bc52afce37bcbb284853..23ac50f4ee4086374d256aaf96a8f3f4055d09bd
@@@ -508,56 -508,78 +508,78 @@@ static int unlazy_walk(struct nameidat
  {
        struct fs_struct *fs = current->fs;
        struct dentry *parent = nd->path.dentry;
-       int want_root = 0;
  
        BUG_ON(!(nd->flags & LOOKUP_RCU));
-       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-               want_root = 1;
-               spin_lock(&fs->lock);
-               if (nd->root.mnt != fs->root.mnt ||
-                               nd->root.dentry != fs->root.dentry)
-                       goto err_root;
-       }
-       spin_lock(&parent->d_lock);
+       /*
+        * Get a reference to the parent first: we're
+        * going to make "path_put(nd->path)" valid in
+        * non-RCU context for "terminate_walk()".
+        *
+        * If this doesn't work, return immediately with
+        * RCU walking still active (and then we will do
+        * the RCU walk cleanup in terminate_walk()).
+        */
+       if (!lockref_get_not_dead(&parent->d_lockref))
+               return -ECHILD;
+       /*
+        * After the mntget(), terminate_walk() will do the
+        * right thing for non-RCU mode, and all our
+        * subsequent exit cases should unlock_rcu_walk()
+        * before returning.
+        */
+       mntget(nd->path.mnt);
+       nd->flags &= ~LOOKUP_RCU;
+       /*
+        * For a negative lookup, the lookup sequence point is the parent's
+        * sequence point, and it only needs to revalidate the parent dentry.
+        *
+        * For a positive lookup, we need to move both the parent and the
+        * dentry from the RCU domain to be properly refcounted. And the
+        * sequence number in the dentry validates *both* dentry counters,
+        * since we checked the sequence number of the parent after we got
+        * the child sequence number. So we know the parent must still
+        * be valid if the child sequence number is still valid.
+        */
        if (!dentry) {
-               if (!__d_rcu_to_refcount(parent, nd->seq))
-                       goto err_parent;
+               if (read_seqcount_retry(&parent->d_seq, nd->seq))
+                       goto out;
                BUG_ON(nd->inode != parent->d_inode);
        } else {
-               if (dentry->d_parent != parent)
-                       goto err_parent;
-               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-               if (!__d_rcu_to_refcount(dentry, nd->seq))
-                       goto err_child;
-               /*
-                * If the sequence check on the child dentry passed, then
-                * the child has not been removed from its parent. This
-                * means the parent dentry must be valid and able to take
-                * a reference at this point.
-                */
-               BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-               BUG_ON(!parent->d_lockref.count);
-               parent->d_lockref.count++;
-               spin_unlock(&dentry->d_lock);
+               if (!lockref_get_not_dead(&dentry->d_lockref))
+                       goto out;
+               if (read_seqcount_retry(&dentry->d_seq, nd->seq))
+                       goto drop_dentry;
        }
-       spin_unlock(&parent->d_lock);
-       if (want_root) {
+       /*
+        * Sequence counts matched. Now make sure that the root is
+        * still valid and get it if required.
+        */
+       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+               spin_lock(&fs->lock);
+               if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
+                       goto unlock_and_drop_dentry;
                path_get(&nd->root);
                spin_unlock(&fs->lock);
        }
-       mntget(nd->path.mnt);
  
        unlock_rcu_walk();
-       nd->flags &= ~LOOKUP_RCU;
        return 0;
  
- err_child:
-       spin_unlock(&dentry->d_lock);
- err_parent:
-       spin_unlock(&parent->d_lock);
- err_root:
-       if (want_root)
-               spin_unlock(&fs->lock);
+ unlock_and_drop_dentry:
+       spin_unlock(&fs->lock);
+ drop_dentry:
+       unlock_rcu_walk();
+       dput(dentry);
+       goto drop_root_mnt;
+ out:
+       unlock_rcu_walk();
+ drop_root_mnt:
+       if (!(nd->flags & LOOKUP_ROOT))
+               nd->root.mnt = NULL;
        return -ECHILD;
  }
  
@@@ -585,14 -607,16 +607,16 @@@ static int complete_walk(struct nameida
                nd->flags &= ~LOOKUP_RCU;
                if (!(nd->flags & LOOKUP_ROOT))
                        nd->root.mnt = NULL;
-               spin_lock(&dentry->d_lock);
-               if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-                       spin_unlock(&dentry->d_lock);
+               if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
+                       unlock_rcu_walk();
+                       return -ECHILD;
+               }
+               if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
                        unlock_rcu_walk();
+                       dput(dentry);
                        return -ECHILD;
                }
-               BUG_ON(nd->inode != dentry->d_inode);
-               spin_unlock(&dentry->d_lock);
                mntget(nd->path.mnt);
                unlock_rcu_walk();
        }
@@@ -636,29 -660,6 +660,6 @@@ static __always_inline void set_root_rc
        }
  }
  
- static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
- {
-       int ret;
-       if (IS_ERR(link))
-               goto fail;
-       if (*link == '/') {
-               set_root(nd);
-               path_put(&nd->path);
-               nd->path = nd->root;
-               path_get(&nd->root);
-               nd->flags |= LOOKUP_JUMPED;
-       }
-       nd->inode = nd->path.dentry->d_inode;
-       ret = link_path_walk(link, nd);
-       return ret;
- fail:
-       path_put(&nd->path);
-       return PTR_ERR(link);
- }
  static void path_put_conditional(struct path *path, struct nameidata *nd)
  {
        dput(path->dentry);
@@@ -850,7 -851,20 +851,20 @@@ follow_link(struct path *link, struct n
        error = 0;
        s = nd_get_link(nd);
        if (s) {
-               error = __vfs_follow_link(nd, s);
+               if (unlikely(IS_ERR(s))) {
+                       path_put(&nd->path);
+                       put_link(nd, link, *p);
+                       return PTR_ERR(s);
+               }
+               if (*s == '/') {
+                       set_root(nd);
+                       path_put(&nd->path);
+                       nd->path = nd->root;
+                       path_get(&nd->root);
+                       nd->flags |= LOOKUP_JUMPED;
+               }
+               nd->inode = nd->path.dentry->d_inode;
+               error = link_path_walk(s, nd);
                if (unlikely(error))
                        put_link(nd, link, *p);
        }
@@@ -2184,6 -2198,198 +2198,198 @@@ user_path_parent(int dfd, const char __
        return s;
  }
  
+ /**
+  * mountpoint_last - look up last component for umount
+  * @nd:   pathwalk nameidata - currently pointing at parent directory of "last"
+  * @path: pointer to container for result
+  *
+  * This is a special lookup_last function just for umount. In this case, we
+  * need to resolve the path without doing any revalidation.
+  *
+  * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since
+  * mountpoints are always pinned in the dcache, their ancestors are too. Thus,
+  * in almost all cases, this lookup will be served out of the dcache. The only
+  * cases where it won't are if nd->last refers to a symlink or the path is
+  * bogus and it doesn't exist.
+  *
+  * Returns:
+  * -error: if there was an error during lookup. This includes -ENOENT if the
+  *         lookup found a negative dentry. The nd->path reference will also be
+  *         put in this case.
+  *
+  * 0:      if we successfully resolved nd->path and found it not to be a
+  *         symlink that needs to be followed. "path" will also be populated.
+  *         The nd->path reference will also be put.
+  *
+  * 1:      if we successfully resolved nd->last and found it to be a symlink
+  *         that needs to be followed. "path" will be populated with the path
+  *         to the link, and nd->path will *not* be put.
+  */
+ static int
+ mountpoint_last(struct nameidata *nd, struct path *path)
+ {
+       int error = 0;
+       struct dentry *dentry;
+       struct dentry *dir = nd->path.dentry;
+       /* If we're in rcuwalk, drop out of it to handle last component */
+       if (nd->flags & LOOKUP_RCU) {
+               if (unlazy_walk(nd, NULL)) {
+                       error = -ECHILD;
+                       goto out;
+               }
+       }
+       nd->flags &= ~LOOKUP_PARENT;
+       if (unlikely(nd->last_type != LAST_NORM)) {
+               error = handle_dots(nd, nd->last_type);
+               if (error)
+                       goto out;
+               dentry = dget(nd->path.dentry);
+               goto done;
+       }
+       mutex_lock(&dir->d_inode->i_mutex);
+       dentry = d_lookup(dir, &nd->last);
+       if (!dentry) {
+               /*
+                * No cached dentry. Mounted dentries are pinned in the cache,
+                * so that means that this dentry is probably a symlink or the
+                * path doesn't actually point to a mounted dentry.
+                */
+               dentry = d_alloc(dir, &nd->last);
+               if (!dentry) {
+                       error = -ENOMEM;
+                       mutex_unlock(&dir->d_inode->i_mutex);
+                       goto out;
+               }
+               dentry = lookup_real(dir->d_inode, dentry, nd->flags);
+               error = PTR_ERR(dentry);
+               if (IS_ERR(dentry)) {
+                       mutex_unlock(&dir->d_inode->i_mutex);
+                       goto out;
+               }
+       }
+       mutex_unlock(&dir->d_inode->i_mutex);
+ done:
+       if (!dentry->d_inode) {
+               error = -ENOENT;
+               dput(dentry);
+               goto out;
+       }
+       path->dentry = dentry;
+       path->mnt = mntget(nd->path.mnt);
+       if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+               return 1;
+       follow_mount(path);
+       error = 0;
+ out:
+       terminate_walk(nd);
+       return error;
+ }
+ /**
+  * path_mountpoint - look up a path to be umounted
+  * @dfd:      directory file descriptor to start walk from
+  * @name:     full pathname to walk
+  * @path:     pointer to container for result
+  * @flags:    lookup flags
+  *
+  * Look up the given name, but don't attempt to revalidate the last component.
+  * Returns 0 and "path" will be valid on success; Returns error otherwise.
+  * Returns 0 and "path" will be valid on success; returns an error otherwise.
+ static int
+ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+ {
+       struct file *base = NULL;
+       struct nameidata nd;
+       int err;
+       err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+       if (unlikely(err))
+               return err;
+       current->total_link_count = 0;
+       err = link_path_walk(name, &nd);
+       if (err)
+               goto out;
+       err = mountpoint_last(&nd, path);
+       while (err > 0) {
+               void *cookie;
+               struct path link = *path;
+               err = may_follow_link(&link, &nd);
+               if (unlikely(err))
+                       break;
+               nd.flags |= LOOKUP_PARENT;
+               err = follow_link(&link, &nd, &cookie);
+               if (err)
+                       break;
+               err = mountpoint_last(&nd, path);
+               put_link(&nd, &link, cookie);
+       }
+ out:
+       if (base)
+               fput(base);
+       if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
+               path_put(&nd.root);
+       return err;
+ }
+ static int
+ filename_mountpoint(int dfd, struct filename *s, struct path *path,
+                       unsigned int flags)
+ {
+       int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+       if (unlikely(error == -ECHILD))
+               error = path_mountpoint(dfd, s->name, path, flags);
+       if (unlikely(error == -ESTALE))
+               error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+       if (likely(!error))
+               audit_inode(s, path->dentry, 0);
+       return error;
+ }
+ /**
+  * user_path_mountpoint_at - lookup a path from userland in order to umount it
+  * @dfd:      directory file descriptor
+  * @name:     pathname from userland
+  * @flags:    lookup flags
+  * @path:     pointer to container to hold result
+  *
+  * A umount is a special case for path walking. We're not actually interested
+  * in the inode in this situation, and ESTALE errors can be a problem. We
+  * simply want to track down the dentry and vfsmount attached at the mountpoint
+  * and avoid revalidating the last component.
+  *
+  * Returns 0 and populates "path" on success.
+  */
+ int
+ user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
+                       struct path *path)
+ {
+       struct filename *s = getname(name);
+       int error;
+       if (IS_ERR(s))
+               return PTR_ERR(s);
+       error = filename_mountpoint(dfd, s, path, flags);
+       putname(s);
+       return error;
+ }
+ int
+ kern_path_mountpoint(int dfd, const char *name, struct path *path,
+                       unsigned int flags)
+ {
+       struct filename s = {.name = name};
+       return filename_mountpoint(dfd, &s, path, flags);
+ }
+ EXPORT_SYMBOL(kern_path_mountpoint);
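
A sketch of the caller pattern the helpers above are meant for; the surrounding function and the path string are hypothetical, while kern_path_mountpoint(), AT_FDCWD and path_put() come from the existing VFS API (header placement assumed):

#include <linux/fcntl.h>
#include <linux/namei.h>
#include <linux/path.h>

/* Hypothetical in-kernel caller: pin the vfsmount/dentry mounted at a
 * well-known path without revalidating the final component. */
static int example_pin_mountpoint(struct path *path)
{
	int err;

	err = kern_path_mountpoint(AT_FDCWD, "/mnt/example", path, 0);
	if (err)
		return err;

	/* path->mnt and path->dentry now hold references to the mountpoint;
	 * drop them with path_put(path) when done. */
	return 0;
}
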
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
@@@ -2262,7 -2468,6 +2468,7 @@@ static int may_delete(struct inode *dir
   */
  static inline int may_create(struct inode *dir, struct dentry *child)
  {
 +      audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
        if (child->d_inode)
                return -EEXIST;
        if (IS_DEADDIR(dir))
@@@ -2452,6 -2657,7 +2658,7 @@@ static int atomic_open(struct nameidat
        int acc_mode;
        int create_error = 0;
        struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+       bool excl;
  
        BUG_ON(dentry->d_inode);
  
        if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
                mode &= ~current_umask();
  
-       if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
+       excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
+       if (excl)
                open_flag &= ~O_TRUNC;
-               *opened |= FILE_CREATED;
-       }
  
        /*
         * Checking write permission is tricky, because we don't know if we are
                goto out;
        }
  
-       acc_mode = op->acc_mode;
-       if (*opened & FILE_CREATED) {
-               fsnotify_create(dir, dentry);
-               acc_mode = MAY_OPEN;
-       }
        if (error) {    /* returned 1, that is */
                if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
                        error = -EIO;
                        dput(dentry);
                        dentry = file->f_path.dentry;
                }
-               if (create_error && dentry->d_inode == NULL) {
-                       error = create_error;
-                       goto out;
+               if (*opened & FILE_CREATED)
+                       fsnotify_create(dir, dentry);
+               if (!dentry->d_inode) {
+                       WARN_ON(*opened & FILE_CREATED);
+                       if (create_error) {
+                               error = create_error;
+                               goto out;
+                       }
+               } else {
+                       if (excl && !(*opened & FILE_CREATED)) {
+                               error = -EEXIST;
+                               goto out;
+                       }
                }
                goto looked_up;
        }
         * We didn't have the inode before the open, so check open permission
         * here.
         */
+       acc_mode = op->acc_mode;
+       if (*opened & FILE_CREATED) {
+               WARN_ON(!(open_flag & O_CREAT));
+               fsnotify_create(dir, dentry);
+               acc_mode = MAY_OPEN;
+       }
        error = may_open(&file->f_path, acc_mode, open_flag);
        if (error)
                fput(file);
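
The reworked O_EXCL handling above no longer pre-sets FILE_CREATED; it trusts the filesystem's ->atomic_open() and returns -EEXIST when an existing inode comes back for an exclusive create. Seen from userspace (hypothetical path, plain illustration, not part of the merge):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* O_CREAT|O_EXCL must either create the file or fail. */
	int fd = open("/tmp/already-there", O_RDWR | O_CREAT | O_EXCL, 0600);

	if (fd < 0 && errno == EEXIST)
		printf("exists: exclusive create correctly refused\n");
	else if (fd >= 0)
		close(fd);
	return 0;
}
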
@@@ -4025,11 -4240,6 +4241,6 @@@ int generic_readlink(struct dentry *den
        return res;
  }
  
- int vfs_follow_link(struct nameidata *nd, const char *link)
- {
-       return __vfs_follow_link(nd, link);
- }
  /* get the link contents into pagecache */
  static char *page_getlink(struct dentry * dentry, struct page **ppage)
  {
@@@ -4141,7 -4351,6 +4352,6 @@@ EXPORT_SYMBOL(vfs_path_lookup)
  EXPORT_SYMBOL(inode_permission);
  EXPORT_SYMBOL(unlock_rename);
  EXPORT_SYMBOL(vfs_create);
- EXPORT_SYMBOL(vfs_follow_link);
  EXPORT_SYMBOL(vfs_link);
  EXPORT_SYMBOL(vfs_mkdir);
  EXPORT_SYMBOL(vfs_mknod);
diff --combined init/Kconfig
index 18a98c893d0783d59618f232356dc4be4e011058,3ecd8a1178f102d832cdf3b4af0a908997ea648b..d8fe0b87e713223b6ace8288de6ee63119266399
@@@ -301,6 -301,20 +301,6 @@@ config AUDIT_TRE
        depends on AUDITSYSCALL
        select FSNOTIFY
  
 -config AUDIT_LOGINUID_IMMUTABLE
 -      bool "Make audit loginuid immutable"
 -      depends on AUDIT
 -      help
 -        The config option toggles if a task setting its loginuid requires
 -        CAP_SYS_AUDITCONTROL or if that task should require no special permissions
 -        but should instead only allow setting its loginuid if it was never
 -        previously set.  On systems which use systemd or a similar central
 -        process to restart login services this should be set to true.  On older
 -        systems in which an admin would typically have to directly stop and
 -        start processes this should be set to false.  Setting this to true allows
 -        one to drop potentially dangerous capabilites from the login tasks,
 -        but may not be backwards compatible with older init systems.
 -
  source "kernel/irq/Kconfig"
  source "kernel/time/Kconfig"
  
@@@ -456,6 -470,7 +456,7 @@@ config TREE_RC
  config TREE_PREEMPT_RCU
        bool "Preemptible tree-based hierarchical RCU"
        depends on PREEMPT
+       select IRQ_WORK
        help
          This option selects the RCU implementation that is
          designed for very large SMP systems with hundreds or
@@@ -513,13 -528,29 +514,29 @@@ config RCU_USER_Q
  config CONTEXT_TRACKING_FORCE
        bool "Force context tracking"
        depends on CONTEXT_TRACKING
-       default CONTEXT_TRACKING
+       default y if !NO_HZ_FULL
        help
-         Probe on user/kernel boundaries by default in order to
-         test the features that rely on it such as userspace RCU extended
-         quiescent states.
-         This test is there for debugging until we have a real user like the
-         full dynticks mode.
+         The major prerequisite for full dynticks to work is support
+         for the context tracking subsystem, but there are other
+         dependencies to provide as well in order to make full
+         dynticks work.
+
+         This option is for testing when an architecture implements
+         the context tracking backend but does not yet fulfill all of
+         the requirements for the full dynticks feature. Without full
+         dynticks there is no way to test the support for context
+         tracking and the subsystems that rely on it: RCU userspace
+         extended quiescent states and tickless cputime accounting.
+         This option copes with the absence of the full dynticks
+         subsystem by forcing context tracking on all CPUs in the
+         system.
+
+         Say Y only if you are working on the development of an
+         architecture backend for context tracking.
+
+         Say N otherwise; this option adds overhead that you do not
+         want in production.
  
  config RCU_FANOUT
        int "Tree-based hierarchical RCU fanout value"
@@@ -1092,7 -1123,6 +1109,6 @@@ config IPC_N
  
  config USER_NS
        bool "User namespace"
-       depends on UIDGID_CONVERTED
        select UIDGID_STRICT_TYPE_CHECKS
  
        default n
@@@ -1126,20 -1156,8 +1142,8 @@@ config NET_N
  
  endif # NAMESPACES
  
- config UIDGID_CONVERTED
-       # True if all of the selected software conmponents are known
-       # to have uid_t and gid_t converted to kuid_t and kgid_t
-       # where appropriate and are otherwise safe to use with
-       # the user namespace.
-       bool
-       default y
-       # Filesystems
-       depends on XFS_FS = n
  config UIDGID_STRICT_TYPE_CHECKS
        bool "Require conversions between uid/gids and their internal representation"
-       depends on UIDGID_CONVERTED
        default n
        help
        While the necessary conversions are being added to all subsystems, this option allows
@@@ -1584,7 -1602,7 +1588,7 @@@ endchoic
  
  config SLUB_CPU_PARTIAL
        default y
-       depends on SLUB
+       depends on SLUB && SMP
        bool "SLUB per cpu partial cache"
        help
          Per cpu partial caches accelerate object allocation and freeing
@@@ -1652,6 -1670,7 +1656,7 @@@ config BASE_SMAL
  
  menuconfig MODULES
        bool "Enable loadable module support"
+       option modules
        help
          Kernel modules are small pieces of compiled code which can
          be inserted in the running kernel, rather than being
diff --combined kernel/audit.c
index b8831ac25b709ec07d0cf2e893d01994d5240b4e,7b0e23a740ce345987c33f9e012302c24de0f4db..906ae5a0233a1011d558ff47c548808517ef9a03
@@@ -60,6 -60,7 +60,6 @@@
  #ifdef CONFIG_SECURITY
  #include <linux/security.h>
  #endif
 -#include <net/netlink.h>
  #include <linux/freezer.h>
  #include <linux/tty.h>
  #include <linux/pid_namespace.h>
@@@ -139,17 -140,6 +139,17 @@@ static struct task_struct *kauditd_task
  static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
  static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
  
 +static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
 +                                 .mask = -1,
 +                                 .features = 0,
 +                                 .lock = 0,};
 +
 +static char *audit_feature_names[2] = {
 +      "only_unset_loginuid",
 +      "loginuid_immutable",
 +};
 +
 +
  /* Serialize requests from userspace. */
  DEFINE_MUTEX(audit_cmd_mutex);
  
@@@ -594,8 -584,6 +594,8 @@@ static int audit_netlink_ok(struct sk_b
                return -EOPNOTSUPP;
        case AUDIT_GET:
        case AUDIT_SET:
 +      case AUDIT_GET_FEATURE:
 +      case AUDIT_SET_FEATURE:
        case AUDIT_LIST_RULES:
        case AUDIT_ADD_RULE:
        case AUDIT_DEL_RULE:
@@@ -625,7 -613,7 +625,7 @@@ static int audit_log_common_recv_msg(st
        int rc = 0;
        uid_t uid = from_kuid(&init_user_ns, current_uid());
  
 -      if (!audit_enabled) {
 +      if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
                *ab = NULL;
                return rc;
        }
        return rc;
  }
  
 +int is_audit_feature_set(int i)
 +{
 +      return af.features & AUDIT_FEATURE_TO_MASK(i);
 +}
 +
 +
 +static int audit_get_feature(struct sk_buff *skb)
 +{
 +      u32 seq;
 +
 +      seq = nlmsg_hdr(skb)->nlmsg_seq;
 +
 +      audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
 +                       &af, sizeof(af));
 +
 +      return 0;
 +}
 +
 +static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
 +                                   u32 old_lock, u32 new_lock, int res)
 +{
 +      struct audit_buffer *ab;
 +
 +      ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
 +      audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
 +                       audit_feature_names[which], !!old_feature, !!new_feature,
 +                       !!old_lock, !!new_lock, res);
 +      audit_log_end(ab);
 +}
 +
 +static int audit_set_feature(struct sk_buff *skb)
 +{
 +      struct audit_features *uaf;
 +      int i;
 +
 +      BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
 +      uaf = nlmsg_data(nlmsg_hdr(skb));
 +
 +      /* if there is ever a version 2 we should handle that here */
 +
 +      for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
 +              u32 feature = AUDIT_FEATURE_TO_MASK(i);
 +              u32 old_feature, new_feature, old_lock, new_lock;
 +
 +              /* if we are not changing this feature, move along */
 +              if (!(feature & uaf->mask))
 +                      continue;
 +
 +              old_feature = af.features & feature;
 +              new_feature = uaf->features & feature;
 +              new_lock = (uaf->lock | af.lock) & feature;
 +              old_lock = af.lock & feature;
 +
 +              /* are we changing a locked feature? */
 +              if ((af.lock & feature) && (new_feature != old_feature)) {
 +                      audit_log_feature_change(i, old_feature, new_feature,
 +                                               old_lock, new_lock, 0);
 +                      return -EPERM;
 +              }
 +      }
 +      /* nothing invalid, do the changes */
 +      for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
 +              u32 feature = AUDIT_FEATURE_TO_MASK(i);
 +              u32 old_feature, new_feature, old_lock, new_lock;
 +
 +              /* if we are not changing this feature, move along */
 +              if (!(feature & uaf->mask))
 +                      continue;
 +
 +              old_feature = af.features & feature;
 +              new_feature = uaf->features & feature;
 +              old_lock = af.lock & feature;
 +              new_lock = (uaf->lock | af.lock) & feature;
 +
 +              if (new_feature != old_feature)
 +                      audit_log_feature_change(i, old_feature, new_feature,
 +                                               old_lock, new_lock, 1);
 +
 +              if (new_feature)
 +                      af.features |= feature;
 +              else
 +                      af.features &= ~feature;
 +              af.lock |= new_lock;
 +      }
 +
 +      return 0;
 +}
 +
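
A hedged sketch of the payload audit_set_feature() parses: the field names and AUDIT_FEATURE_TO_MASK() are the ones used in the code above, the bit index 1 follows the audit_feature_names[] order near the top of this file, and the netlink send itself is omitted:

#include <linux/audit.h>	/* struct audit_features, AUDIT_* (uapi) */

/* Hypothetical request: enable and lock the "loginuid_immutable" feature. */
struct audit_features req = {
	.vers     = AUDIT_FEATURE_VERSION,
	.mask     = AUDIT_FEATURE_TO_MASK(1),	/* only touch this feature */
	.features = AUDIT_FEATURE_TO_MASK(1),	/* turn it on              */
	.lock     = AUDIT_FEATURE_TO_MASK(1),	/* and lock it             */
};

/* Sent as the data of an AUDIT_SET_FEATURE netlink message.  Once the lock
 * bit is set, a later attempt to flip the feature is refused with -EPERM and
 * logged as an AUDIT_FEATURE_CHANGE record with res=0. */
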
  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
  {
        u32                     seq;
  
        switch (msg_type) {
        case AUDIT_GET:
 +              memset(&status_set, 0, sizeof(status_set));
                status_set.enabled       = audit_enabled;
                status_set.failure       = audit_failure;
                status_set.pid           = audit_pid;
                                 &status_set, sizeof(status_set));
                break;
        case AUDIT_SET:
 -              if (nlh->nlmsg_len < sizeof(struct audit_status))
 +              if (nlmsg_len(nlh) < sizeof(struct audit_status))
                        return -EINVAL;
                status_get   = (struct audit_status *)data;
                if (status_get->mask & AUDIT_STATUS_ENABLED) {
                if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
                        err = audit_set_backlog_limit(status_get->backlog_limit);
                break;
 +      case AUDIT_GET_FEATURE:
 +              err = audit_get_feature(skb);
 +              if (err)
 +                      return err;
 +              break;
 +      case AUDIT_SET_FEATURE:
 +              err = audit_set_feature(skb);
 +              if (err)
 +                      return err;
 +              break;
        case AUDIT_USER:
        case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
        case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
                        }
                        audit_log_common_recv_msg(&ab, msg_type);
                        if (msg_type != AUDIT_USER_TTY)
 -                              audit_log_format(ab, " msg='%.1024s'",
 +                              audit_log_format(ab, " msg='%.*s'",
 +                                               AUDIT_MESSAGE_TEXT_MAX,
                                                 (char *)data);
                        else {
                                int size;
                struct task_struct *tsk = current;
  
                spin_lock(&tsk->sighand->siglock);
 -              s.enabled = tsk->signal->audit_tty != 0;
 +              s.enabled = tsk->signal->audit_tty;
                s.log_passwd = tsk->signal->audit_tty_log_passwd;
                spin_unlock(&tsk->sighand->siglock);
  
  
                memset(&s, 0, sizeof(s));
                /* guard against past and future API changes */
 -              memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
 +              memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
                if ((s.enabled != 0 && s.enabled != 1) ||
                    (s.log_passwd != 0 && s.log_passwd != 1))
                        return -EINVAL;
@@@ -1179,6 -1067,13 +1179,6 @@@ static void wait_for_auditd(unsigned lo
        remove_wait_queue(&audit_backlog_wait, &wait);
  }
  
 -/* Obtain an audit buffer.  This routine does locking to obtain the
 - * audit buffer, but then no locking is required for calls to
 - * audit_log_*format.  If the tsk is a task that is currently in a
 - * syscall, then the syscall is marked as auditable and an audit record
 - * will be written at syscall exit.  If there is no associated task, tsk
 - * should be NULL. */
 -
  /**
   * audit_log_start - obtain an audit buffer
   * @ctx: audit_context (may be NULL)
@@@ -1222,9 -1117,10 +1222,10 @@@ struct audit_buffer *audit_log_start(st
  
                        sleep_time = timeout_start + audit_backlog_wait_time -
                                        jiffies;
-                       if ((long)sleep_time > 0)
+                       if ((long)sleep_time > 0) {
                                wait_for_auditd(sleep_time);
-                       continue;
+                               continue;
+                       }
                }
                if (audit_rate_check() && printk_ratelimit())
                        printk(KERN_WARNING
@@@ -1493,7 -1389,7 +1494,7 @@@ void audit_log_session_info(struct audi
        u32 sessionid = audit_get_sessionid(current);
        uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
  
 -      audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
 +      audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
  }
  
  void audit_log_key(struct audit_buffer *ab, char *key)
@@@ -1640,26 -1536,6 +1641,26 @@@ void audit_log_name(struct audit_contex
                }
        }
  
 +      /* log the audit_names record type */
 +      audit_log_format(ab, " nametype=");
 +      switch(n->type) {
 +      case AUDIT_TYPE_NORMAL:
 +              audit_log_format(ab, "NORMAL");
 +              break;
 +      case AUDIT_TYPE_PARENT:
 +              audit_log_format(ab, "PARENT");
 +              break;
 +      case AUDIT_TYPE_CHILD_DELETE:
 +              audit_log_format(ab, "DELETE");
 +              break;
 +      case AUDIT_TYPE_CHILD_CREATE:
 +              audit_log_format(ab, "CREATE");
 +              break;
 +      default:
 +              audit_log_format(ab, "UNKNOWN");
 +              break;
 +      }
 +
        audit_log_fcaps(ab, n);
        audit_log_end(ab);
  }