Merge tag 'powerpc-3.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)
diff --combined Documentation/kernel-parameters.txt

index 512a35929f946a111391a8e638ae0a2e4f03d290,7dedfe56c3f30a7fa77c290759abf675398a63af..a89e326375702dae82244472e9955564e2949060
--- 1/Documentation/kernel-parameters.txt
--- 2/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -1277,7 -1277,6 +1277,7 @@@ bytes respectively. Such letter suffixe
         i8042.notimeout [HW] Ignore timeout condition signalled by controller
         i8042.reset     [HW] Reset the controller during init and cleanup
         i8042.unlock    [HW] Unlock (ignore) the keylock
+ +      i8042.kbdreset  [HW] Reset device connected to KBD port
   
         i810=           [HW,DRM]
   
@@@ -1470,9 -1469,6 +1470,9 @@@
                        no_hwp
                          Do not enable hardware P state control (HWP)
                          if available.
+ +              hwp_only
+ +                      Only load intel_pstate on systems which support
+ +                      hardware P state control (HWP).
   
         intremap=       [X86-64, Intel-IOMMU]
                         on      enable Interrupt Remapping (default)
@@@ -1497,6 -1493,8 +1497,8 @@@
                 forcesac
                 soft
                 pt              [x86, IA-64]
+               nobypass        [PPC/POWERNV]
+                       Disable IOMMU bypass, using IOMMU for PCI devices.
   
   
         io7=            [HW] IO7 for Marvel based alpha systems
@@@ -3210,18 -3208,6 +3212,18 @@@
   
         retain_initrd   [RAM] Keep initrd memory after extraction
   
+ +      rfkill.default_state=
+ +              0       "airplane mode".  All wifi, bluetooth, wimax, gps, fm,
+ +                      etc. communication is blocked by default.
+ +              1       Unblocked.
+ +
+ +      rfkill.master_switch_mode=
+ +              0       The "airplane mode" button does nothing.
+ +              1       The "airplane mode" button toggles between everything
+ +                      blocked and the previous configuration.
+ +              2       The "airplane mode" button toggles between everything
+ +                      blocked and everything unblocked.
+ +
         rhash_entries=  [KNL,NET]
                         Set number of hash buckets for route cache
   
diff --combined arch/powerpc/include/asm/thread_info.h

index 0be6c681cab1341061c02031464d5355ff8a4d7d,c1efa05613f0d52c313f26489f331c11dafc1877..e8abc83e699fbe32179a2bac70a91c2af9ff4ec5
--- 1/arch/powerpc/include/asm/thread_info.h
--- 2/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@@ -23,9 -23,9 +23,9 @@@
   #define THREAD_SIZE           (1 << THREAD_SHIFT)
   
   #ifdef CONFIG_PPC64
- -#define CURRENT_THREAD_INFO(dest, sp) clrrdi dest, sp, THREAD_SHIFT
+ +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(clrrdi dest, sp, THREAD_SHIFT)
   #else
- -#define CURRENT_THREAD_INFO(dest, sp) rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT
+ +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT)
   #endif
   
   #ifndef __ASSEMBLY__
@@@ -71,13 -71,12 +71,13 @@@ struct thread_info 
   #define THREAD_SIZE_ORDER     (THREAD_SHIFT - PAGE_SHIFT)
   
   /* how to get the thread information struct from C */
- -register unsigned long __current_r1 asm("r1");
   static inline struct thread_info *current_thread_info(void)
   {
- -      /* gcc4, at least, is smart enough to turn this into a single
- -       * rlwinm for ppc32 and clrrdi for ppc64 */
- -      return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1));
+ +      unsigned long val;
+ +
+ +      asm (CURRENT_THREAD_INFO(%0,1) : "=r" (val));
+ +
+ +      return (struct thread_info *)val;
   }
   
   #endif /* __ASSEMBLY__ */
@@@ -125,7 -124,7 +125,7 @@@
   #define _TIF_SYSCALL_TRACEPOINT       (1<<TIF_SYSCALL_TRACEPOINT)
   #define _TIF_EMULATE_STACK_STORE      (1<<TIF_EMULATE_STACK_STORE)
   #define _TIF_NOHZ             (1<<TIF_NOHZ)
- #define _TIF_SYSCALL_T_OR_A   (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ #define _TIF_SYSCALL_DOTRACE  (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
                                  _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
                                  _TIF_NOHZ)
   
diff --combined arch/powerpc/kernel/smp.c

index 8b2d2dc8ef106ef780c9a145335e9de17b3879a7,1cc4bdce19f3d77ba136264bf2da8027231084d6..6e19afa35a153d2736af94bc845008d724fe735f
--- 1/arch/powerpc/kernel/smp.c
--- 2/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@@ -434,20 -434,6 +434,6 @@@ void generic_cpu_die(unsigned int cpu
         printk(KERN_ERR "CPU%d didn't die...\n", cpu);
   }
   
- void generic_mach_cpu_die(void)
- {
-       unsigned int cpu;
- 
-       local_irq_disable();
-       idle_task_exit();
-       cpu = smp_processor_id();
-       printk(KERN_DEBUG "CPU%d offline\n", cpu);
-       __this_cpu_write(cpu_state, CPU_DEAD);
-       smp_wmb();
-       while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE)
-               cpu_relax();
- }
- 
   void generic_set_cpu_dead(unsigned int cpu)
   {
         per_cpu(cpu_state, cpu) = CPU_DEAD;
@@@ -700,7 -686,6 +686,7 @@@ void start_secondary(void *unused
         smp_store_cpu_info(cpu);
         set_dec(tb_ticks_per_jiffy);
         preempt_disable();
+ +      cpu_callin_map[cpu] = 1;
   
         if (smp_ops->setup_cpu)
                 smp_ops->setup_cpu(cpu);
@@@ -739,6 -724,14 +725,6 @@@
         notify_cpu_starting(cpu);
         set_cpu_online(cpu, true);
   
- -      /*
- -       * CPU must be marked active and online before we signal back to the
- -       * master, because the scheduler needs to see the cpu_online and
- -       * cpu_active bits set.
- -       */
- -      smp_wmb();
- -      cpu_callin_map[cpu] = 1;
- -
         local_irq_enable();
   
         cpu_startup_entry(CPUHP_ONLINE);
diff --combined arch/powerpc/sysdev/fsl_pci.c

index 271b67e7670c71fb6ee14bfa260635cf1d944f62,7cc215e86d8297c0bb5e2c3dcd600bd3bee094f0..4b74c276e427e49ccf7cc748f496a7d6ba650a6e
--- 1/arch/powerpc/sysdev/fsl_pci.c
--- 2/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@@ -68,13 -68,10 +68,10 @@@ static int fsl_pcie_check_link(struct p
         u32 val = 0;
   
         if (hose->indirect_type & PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK) {
-               if (hose->ops->read == fsl_indirect_read_config) {
-                       struct pci_bus bus;
-                       bus.number = hose->first_busno;
-                       bus.sysdata = hose;
-                       bus.ops = hose->ops;
-                       indirect_read_config(&bus, 0, PCIE_LTSSM, 4, &val);
-               } else
+               if (hose->ops->read == fsl_indirect_read_config)
+                       __indirect_read_config(hose, hose->first_busno, 0,
+                                              PCIE_LTSSM, 4, &val);
+               else
                         early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val);
                 if (val < PCIE_LTSSM_L0)
                         return 1;
@@@ -645,21 -642,61 +642,21 @@@ mapped
         return pcie->cfg_type1 + offset;
   }
   
- -static int mpc83xx_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
- -                                  int offset, int len, u32 *val)
- -{
- -      void __iomem *cfg_addr;
- -
- -      cfg_addr = mpc83xx_pcie_remap_cfg(bus, devfn, offset);
- -      if (!cfg_addr)
- -              return PCIBIOS_DEVICE_NOT_FOUND;
- -
- -      switch (len) {
- -      case 1:
- -              *val = in_8(cfg_addr);
- -              break;
- -      case 2:
- -              *val = in_le16(cfg_addr);
- -              break;
- -      default:
- -              *val = in_le32(cfg_addr);
- -              break;
- -      }
- -
- -      return PCIBIOS_SUCCESSFUL;
- -}
- -
   static int mpc83xx_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
                                      int offset, int len, u32 val)
   {
         struct pci_controller *hose = pci_bus_to_host(bus);
- -      void __iomem *cfg_addr;
- -
- -      cfg_addr = mpc83xx_pcie_remap_cfg(bus, devfn, offset);
- -      if (!cfg_addr)
- -              return PCIBIOS_DEVICE_NOT_FOUND;
   
         /* PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS */
         if (offset == PCI_PRIMARY_BUS && bus->number == hose->first_busno)
                 val &= 0xffffff00;
   
- -      switch (len) {
- -      case 1:
- -              out_8(cfg_addr, val);
- -              break;
- -      case 2:
- -              out_le16(cfg_addr, val);
- -              break;
- -      default:
- -              out_le32(cfg_addr, val);
- -              break;
- -      }
- -
- -      return PCIBIOS_SUCCESSFUL;
+ +      return pci_generic_config_write(bus, devfn, offset, len, val);
   }
   
   static struct pci_ops mpc83xx_pcie_ops = {
- -      .read = mpc83xx_pcie_read_config,
+ +      .map_bus = mpc83xx_pcie_remap_cfg,
+ +      .read = pci_generic_config_read,
         .write = mpc83xx_pcie_write_config,
   };
   
diff --combined arch/powerpc/xmon/xmon.c

index 13c6e200b24ec5bc2a7927308eaf579f1a904cd5,e66ace703a69454a611d54e3a9b832449b51cf3a..e599259d84fc83773dcc01f7c81a362fdd777d65
--- 1/arch/powerpc/xmon/xmon.c
--- 2/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@@ -25,6 -25,7 +25,7 @@@
   #include <linux/irq.h>
   #include <linux/bug.h>
   #include <linux/nmi.h>
+ #include <linux/ctype.h>
   
   #include <asm/ptrace.h>
   #include <asm/string.h>
@@@ -183,14 -184,6 +184,6 @@@ extern void xmon_leave(void)
   #define GETWORD(v)    (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
   #endif
   
- #define isxdigit(c)   (('0' <= (c) && (c) <= '9') \
-                        || ('a' <= (c) && (c) <= 'f') \
-                        || ('A' <= (c) && (c) <= 'F'))
- #define isalnum(c)    (('0' <= (c) && (c) <= '9') \
-                        || ('a' <= (c) && (c) <= 'z') \
-                        || ('A' <= (c) && (c) <= 'Z'))
- #define isspace(c)    (c == ' ' || c == '\t' || c == 10 || c == 13 || c == 0)
- 
   static char *help_string = "\
   Commands:\n\
     b   show breakpoints\n\
@@@ -337,7 -330,6 +330,7 @@@ static inline void disable_surveillance
         args.token = rtas_token("set-indicator");
         if (args.token == RTAS_UNKNOWN_SERVICE)
                 return;
+ +      args.token = cpu_to_be32(args.token);
         args.nargs = cpu_to_be32(3);
         args.nret = cpu_to_be32(1);
         args.rets = &args.args[3];
@@@ -2165,9 -2157,6 +2158,6 @@@ static void dump_pacas(void
   }
   #endif
   
- #define isxdigit(c)   (('0' <= (c) && (c) <= '9') \
-                        || ('a' <= (c) && (c) <= 'f') \
-                        || ('A' <= (c) && (c) <= 'F'))
   static void
   dump(void)
   {
@@@ -2570,7 -2559,7 +2560,7 @@@ scanhex(unsigned long *vp
                 int i;
                 for (i=0; i<63; i++) {
                         c = inchar();
-                       if (isspace(c)) {
+                       if (isspace(c) || c == '\0') {
                                 termch = c;
                                 break;
                         }
diff --combined drivers/misc/cxl/file.c

index b15d8113877c9f6ed5c45c58fb012fa6f50276aa,8953de6fde2d10437a594796a1a5ee9727dc3af8..2364bcadb9a94c195abc6398a77c734f45a7e468
--- 1/drivers/misc/cxl/file.c
--- 2/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@@ -23,6 -23,7 +23,7 @@@
   #include <asm/copro.h>
   
   #include "cxl.h"
+ #include "trace.h"
   
   #define CXL_NUM_MINORS 256 /* Total to reserve */
   #define CXL_DEV_MINORS 13   /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
@@@ -140,20 -141,18 +141,20 @@@ static long afu_ioctl_start_work(struc
   
         pr_devel("%s: pe: %i\n", __func__, ctx->pe);
   
- -      mutex_lock(&ctx->status_mutex);
- -      if (ctx->status != OPENED) {
- -              rc = -EIO;
- -              goto out;
- -      }
- -
+ +      /* Do this outside the status_mutex to avoid a circular dependency with
+ +       * the locking in cxl_mmap_fault() */
         if (copy_from_user(&work, uwork,
                            sizeof(struct cxl_ioctl_start_work))) {
                 rc = -EFAULT;
                 goto out;
         }
   
+ +      mutex_lock(&ctx->status_mutex);
+ +      if (ctx->status != OPENED) {
+ +              rc = -EIO;
+ +              goto out;
+ +      }
+ +
         /*
          * if any of the reserved fields are set or any of the unused
          * flags are set it's invalid
@@@ -186,9 -185,13 +187,13 @@@
          */
         ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID));
   
+       trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
+ 
         if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
-                                    amr)))
+                                    amr))) {
+               afu_release_irqs(ctx);
                 goto out;
+       }
   
         ctx->status = STARTED;
         rc = 0;
diff --combined include/linux/perf_event.h

index 5cad0e6f35524b454ec691e1787848d322893b3b,1d3631448b910e945e9658ebfebc6fd3e998744a..2cdc9d422bed9245bef3e0e62a007efad80be6c1
--- 1/include/linux/perf_event.h
--- 2/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@@ -79,6 -79,11 +79,6 @@@ struct perf_branch_stack 
         struct perf_branch_entry        entries[0];
   };
   
- -struct perf_regs {
- -      __u64           abi;
- -      struct pt_regs  *regs;
- -};
- -
   struct task_struct;
   
   /*
@@@ -450,6 -455,11 +450,6 @@@ struct perf_event 
   #endif /* CONFIG_PERF_EVENTS */
   };
   
- -enum perf_event_context_type {
- -      task_context,
- -      cpu_context,
- -};
- -
   /**
    * struct perf_event_context - event context structure
    *
@@@ -457,6 -467,7 +457,6 @@@
    */
   struct perf_event_context {
         struct pmu                      *pmu;
- -      enum perf_event_context_type    type;
         /*
          * Protect the states of the events in the list,
          * nr_active, and the list:
@@@ -469,7 -480,6 +469,7 @@@
          */
         struct mutex                    mutex;
   
+ +      struct list_head                active_ctx_list;
         struct list_head                pinned_groups;
         struct list_head                flexible_groups;
         struct list_head                event_list;
@@@ -520,6 -530,7 +520,6 @@@ struct perf_cpu_context 
         int                             exclusive;
         struct hrtimer                  hrtimer;
         ktime_t                         hrtimer_interval;
- -      struct list_head                rotation_list;
         struct pmu                      *unique_pmu;
         struct perf_cgroup              *cgrp;
   };
@@@ -599,14 -610,7 +599,14 @@@ struct perf_sample_data 
                 u32     reserved;
         }                               cpu_entry;
         struct perf_callchain_entry     *callchain;
+ +
+ +      /*
+ +       * regs_user may point to task_pt_regs or to regs_user_copy, depending
+ +       * on arch details.
+ +       */
         struct perf_regs                regs_user;
+ +      struct pt_regs                  regs_user_copy;
+ +
         struct perf_regs                regs_intr;
         u64                             stack_user_size;
   } ____cacheline_aligned;
@@@ -659,7 -663,6 +659,7 @@@ static inline int is_software_event(str
   
   extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
   
+ +extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
   extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
   
   #ifndef perf_arch_fetch_caller_regs
@@@ -684,25 -687,14 +684,25 @@@ static inline void perf_fetch_caller_re
   static __always_inline void
   perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
   {
- -      struct pt_regs hot_regs;
+ +      if (static_key_false(&perf_swevent_enabled[event_id]))
+ +              __perf_sw_event(event_id, nr, regs, addr);
+ +}
+ +
+ +DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
   
+ +/*
+ + * 'Special' version for the scheduler, it hard assumes no recursion,
+ + * which is guaranteed by us not actually scheduling inside other swevents
+ + * because those disable preemption.
+ + */
+ +static __always_inline void
+ +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+ +{
         if (static_key_false(&perf_swevent_enabled[event_id])) {
- -              if (!regs) {
- -                      perf_fetch_caller_regs(&hot_regs);
- -                      regs = &hot_regs;
- -              }
- -              __perf_sw_event(event_id, nr, regs, addr);
+ +              struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+ +
+ +              perf_fetch_caller_regs(regs);
+ +              ___perf_sw_event(event_id, nr, regs, addr);
         }
   }
   
@@@ -718,7 -710,7 +718,7 @@@ static inline void perf_event_task_sche
   static inline void perf_event_task_sched_out(struct task_struct *prev,
                                              struct task_struct *next)
   {
- -      perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
+ +      perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
   
         if (static_key_false(&perf_sched_events.key))
                 __perf_event_task_sched_out(prev, next);
@@@ -829,8 -821,6 +829,8 @@@ static inline int perf_event_refresh(st
   static inline void
   perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)   { }
   static inline void
+ +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)                   { }
+ +static inline void
   perf_bp_event(struct perf_event *event, void *data)                   { }
   
   static inline int perf_register_guest_info_callbacks
@@@ -907,12 -897,22 +907,22 @@@ struct perf_pmu_events_attr 
         const char *event_str;
   };
   
+ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
+                             char *page);
+ 
   #define PMU_EVENT_ATTR(_name, _var, _id, _show)                               \
   static struct perf_pmu_events_attr _var = {                           \
         .attr = __ATTR(_name, 0444, _show, NULL),                       \
         .id   =  _id,                                                   \
   };
   
+ #define PMU_EVENT_ATTR_STRING(_name, _var, _str)                          \
+ static struct perf_pmu_events_attr _var = {                               \
+       .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+       .id             = 0,                                                \
+       .event_str      = _str,                                             \
+ };
+ 
   #define PMU_FORMAT_ATTR(_name, _format)                                       \
   static ssize_t                                                                \
   _name##_show(struct device *dev,                                      \
diff --combined kernel/events/core.c

index 7f2fbb8b5069b3258bdd9721c60b850f965953d1,934687f8d51b53fa19c6222398ecce63f9202534..8812d8e35f5b03b13e148ff67ae33453cad306c1
--- 1/kernel/events/core.c
--- 2/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -872,32 -872,22 +872,32 @@@ void perf_pmu_enable(struct pmu *pmu
                 pmu->pmu_enable(pmu);
   }
   
- -static DEFINE_PER_CPU(struct list_head, rotation_list);
+ +static DEFINE_PER_CPU(struct list_head, active_ctx_list);
   
   /*
- - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- - * because they're strictly cpu affine and rotate_start is called with IRQs
- - * disabled, while rotate_context is called from IRQ context.
+ + * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and
+ + * perf_event_task_tick() are fully serialized because they're strictly cpu
+ + * affine and perf_event_ctx_{activate,deactivate}() are called with IRQs
+ + * disabled, while perf_event_task_tick is called from IRQ context.
    */
- -static void perf_pmu_rotate_start(struct pmu *pmu)
+ +static void perf_event_ctx_activate(struct perf_event_context *ctx)
   {
- -      struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- -      struct list_head *head = this_cpu_ptr(&rotation_list);
+ +      struct list_head *head = this_cpu_ptr(&active_ctx_list);
   
         WARN_ON(!irqs_disabled());
   
- -      if (list_empty(&cpuctx->rotation_list))
- -              list_add(&cpuctx->rotation_list, head);
+ +      WARN_ON(!list_empty(&ctx->active_ctx_list));
+ +
+ +      list_add(&ctx->active_ctx_list, head);
+ +}
+ +
+ +static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
+ +{
+ +      WARN_ON(!irqs_disabled());
+ +
+ +      WARN_ON(list_empty(&ctx->active_ctx_list));
+ +
+ +      list_del_init(&ctx->active_ctx_list);
   }
   
   static void get_ctx(struct perf_event_context *ctx)
@@@ -916,84 -906,6 +916,84 @@@ static void put_ctx(struct perf_event_c
         }
   }
   
+ +/*
+ + * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
+ + * perf_pmu_migrate_context() we need some magic.
+ + *
+ + * Those places that change perf_event::ctx will hold both
+ + * perf_event_context::mutex of the 'old' and 'new' ctx value.
+ + *
+ + * Lock ordering is by mutex address. There is one other site where
+ + * perf_event_context::mutex nests and that is put_event(). But remember that
+ + * that is a parent<->child context relation, and migration does not affect
+ + * children, therefore these two orderings should not interact.
+ + *
+ + * The change in perf_event::ctx does not affect children (as claimed above)
+ + * because the sys_perf_event_open() case will install a new event and break
+ + * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
+ + * concerned with cpuctx and that doesn't have children.
+ + *
+ + * The places that change perf_event::ctx will issue:
+ + *
+ + *   perf_remove_from_context();
+ + *   synchronize_rcu();
+ + *   perf_install_in_context();
+ + *
+ + * to effect the change. The remove_from_context() + synchronize_rcu() should
+ + * quiesce the event, after which we can install it in the new location. This
+ + * means that only external vectors (perf_fops, prctl) can perturb the event
+ + * while in transit. Therefore all such accessors should also acquire
+ + * perf_event_context::mutex to serialize against this.
+ + *
+ + * However; because event->ctx can change while we're waiting to acquire
+ + * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
+ + * function.
+ + *
+ + * Lock order:
+ + *    task_struct::perf_event_mutex
+ + *      perf_event_context::mutex
+ + *        perf_event_context::lock
+ + *        perf_event::child_mutex;
+ + *        perf_event::mmap_mutex
+ + *        mmap_sem
+ + */
+ +static struct perf_event_context *
+ +perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
+ +{
+ +      struct perf_event_context *ctx;
+ +
+ +again:
+ +      rcu_read_lock();
+ +      ctx = ACCESS_ONCE(event->ctx);
+ +      if (!atomic_inc_not_zero(&ctx->refcount)) {
+ +              rcu_read_unlock();
+ +              goto again;
+ +      }
+ +      rcu_read_unlock();
+ +
+ +      mutex_lock_nested(&ctx->mutex, nesting);
+ +      if (event->ctx != ctx) {
+ +              mutex_unlock(&ctx->mutex);
+ +              put_ctx(ctx);
+ +              goto again;
+ +      }
+ +
+ +      return ctx;
+ +}
+ +
+ +static inline struct perf_event_context *
+ +perf_event_ctx_lock(struct perf_event *event)
+ +{
+ +      return perf_event_ctx_lock_nested(event, 0);
+ +}
+ +
+ +static void perf_event_ctx_unlock(struct perf_event *event,
+ +                                struct perf_event_context *ctx)
+ +{
+ +      mutex_unlock(&ctx->mutex);
+ +      put_ctx(ctx);
+ +}
+ +
   /*
    * This must be done under the ctx->lock, such as to serialize against
    * context_equiv(), therefore we cannot call put_ctx() since that might end up
@@@ -1243,6 -1155,8 +1243,6 @@@ list_add_event(struct perf_event *event
                 ctx->nr_branch_stack++;
   
         list_add_rcu(&event->event_entry, &ctx->event_list);
- -      if (!ctx->nr_events)
- -              perf_pmu_rotate_start(ctx->pmu);
         ctx->nr_events++;
         if (event->attr.inherit_stat)
                 ctx->nr_stat++;
@@@ -1361,8 -1275,6 +1361,8 @@@ static void perf_group_attach(struct pe
         if (group_leader == event)
                 return;
   
+ +      WARN_ON_ONCE(group_leader->ctx != event->ctx);
+ +
         if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
                         !is_software_event(event))
                 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
@@@ -1384,10 -1296,6 +1384,10 @@@ static voi
   list_del_event(struct perf_event *event, struct perf_event_context *ctx)
   {
         struct perf_cpu_context *cpuctx;
+ +
+ +      WARN_ON_ONCE(event->ctx != ctx);
+ +      lockdep_assert_held(&ctx->lock);
+ +
         /*
          * We can have double detach due to exit/hot-unplug + close.
          */
@@@ -1472,8 -1380,6 +1472,8 @@@ static void perf_group_detach(struct pe
   
                 /* Inherit group flags from the previous leader */
                 sibling->group_flags = event->group_flags;
+ +
+ +              WARN_ON_ONCE(sibling->ctx != event->ctx);
         }
   
   out:
@@@ -1536,10 -1442,6 +1536,10 @@@ event_sched_out(struct perf_event *even
   {
         u64 tstamp = perf_event_time(event);
         u64 delta;
+ +
+ +      WARN_ON_ONCE(event->ctx != ctx);
+ +      lockdep_assert_held(&ctx->lock);
+ +
         /*
          * An event which could not be activated because of
          * filter mismatch still needs to have its timings
@@@ -1569,8 -1471,7 +1569,8 @@@
   
         if (!is_software_event(event))
                 cpuctx->active_oncpu--;
- -      ctx->nr_active--;
+ +      if (!--ctx->nr_active)
+ +              perf_event_ctx_deactivate(ctx);
         if (event->attr.freq && event->attr.sample_freq)
                 ctx->nr_freq--;
         if (event->attr.exclusive || !cpuctx->active_oncpu)
@@@ -1753,7 -1654,7 +1753,7 @@@ int __perf_event_disable(void *info
    * is the current context on this CPU and preemption is disabled,
    * hence we can't get into perf_event_task_sched_out for this context.
    */
- -void perf_event_disable(struct perf_event *event)
+ +static void _perf_event_disable(struct perf_event *event)
   {
         struct perf_event_context *ctx = event->ctx;
         struct task_struct *task = ctx->task;
@@@ -1794,19 -1695,6 +1794,19 @@@ retry
         }
         raw_spin_unlock_irq(&ctx->lock);
   }
+ +
+ +/*
+ + * Strictly speaking kernel users cannot create groups and therefore this
+ + * interface does not need the perf_event_ctx_lock() magic.
+ + */
+ +void perf_event_disable(struct perf_event *event)
+ +{
+ +      struct perf_event_context *ctx;
+ +
+ +      ctx = perf_event_ctx_lock(event);
+ +      _perf_event_disable(event);
+ +      perf_event_ctx_unlock(event, ctx);
+ +}
   EXPORT_SYMBOL_GPL(perf_event_disable);
   
   static void perf_set_shadow_time(struct perf_event *event,
@@@ -1894,8 -1782,7 +1894,8 @@@ event_sched_in(struct perf_event *event
   
         if (!is_software_event(event))
                 cpuctx->active_oncpu++;
- -      ctx->nr_active++;
+ +      if (!ctx->nr_active++)
+ +              perf_event_ctx_activate(ctx);
         if (event->attr.freq && event->attr.sample_freq)
                 ctx->nr_freq++;
   
@@@ -2271,7 -2158,7 +2271,7 @@@ unlock
    * perf_event_for_each_child or perf_event_for_each as described
    * for perf_event_disable.
    */
- -void perf_event_enable(struct perf_event *event)
+ +static void _perf_event_enable(struct perf_event *event)
   {
         struct perf_event_context *ctx = event->ctx;
         struct task_struct *task = ctx->task;
@@@ -2327,21 -2214,9 +2327,21 @@@ retry
   out:
         raw_spin_unlock_irq(&ctx->lock);
   }
+ +
+ +/*
+ + * See perf_event_disable();
+ + */
+ +void perf_event_enable(struct perf_event *event)
+ +{
+ +      struct perf_event_context *ctx;
+ +
+ +      ctx = perf_event_ctx_lock(event);
+ +      _perf_event_enable(event);
+ +      perf_event_ctx_unlock(event, ctx);
+ +}
   EXPORT_SYMBOL_GPL(perf_event_enable);
   
- -int perf_event_refresh(struct perf_event *event, int refresh)
+ +static int _perf_event_refresh(struct perf_event *event, int refresh)
   {
         /*
          * not supported on inherited events
@@@ -2350,25 -2225,10 +2350,25 @@@
                 return -EINVAL;
   
         atomic_add(refresh, &event->event_limit);
- -      perf_event_enable(event);
+ +      _perf_event_enable(event);
   
         return 0;
   }
+ +
+ +/*
+ + * See perf_event_disable()
+ + */
+ +int perf_event_refresh(struct perf_event *event, int refresh)
+ +{
+ +      struct perf_event_context *ctx;
+ +      int ret;
+ +
+ +      ctx = perf_event_ctx_lock(event);
+ +      ret = _perf_event_refresh(event, refresh);
+ +      perf_event_ctx_unlock(event, ctx);
+ +
+ +      return ret;
+ +}
   EXPORT_SYMBOL_GPL(perf_event_refresh);
   
   static void ctx_sched_out(struct perf_event_context *ctx,
@@@ -2752,6 -2612,12 +2752,6 @@@ static void perf_event_context_sched_in
   
         perf_pmu_enable(ctx->pmu);
         perf_ctx_unlock(cpuctx, ctx);
- -
- -      /*
- -       * Since these rotations are per-cpu, we need to ensure the
- -       * cpu-context we got scheduled on is actually rotating.
- -       */
- -      perf_pmu_rotate_start(ctx->pmu);
   }
   
   /*
@@@ -3039,18 -2905,25 +3039,18 @@@ static void rotate_ctx(struct perf_even
                 list_rotate_left(&ctx->flexible_groups);
   }
   
- -/*
- - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- - * because they're strictly cpu affine and rotate_start is called with IRQs
- - * disabled, while rotate_context is called from IRQ context.
- - */
   static int perf_rotate_context(struct perf_cpu_context *cpuctx)
   {
         struct perf_event_context *ctx = NULL;
- -      int rotate = 0, remove = 1;
+ +      int rotate = 0;
   
         if (cpuctx->ctx.nr_events) {
- -              remove = 0;
                 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                         rotate = 1;
         }
   
         ctx = cpuctx->task_ctx;
         if (ctx && ctx->nr_events) {
- -              remove = 0;
                 if (ctx->nr_events != ctx->nr_active)
                         rotate = 1;
         }
@@@ -3074,6 -2947,8 +3074,6 @@@
         perf_pmu_enable(cpuctx->ctx.pmu);
         perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
   done:
- -      if (remove)
- -              list_del_init(&cpuctx->rotation_list);
   
         return rotate;
   }
@@@ -3091,8 -2966,9 +3091,8 @@@ bool perf_event_can_stop_tick(void
   
   void perf_event_task_tick(void)
   {
- -      struct list_head *head = this_cpu_ptr(&rotation_list);
- -      struct perf_cpu_context *cpuctx, *tmp;
- -      struct perf_event_context *ctx;
+ +      struct list_head *head = this_cpu_ptr(&active_ctx_list);
+ +      struct perf_event_context *ctx, *tmp;
         int throttled;
   
         WARN_ON(!irqs_disabled());
@@@ -3100,8 -2976,14 +3100,8 @@@
         __this_cpu_inc(perf_throttled_seq);
         throttled = __this_cpu_xchg(perf_throttled_count, 0);
   
- -      list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
- -              ctx = &cpuctx->ctx;
+ +      list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
                 perf_adjust_freq_unthr_context(ctx, throttled);
- -
- -              ctx = cpuctx->task_ctx;
- -              if (ctx)
- -                      perf_adjust_freq_unthr_context(ctx, throttled);
- -      }
   }
   
   static int event_enable_on_exec(struct perf_event *event,
@@@ -3260,7 -3142,6 +3260,7 @@@ static void __perf_event_init_context(s
   {
         raw_spin_lock_init(&ctx->lock);
         mutex_init(&ctx->mutex);
+ +      INIT_LIST_HEAD(&ctx->active_ctx_list);
         INIT_LIST_HEAD(&ctx->pinned_groups);
         INIT_LIST_HEAD(&ctx->flexible_groups);
         INIT_LIST_HEAD(&ctx->event_list);
@@@ -3540,16 -3421,7 +3540,16 @@@ static void perf_remove_from_owner(stru
         rcu_read_unlock();
   
         if (owner) {
- -              mutex_lock(&owner->perf_event_mutex);
+ +              /*
+ +               * If we're here through perf_event_exit_task() we're already
+ +               * holding ctx->mutex which would be an inversion wrt. the
+ +               * normal lock order.
+ +               *
+ +               * However we can safely take this lock because it's the child
+ +               * ctx->mutex.
+ +               */
+ +              mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
+ +
                 /*
                  * We have to re-check the event->owner field, if it is cleared
                  * we raced with perf_event_exit_task(), acquiring the mutex
@@@ -3568,7 -3440,7 +3568,7 @@@
    */
   static void put_event(struct perf_event *event)
   {
- -      struct perf_event_context *ctx = event->ctx;
+ +      struct perf_event_context *ctx;
   
         if (!atomic_long_dec_and_test(&event->refcount))
                 return;
@@@ -3576,6 -3448,7 +3576,6 @@@
         if (!is_kernel_event(event))
                 perf_remove_from_owner(event);
   
- -      WARN_ON_ONCE(ctx->parent_ctx);
         /*
          * There are two ways this annotation is useful:
          *
@@@ -3588,8 -3461,7 +3588,8 @@@
          *     the last filedesc died, so there is no possibility
          *     to trigger the AB-BA case.
          */
- -      mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+ +      ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
+ +      WARN_ON_ONCE(ctx->parent_ctx);
         perf_remove_from_context(event, true);
         mutex_unlock(&ctx->mutex);
   
@@@ -3675,13 -3547,12 +3675,13 @@@ static int perf_event_read_group(struc
                                    u64 read_format, char __user *buf)
   {
         struct perf_event *leader = event->group_leader, *sub;
- -      int n = 0, size = 0, ret = -EFAULT;
         struct perf_event_context *ctx = leader->ctx;
- -      u64 values[5];
+ +      int n = 0, size = 0, ret;
         u64 count, enabled, running;
+ +      u64 values[5];
+ +
+ +      lockdep_assert_held(&ctx->mutex);
   
- -      mutex_lock(&ctx->mutex);
         count = perf_event_read_value(leader, &enabled, &running);
   
         values[n++] = 1 + leader->nr_siblings;
@@@ -3696,7 -3567,7 +3696,7 @@@
         size = n * sizeof(u64);
   
         if (copy_to_user(buf, values, size))
- -              goto unlock;
+ +              return -EFAULT;
   
         ret = size;
   
@@@ -3710,11 -3581,14 +3710,11 @@@
                 size = n * sizeof(u64);
   
                 if (copy_to_user(buf + ret, values, size)) {
- -                      ret = -EFAULT;
- -                      goto unlock;
+ +                      return -EFAULT;
                 }
   
                 ret += size;
         }
- -unlock:
- -      mutex_unlock(&ctx->mutex);
   
         return ret;
   }
@@@ -3786,14 -3660,8 +3786,14 @@@ static ssize_
   perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
   {
         struct perf_event *event = file->private_data;
+ +      struct perf_event_context *ctx;
+ +      int ret;
   
- -      return perf_read_hw(event, buf, count);
+ +      ctx = perf_event_ctx_lock(event);
+ +      ret = perf_read_hw(event, buf, count);
+ +      perf_event_ctx_unlock(event, ctx);
+ +
+ +      return ret;
   }
   
   static unsigned int perf_poll(struct file *file, poll_table *wait)
@@@ -3819,7 -3687,7 +3819,7 @@@
         return events;
   }
   
- -static void perf_event_reset(struct perf_event *event)
+ +static void _perf_event_reset(struct perf_event *event)
   {
         (void)perf_event_read(event);
         local64_set(&event->count, 0);
@@@ -3838,7 -3706,6 +3838,7 @@@ static void perf_event_for_each_child(s
         struct perf_event *child;
   
         WARN_ON_ONCE(event->ctx->parent_ctx);
+ +
         mutex_lock(&event->child_mutex);
         func(event);
         list_for_each_entry(child, &event->child_list, child_list)
@@@ -3852,13 -3719,14 +3852,13 @@@ static void perf_event_for_each(struct 
         struct perf_event_context *ctx = event->ctx;
         struct perf_event *sibling;
   
- -      WARN_ON_ONCE(ctx->parent_ctx);
- -      mutex_lock(&ctx->mutex);
+ +      lockdep_assert_held(&ctx->mutex);
+ +
         event = event->group_leader;
   
         perf_event_for_each_child(event, func);
         list_for_each_entry(sibling, &event->sibling_list, group_entry)
                 perf_event_for_each_child(sibling, func);
- -      mutex_unlock(&ctx->mutex);
   }
   
   static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@@ -3928,24 -3796,25 +3928,24 @@@ static int perf_event_set_output(struc
                                  struct perf_event *output_event);
   static int perf_event_set_filter(struct perf_event *event, void __user *arg);
   
- -static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ +static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
   {
- -      struct perf_event *event = file->private_data;
         void (*func)(struct perf_event *);
         u32 flags = arg;
   
         switch (cmd) {
         case PERF_EVENT_IOC_ENABLE:
- -              func = perf_event_enable;
+ +              func = _perf_event_enable;
                 break;
         case PERF_EVENT_IOC_DISABLE:
- -              func = perf_event_disable;
+ +              func = _perf_event_disable;
                 break;
         case PERF_EVENT_IOC_RESET:
- -              func = perf_event_reset;
+ +              func = _perf_event_reset;
                 break;
   
         case PERF_EVENT_IOC_REFRESH:
- -              return perf_event_refresh(event, arg);
+ +              return _perf_event_refresh(event, arg);
   
         case PERF_EVENT_IOC_PERIOD:
                 return perf_event_period(event, (u64 __user *)arg);
@@@ -3992,19 -3861,6 +3992,19 @@@
         return 0;
   }
   
+ +static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ +{
+ +      struct perf_event *event = file->private_data;
+ +      struct perf_event_context *ctx;
+ +      long ret;
+ +
+ +      ctx = perf_event_ctx_lock(event);
+ +      ret = _perf_ioctl(event, cmd, arg);
+ +      perf_event_ctx_unlock(event, ctx);
+ +
+ +      return ret;
+ +}
+ +
   #ifdef CONFIG_COMPAT
   static long perf_compat_ioctl(struct file *file, unsigned int cmd,
                                 unsigned long arg)
@@@ -4027,15 -3883,11 +4027,15 @@@
   
   int perf_event_task_enable(void)
   {
+ +      struct perf_event_context *ctx;
         struct perf_event *event;
   
         mutex_lock(&current->perf_event_mutex);
- -      list_for_each_entry(event, &current->perf_event_list, owner_entry)
- -              perf_event_for_each_child(event, perf_event_enable);
+ +      list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+ +              ctx = perf_event_ctx_lock(event);
+ +              perf_event_for_each_child(event, _perf_event_enable);
+ +              perf_event_ctx_unlock(event, ctx);
+ +      }
         mutex_unlock(&current->perf_event_mutex);
   
         return 0;
@@@ -4043,15 -3895,11 +4043,15 @@@
   
   int perf_event_task_disable(void)
   {
+ +      struct perf_event_context *ctx;
         struct perf_event *event;
   
         mutex_lock(&current->perf_event_mutex);
- -      list_for_each_entry(event, &current->perf_event_list, owner_entry)
- -              perf_event_for_each_child(event, perf_event_disable);
+ +      list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+ +              ctx = perf_event_ctx_lock(event);
+ +              perf_event_for_each_child(event, _perf_event_disable);
+ +              perf_event_ctx_unlock(event, ctx);
+ +      }
         mutex_unlock(&current->perf_event_mutex);
   
         return 0;
@@@ -4613,14 -4461,18 +4613,14 @@@ perf_output_sample_regs(struct perf_out
   }
   
   static void perf_sample_regs_user(struct perf_regs *regs_user,
- -                                struct pt_regs *regs)
+ +                                struct pt_regs *regs,
+ +                                struct pt_regs *regs_user_copy)
   {
- -      if (!user_mode(regs)) {
- -              if (current->mm)
- -                      regs = task_pt_regs(current);
- -              else
- -                      regs = NULL;
- -      }
- -
- -      if (regs) {
- -              regs_user->abi  = perf_reg_abi(current);
+ +      if (user_mode(regs)) {
+ +              regs_user->abi = perf_reg_abi(current);
                 regs_user->regs = regs;
+ +      } else if (current->mm) {
+ +              perf_get_regs_user(regs_user, regs, regs_user_copy);
         } else {
                 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
                 regs_user->regs = NULL;
@@@ -5099,8 -4951,7 +5099,8 @@@ void perf_prepare_sample(struct perf_ev
         }
   
         if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
- -              perf_sample_regs_user(&data->regs_user, regs);
+ +              perf_sample_regs_user(&data->regs_user, regs,
+ +                                    &data->regs_user_copy);
   
         if (sample_type & PERF_SAMPLE_REGS_USER) {
                 /* regs dump ABI info */
@@@ -6041,8 -5892,6 +6041,8 @@@ end
         rcu_read_unlock();
   }
   
+ +DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
+ +
   int perf_swevent_get_recursion_context(void)
   {
         struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
@@@ -6058,30 -5907,21 +6058,30 @@@ inline void perf_swevent_put_recursion_
         put_recursion_context(swhash->recursion, rctx);
   }
   
- -void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+ +void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
   {
         struct perf_sample_data data;
- -      int rctx;
   
- -      preempt_disable_notrace();
- -      rctx = perf_swevent_get_recursion_context();
- -      if (rctx < 0)
+ +      if (WARN_ON_ONCE(!regs))
                 return;
   
         perf_sample_data_init(&data, addr, 0);
- -
         do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
+ +}
+ +
+ +void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+ +{
+ +      int rctx;
+ +
+ +      preempt_disable_notrace();
+ +      rctx = perf_swevent_get_recursion_context();
+ +      if (unlikely(rctx < 0))
+ +              goto fail;
+ +
+ +      ___perf_sw_event(event_id, nr, regs, addr);
   
         perf_swevent_put_recursion_context(rctx);
+ +fail:
         preempt_enable_notrace();
   }
   
@@@ -6939,10 -6779,12 +6939,10 @@@ skip_type
                 __perf_event_init_context(&cpuctx->ctx);
                 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
                 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
- -              cpuctx->ctx.type = cpu_context;
                 cpuctx->ctx.pmu = pmu;
   
                 __perf_cpu_hrtimer_init(cpuctx, cpu);
   
- -              INIT_LIST_HEAD(&cpuctx->rotation_list);
                 cpuctx->unique_pmu = pmu;
         }
   
@@@ -7015,20 -6857,6 +7015,20 @@@ void perf_pmu_unregister(struct pmu *pm
   }
   EXPORT_SYMBOL_GPL(perf_pmu_unregister);
   
+ +static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
+ +{
+ +      int ret;
+ +
+ +      if (!try_module_get(pmu->module))
+ +              return -ENODEV;
+ +      event->pmu = pmu;
+ +      ret = pmu->event_init(event);
+ +      if (ret)
+ +              module_put(pmu->module);
+ +
+ +      return ret;
+ +}
+ +
   struct pmu *perf_init_event(struct perf_event *event)
   {
         struct pmu *pmu = NULL;
@@@ -7041,14 -6869,24 +7041,14 @@@
         pmu = idr_find(&pmu_idr, event->attr.type);
         rcu_read_unlock();
         if (pmu) {
- -              if (!try_module_get(pmu->module)) {
- -                      pmu = ERR_PTR(-ENODEV);
- -                      goto unlock;
- -              }
- -              event->pmu = pmu;
- -              ret = pmu->event_init(event);
+ +              ret = perf_try_init_event(pmu, event);
                 if (ret)
                         pmu = ERR_PTR(ret);
                 goto unlock;
         }
   
         list_for_each_entry_rcu(pmu, &pmus, entry) {
- -              if (!try_module_get(pmu->module)) {
- -                      pmu = ERR_PTR(-ENODEV);
- -                      goto unlock;
- -              }
- -              event->pmu = pmu;
- -              ret = pmu->event_init(event);
+ +              ret = perf_try_init_event(pmu, event);
                 if (!ret)
                         goto unlock;
   
@@@ -7412,15 -7250,6 +7412,15 @@@ out
         return ret;
   }
   
+ +static void mutex_lock_double(struct mutex *a, struct mutex *b)
+ +{
+ +      if (b < a)
+ +              swap(a, b);
+ +
+ +      mutex_lock(a);
+ +      mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+ +}
+ +
   /**
    * sys_perf_event_open - open a performance event, associate it to a task/cpu
    *
@@@ -7436,7 -7265,7 +7436,7 @@@ SYSCALL_DEFINE5(perf_event_open
         struct perf_event *group_leader = NULL, *output_event = NULL;
         struct perf_event *event, *sibling;
         struct perf_event_attr attr;
- -      struct perf_event_context *ctx;
+ +      struct perf_event_context *ctx, *uninitialized_var(gctx);
         struct file *event_file = NULL;
         struct fd group = {NULL, 0};
         struct task_struct *task = NULL;
@@@ -7594,19 -7423,7 +7594,19 @@@
                  * task or CPU context:
                  */
                 if (move_group) {
- -                      if (group_leader->ctx->type != ctx->type)
+ +                      /*
+ +                       * Make sure we're both on the same task, or both
+ +                       * per-cpu events.
+ +                       */
+ +                      if (group_leader->ctx->task != ctx->task)
+ +                              goto err_context;
+ +
+ +                      /*
+ +                       * Make sure we're both events for the same CPU;
+ +                       * grouping events for different CPUs is broken, since
+ +                       * you can never concurrently schedule them anyhow.
+ +                       */
+ +                      if (group_leader->cpu != event->cpu)
                                 goto err_context;
                 } else {
                         if (group_leader->ctx != ctx)
@@@ -7634,68 -7451,43 +7634,68 @@@
         }
   
         if (move_group) {
- -              struct perf_event_context *gctx = group_leader->ctx;
- -
- -              mutex_lock(&gctx->mutex);
- -              perf_remove_from_context(group_leader, false);
+ +              gctx = group_leader->ctx;
   
                 /*
- -               * Removing from the context ends up with disabled
- -               * event. What we want here is event in the initial
- -               * startup state, ready to be add into new context.
+ +               * See perf_event_ctx_lock() for comments on the details
+ +               * of swizzling perf_event::ctx.
                  */
- -              perf_event__state_init(group_leader);
+ +              mutex_lock_double(&gctx->mutex, &ctx->mutex);
+ +
+ +              perf_remove_from_context(group_leader, false);
+ +
                 list_for_each_entry(sibling, &group_leader->sibling_list,
                                     group_entry) {
                         perf_remove_from_context(sibling, false);
- -                      perf_event__state_init(sibling);
                         put_ctx(gctx);
                 }
- -              mutex_unlock(&gctx->mutex);
- -              put_ctx(gctx);
+ +      } else {
+ +              mutex_lock(&ctx->mutex);
         }
   
         WARN_ON_ONCE(ctx->parent_ctx);
- -      mutex_lock(&ctx->mutex);
   
         if (move_group) {
+ +              /*
+ +               * Wait for everybody to stop referencing the events through
+ +               * the old lists, before installing it on new lists.
+ +               */
                 synchronize_rcu();
- -              perf_install_in_context(ctx, group_leader, group_leader->cpu);
- -              get_ctx(ctx);
+ +
+ +              /*
+ +               * Install the group siblings before the group leader.
+ +               *
+ +               * Because a group leader will try and install the entire group
+ +               * (through the sibling list, which is still intact), we can
+ +               * end up with siblings installed in the wrong context.
+ +               *
+ +               * By installing siblings first we NO-OP because they're not
+ +               * reachable through the group lists.
+ +               */
                 list_for_each_entry(sibling, &group_leader->sibling_list,
                                     group_entry) {
+ +                      perf_event__state_init(sibling);
                         perf_install_in_context(ctx, sibling, sibling->cpu);
                         get_ctx(ctx);
                 }
+ +
+ +              /*
+ +               * Removing from the context ends up with disabled
+ +               * event. What we want here is event in the initial
+ +               * startup state, ready to be added into the new context.
+ +               */
+ +              perf_event__state_init(group_leader);
+ +              perf_install_in_context(ctx, group_leader, group_leader->cpu);
+ +              get_ctx(ctx);
         }
   
         perf_install_in_context(ctx, event, event->cpu);
         perf_unpin_context(ctx);
+ +
+ +      if (move_group) {
+ +              mutex_unlock(&gctx->mutex);
+ +              put_ctx(gctx);
+ +      }
         mutex_unlock(&ctx->mutex);
   
         put_online_cpus();
@@@ -7803,11 -7595,7 +7803,11 @@@ void perf_pmu_migrate_context(struct pm
         src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
         dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
   
- -      mutex_lock(&src_ctx->mutex);
+ +      /*
+ +       * See perf_event_ctx_lock() for comments on the details
+ +       * of swizzling perf_event::ctx.
+ +       */
+ +      mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
         list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                  event_entry) {
                 perf_remove_from_context(event, false);
@@@ -7815,36 -7603,11 +7815,36 @@@
                 put_ctx(src_ctx);
                 list_add(&event->migrate_entry, &events);
         }
- -      mutex_unlock(&src_ctx->mutex);
   
+ +      /*
+ +       * Wait for the events to quiesce before re-instating them.
+ +       */
         synchronize_rcu();
   
- -      mutex_lock(&dst_ctx->mutex);
+ +      /*
+ +       * Re-instate events in 2 passes.
+ +       *
+ +       * Skip over group leaders and only install siblings on this first
+ +       * pass, siblings will not get enabled without a leader, however a
+ +       * leader will enable its siblings, even if those are still on the old
+ +       * context.
+ +       */
+ +      list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ +              if (event->group_leader == event)
+ +                      continue;
+ +
+ +              list_del(&event->migrate_entry);
+ +              if (event->state >= PERF_EVENT_STATE_OFF)
+ +                      event->state = PERF_EVENT_STATE_INACTIVE;
+ +              account_event_cpu(event, dst_cpu);
+ +              perf_install_in_context(dst_ctx, event, dst_cpu);
+ +              get_ctx(dst_ctx);
+ +      }
+ +
+ +      /*
+ +       * Once all the siblings are set up properly, install the group leaders
+ +       * to make it go.
+ +       */
         list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
                 list_del(&event->migrate_entry);
                 if (event->state >= PERF_EVENT_STATE_OFF)
@@@ -7854,7 -7617,6 +7854,7 @@@
                 get_ctx(dst_ctx);
         }
         mutex_unlock(&dst_ctx->mutex);
+ +      mutex_unlock(&src_ctx->mutex);
   }
   EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
   
@@@ -8041,19 -7803,14 +8041,19 @@@ static void perf_free_event(struct perf
   
         put_event(parent);
   
+ +      raw_spin_lock_irq(&ctx->lock);
         perf_group_detach(event);
         list_del_event(event, ctx);
+ +      raw_spin_unlock_irq(&ctx->lock);
         free_event(event);
   }
   
   /*
- - * free an unexposed, unused context as created by inheritance by
+ + * Free an unexposed, unused context as created by inheritance by
    * perf_event_init_task below, used by fork() in case of fail.
+ + *
+ + * Not all locks are strictly required, but take them anyway to be nice and
+ + * help out with the lockdep assertions.
    */
   void perf_event_free_task(struct task_struct *task)
   {
@@@ -8372,7 -8129,7 +8372,7 @@@ static void __init perf_event_init_all_
         for_each_possible_cpu(cpu) {
                 swhash = &per_cpu(swevent_htable, cpu);
                 mutex_init(&swhash->hlist_mutex);
- -              INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
+ +              INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
         }
   }
   
@@@ -8393,11 -8150,22 +8393,11 @@@ static void perf_event_init_cpu(int cpu
   }
   
   #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
- -static void perf_pmu_rotate_stop(struct pmu *pmu)
- -{
- -      struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- -
- -      WARN_ON(!irqs_disabled());
- -
- -      list_del_init(&cpuctx->rotation_list);
- -}
- -
   static void __perf_event_exit_context(void *__info)
   {
         struct remove_event re = { .detach_group = true };
         struct perf_event_context *ctx = __info;
   
- -      perf_pmu_rotate_stop(ctx->pmu);
- -
         rcu_read_lock();
         list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
                 __perf_remove_from_context(&re);
@@@ -8508,6 -8276,18 +8508,18 @@@ void __init perf_event_init(void
                      != 1024);
   }
   
+ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
+                             char *page)
+ {
+       struct perf_pmu_events_attr *pmu_attr =
+               container_of(attr, struct perf_pmu_events_attr, attr);
+ 
+       if (pmu_attr->event_str)
+               return sprintf(page, "%s\n", pmu_attr->event_str);
+ 
+       return 0;
+ }
+ 
   static int __init perf_event_sysfs_init(void)
   {
         struct pmu *pmu;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Feb 2015 02:15:38 +0000 (18:15 -0800)
		1	2
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/include/asm/thread_info.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/sysdev/fsl_pci.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/xmon/xmon.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/cxl/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/perf_event.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/events/core.c	patch \|	diff1 \|	diff2 \|	blob \| history