Merge branch 'sched/core' into core/locking, to prepare the kernel/locking/ file...
author Ingo Molnar <mingo@kernel.org>
Wed, 6 Nov 2013 06:50:37 +0000 (07:50 +0100)
committer Ingo Molnar <mingo@kernel.org>
Wed, 6 Nov 2013 06:50:37 +0000 (07:50 +0100)
Conflicts:
kernel/Makefile

There are conflicts in kernel/Makefile due to the file moves in the
scheduler tree - resolve them.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
70 files changed:
Documentation/DocBook/device-drivers.tmpl
Documentation/RCU/checklist.txt
Documentation/RCU/stallwarn.txt
Documentation/kernel-parameters.txt
Documentation/kernel-per-CPU-kthreads.txt
Documentation/lockstat.txt
MAINTAINERS
Makefile
arch/arc/mm/fault.c
arch/mips/kernel/perf_event_mipsxx.c
arch/mips/mti-malta/malta-int.c
arch/mips/ralink/timer.c
arch/x86/Kconfig
arch/x86/kernel/alternative.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/nmi.c
drivers/clk/clk-nomadik.c
drivers/clk/mvebu/armada-370.c
drivers/clk/socfpga/clk.c
drivers/clk/versatile/clk-icst.c
drivers/gpu/drm/i915/intel_crt.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/pl2303.c
fs/dcache.c
include/linux/ipc_namespace.h
include/linux/rculist.h
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/sched/sysctl.h
include/trace/events/rcu.h
include/trace/events/sched.h
include/uapi/linux/perf_event.h
ipc/ipc_sysctl.c
kernel/Makefile
kernel/events/ring_buffer.c
kernel/hung_task.c
kernel/lockdep.c
kernel/lockdep_proc.c
kernel/rcu.h [deleted file]
kernel/rcu/Makefile [new file with mode: 0644]
kernel/rcu/rcu.h [new file with mode: 0644]
kernel/rcu/srcu.c [new file with mode: 0644]
kernel/rcu/tiny.c [new file with mode: 0644]
kernel/rcu/tiny_plugin.h [new file with mode: 0644]
kernel/rcu/torture.c [new file with mode: 0644]
kernel/rcu/tree.c [new file with mode: 0644]
kernel/rcu/tree.h [new file with mode: 0644]
kernel/rcu/tree_plugin.h [new file with mode: 0644]
kernel/rcu/tree_trace.c [new file with mode: 0644]
kernel/rcu/update.c [new file with mode: 0644]
kernel/rcupdate.c [deleted file]
kernel/rcutiny.c [deleted file]
kernel/rcutiny_plugin.h [deleted file]
kernel/rcutorture.c [deleted file]
kernel/rcutree.c [deleted file]
kernel/rcutree.h [deleted file]
kernel/rcutree_plugin.h [deleted file]
kernel/rcutree_trace.c [deleted file]
kernel/srcu.c [deleted file]
kernel/sysctl.c
lib/scatterlist.c
mm/memcontrol.c
scripts/kallsyms.c
scripts/link-vmlinux.sh
sound/core/pcm.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/wm_hubs.c
sound/soc/soc-dapm.c

index fe397f90a34f50153f5936cfa13635ed7bc85b7b..6c9d9d37c83a30e24cce3ed25abf484e1bbd18f1 100644 (file)
@@ -87,7 +87,10 @@ X!Iinclude/linux/kobject.h
 !Ekernel/printk/printk.c
 !Ekernel/panic.c
 !Ekernel/sys.c
-!Ekernel/rcupdate.c
+!Ekernel/rcu/srcu.c
+!Ekernel/rcu/tree.c
+!Ekernel/rcu/tree_plugin.h
+!Ekernel/rcu/update.c
      </sect1>
 
      <sect1><title>Device Resource Management</title>
index 7703ec73a9bbb225a45258c4cde11c8cc9dd236f..91266193b8f49e840709db77d75f8357428b2806 100644 (file)
@@ -202,8 +202,8 @@ over a rather long period of time, but improvements are always welcome!
        updater uses call_rcu_sched() or synchronize_sched(), then
        the corresponding readers must disable preemption, possibly
        by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
-       If the updater uses synchronize_srcu() or call_srcu(),
-       the the corresponding readers must use srcu_read_lock() and
+       If the updater uses synchronize_srcu() or call_srcu(), then
+       the corresponding readers must use srcu_read_lock() and
        srcu_read_unlock(), and with the same srcu_struct.  The rules for
        the expedited primitives are the same as for their non-expedited
        counterparts.  Mixing things up will result in confusion and
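
To make the pairing rule above concrete, here is a minimal sketch in kernel C;
my_data, gp and my_srcu are illustrative names, not anything from this patch.
The updater calls synchronize_srcu() on the same srcu_struct that its readers
pass to srcu_read_lock() and srcu_read_unlock():

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_data {                        /* illustrative type, not from the patch */
        int value;
};

static struct my_data __rcu *gp;
DEFINE_SRCU(my_srcu);

/* Reader: srcu_read_lock()/srcu_read_unlock() on the same srcu_struct. */
static int my_reader(void)
{
        struct my_data *p;
        int idx, val = 0;

        idx = srcu_read_lock(&my_srcu);
        p = srcu_dereference(gp, &my_srcu);
        if (p)
                val = p->value;
        srcu_read_unlock(&my_srcu, idx);
        return val;
}

/* Updater: publish a new version, then wait for an SRCU grace period. */
static void my_updater(struct my_data *newp)
{
        struct my_data *old;

        old = rcu_dereference_protected(gp, 1);  /* update-side access */
        rcu_assign_pointer(gp, newp);
        synchronize_srcu(&my_srcu);  /* all SRCU readers of "old" have finished */
        kfree(old);
}

Pairing these readers with, say, synchronize_rcu(), or plain rcu_read_lock()
readers with synchronize_srcu(), gives no ordering guarantee at all, which is
exactly the mixing the checklist warns against.
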
index 8e9359de1d28b2e845d25425be72451b3f6be4f7..6f3a0057548ec0b3c13d02fe5d82ea7b0bf2ece4 100644 (file)
@@ -12,12 +12,12 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
        This kernel configuration parameter defines the period of time
        that RCU will wait from the beginning of a grace period until it
        issues an RCU CPU stall warning.  This time period is normally
-       sixty seconds.
+       21 seconds.
 
        This configuration parameter may be changed at runtime via the
        /sys/module/rcutree/parameters/rcu_cpu_stall_timeout, however
        this parameter is checked only at the beginning of a cycle.
-       So if you are 30 seconds into a 70-second stall, setting this
+       So if you are 10 seconds into a 40-second stall, setting this
        sysfs parameter to (say) five will shorten the timeout for the
        -next- stall, or the following warning for the current stall
        (assuming the stall lasts long enough).  It will not affect the
@@ -32,7 +32,7 @@ CONFIG_RCU_CPU_STALL_VERBOSE
        also dump the stacks of any tasks that are blocking the current
        RCU-preempt grace period.
 
-RCU_CPU_STALL_INFO
+CONFIG_RCU_CPU_STALL_INFO
 
        This kernel configuration parameter causes the stall warning to
        print out additional per-CPU diagnostic information, including
@@ -43,7 +43,8 @@ RCU_STALL_DELAY_DELTA
        Although the lockdep facility is extremely useful, it does add
        some overhead.  Therefore, under CONFIG_PROVE_RCU, the
        RCU_STALL_DELAY_DELTA macro allows five extra seconds before
-       giving an RCU CPU stall warning message.
+       giving an RCU CPU stall warning message.  (This is a cpp
+       macro, not a kernel configuration parameter.)
 
 RCU_STALL_RAT_DELAY
 
@@ -52,7 +53,8 @@ RCU_STALL_RAT_DELAY
        However, if the offending CPU does not detect its own stall in
        the number of jiffies specified by RCU_STALL_RAT_DELAY, then
        some other CPU will complain.  This delay is normally set to
-       two jiffies.
+       two jiffies.  (This is a cpp macro, not a kernel configuration
+       parameter.)
 
 When a CPU detects that it is stalling, it will print a message similar
 to the following:
@@ -86,7 +88,12 @@ printing, there will be a spurious stall-warning message:
 
 INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffies)
 
-This is rare, but does happen from time to time in real life.
+This is rare, but does happen from time to time in real life.  It is also
+possible for a zero-jiffy stall to be flagged in this case, depending
+on how the stall warning and the grace-period initialization happen to
+interact.  Please note that it is not possible to entirely eliminate this
+sort of false positive without resorting to things like stop_machine(),
+which is overkill for this sort of problem.
 
 If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set,
 more information is printed with the stall-warning message, for example:
@@ -216,4 +223,5 @@ that portion of the stack which remains the same from trace to trace.
 If you can reliably trigger the stall, ftrace can be quite helpful.
 
 RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
-and with RCU's event tracing.
+and with RCU's event tracing.  For information on RCU's event tracing,
+see include/trace/events/rcu.h.
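
As a rough illustration of how the pieces above fit together (a simplified
sketch, not the kernel's actual helper), the effective stall deadline is the
boot/sysfs-tunable timeout in seconds, converted to jiffies, plus the extra
slack that RCU_STALL_DELAY_DELTA grants under lockdep:

#include <linux/jiffies.h>

#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA   (5 * HZ)        /* five extra seconds under lockdep */
#else
#define RCU_STALL_DELAY_DELTA   0
#endif

static int rcu_cpu_stall_timeout = 21;          /* CONFIG_RCU_CPU_STALL_TIMEOUT default */

/* Simplified: jiffies RCU waits from grace-period start to the stall warning. */
static unsigned long stall_check_delay(void)
{
        return rcu_cpu_stall_timeout * HZ + RCU_STALL_DELAY_DELTA;
}
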
index fcbb736d55feb439c1152be71355d5ab79f8edd2..203f4a9d9efe9ae1b2384c6d040cfd3d6f93a603 100644 (file)
@@ -2599,7 +2599,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        ramdisk_size=   [RAM] Sizes of RAM disks in kilobytes
                        See Documentation/blockdev/ramdisk.txt.
 
-       rcu_nocbs=      [KNL,BOOT]
+       rcu_nocbs=      [KNL]
                        In kernels built with CONFIG_RCU_NOCB_CPU=y, set
                        the specified list of CPUs to be no-callback CPUs.
                        Invocation of these CPUs' RCU callbacks will
@@ -2612,7 +2612,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        real-time workloads.  It can also improve energy
                        efficiency for asymmetric multiprocessors.
 
-       rcu_nocb_poll   [KNL,BOOT]
+       rcu_nocb_poll   [KNL]
                        Rather than requiring that offloaded CPUs
                        (specified by rcu_nocbs= above) explicitly
                        awaken the corresponding "rcuoN" kthreads,
@@ -2623,126 +2623,145 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        energy efficiency by requiring that the kthreads
                        periodically wake up to do the polling.
 
-       rcutree.blimit= [KNL,BOOT]
+       rcutree.blimit= [KNL]
                        Set maximum number of finished RCU callbacks to process
                        in one batch.
 
-       rcutree.fanout_leaf=    [KNL,BOOT]
+       rcutree.rcu_fanout_leaf= [KNL]
                        Increase the number of CPUs assigned to each
                        leaf rcu_node structure.  Useful for very large
                        systems.
 
-       rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+       rcutree.jiffies_till_first_fqs= [KNL]
                        Set delay from grace-period initialization to
                        first attempt to force quiescent states.
                        Units are jiffies, minimum value is zero,
                        and maximum value is HZ.
 
-       rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+       rcutree.jiffies_till_next_fqs= [KNL]
                        Set delay between subsequent attempts to force
                        quiescent states.  Units are jiffies, minimum
                        value is one, and maximum value is HZ.
 
-       rcutree.qhimark=        [KNL,BOOT]
+       rcutree.qhimark= [KNL]
                        Set threshold of queued
                        RCU callbacks over which batch limiting is disabled.
 
-       rcutree.qlowmark=       [KNL,BOOT]
+       rcutree.qlowmark= [KNL]
                        Set threshold of queued RCU callbacks below which
                        batch limiting is re-enabled.
 
-       rcutree.rcu_cpu_stall_suppress= [KNL,BOOT]
-                       Suppress RCU CPU stall warning messages.
-
-       rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
-                       Set timeout for RCU CPU stall warning messages.
-
-       rcutree.rcu_idle_gp_delay=      [KNL,BOOT]
+       rcutree.rcu_idle_gp_delay= [KNL]
                        Set wakeup interval for idle CPUs that have
                        RCU callbacks (RCU_FAST_NO_HZ=y).
 
-       rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
+       rcutree.rcu_idle_lazy_gp_delay= [KNL]
                        Set wakeup interval for idle CPUs that have
                        only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
                        Lazy RCU callbacks are those which RCU can
                        prove do nothing more than free memory.
 
-       rcutorture.fqs_duration= [KNL,BOOT]
+       rcutorture.fqs_duration= [KNL]
                        Set duration of force_quiescent_state bursts.
 
-       rcutorture.fqs_holdoff= [KNL,BOOT]
+       rcutorture.fqs_holdoff= [KNL]
                        Set holdoff time within force_quiescent_state bursts.
 
-       rcutorture.fqs_stutter= [KNL,BOOT]
+       rcutorture.fqs_stutter= [KNL]
                        Set wait time between force_quiescent_state bursts.
 
-       rcutorture.irqreader= [KNL,BOOT]
-                       Test RCU readers from irq handlers.
+       rcutorture.gp_exp= [KNL]
+                       Use expedited update-side primitives.
+
+       rcutorture.gp_normal= [KNL]
+                       Use normal (non-expedited) update-side primitives.
+                       If both gp_exp and gp_normal are set, do both.
+                       If neither gp_exp nor gp_normal are set, still
+                       do both.
 
-       rcutorture.n_barrier_cbs= [KNL,BOOT]
+       rcutorture.n_barrier_cbs= [KNL]
                        Set callbacks/threads for rcu_barrier() testing.
 
-       rcutorture.nfakewriters= [KNL,BOOT]
+       rcutorture.nfakewriters= [KNL]
                        Set number of concurrent RCU writers.  These just
                        stress RCU, they don't participate in the actual
                        test, hence the "fake".
 
-       rcutorture.nreaders= [KNL,BOOT]
+       rcutorture.nreaders= [KNL]
                        Set number of RCU readers.
 
-       rcutorture.onoff_holdoff= [KNL,BOOT]
+       rcutorture.object_debug= [KNL]
+                       Enable debug-object double-call_rcu() testing.
+
+       rcutorture.onoff_holdoff= [KNL]
                        Set time (s) after boot for CPU-hotplug testing.
 
-       rcutorture.onoff_interval= [KNL,BOOT]
+       rcutorture.onoff_interval= [KNL]
                        Set time (s) between CPU-hotplug operations, or
                        zero to disable CPU-hotplug testing.
 
-       rcutorture.shuffle_interval= [KNL,BOOT]
+       rcutorture.rcutorture_runnable= [BOOT]
+                       Start rcutorture running at boot time.
+
+       rcutorture.shuffle_interval= [KNL]
                        Set task-shuffle interval (s).  Shuffling tasks
                        allows some CPUs to go into dyntick-idle mode
                        during the rcutorture test.
 
-       rcutorture.shutdown_secs= [KNL,BOOT]
+       rcutorture.shutdown_secs= [KNL]
                        Set time (s) after boot system shutdown.  This
                        is useful for hands-off automated testing.
 
-       rcutorture.stall_cpu= [KNL,BOOT]
+       rcutorture.stall_cpu= [KNL]
                        Duration of CPU stall (s) to test RCU CPU stall
                        warnings, zero to disable.
 
-       rcutorture.stall_cpu_holdoff= [KNL,BOOT]
+       rcutorture.stall_cpu_holdoff= [KNL]
                        Time to wait (s) after boot before inducing stall.
 
-       rcutorture.stat_interval= [KNL,BOOT]
+       rcutorture.stat_interval= [KNL]
                        Time (s) between statistics printk()s.
 
-       rcutorture.stutter= [KNL,BOOT]
+       rcutorture.stutter= [KNL]
                        Time (s) to stutter testing, for example, specifying
                        five seconds causes the test to run for five seconds,
                        wait for five seconds, and so on.  This tests RCU's
                        ability to transition abruptly to and from idle.
 
-       rcutorture.test_boost= [KNL,BOOT]
+       rcutorture.test_boost= [KNL]
                        Test RCU priority boosting?  0=no, 1=maybe, 2=yes.
                        "Maybe" means test if the RCU implementation
                        under test support RCU priority boosting.
 
-       rcutorture.test_boost_duration= [KNL,BOOT]
+       rcutorture.test_boost_duration= [KNL]
                        Duration (s) of each individual boost test.
 
-       rcutorture.test_boost_interval= [KNL,BOOT]
+       rcutorture.test_boost_interval= [KNL]
                        Interval (s) between each boost test.
 
-       rcutorture.test_no_idle_hz= [KNL,BOOT]
+       rcutorture.test_no_idle_hz= [KNL]
                        Test RCU's dyntick-idle handling.  See also the
                        rcutorture.shuffle_interval parameter.
 
-       rcutorture.torture_type= [KNL,BOOT]
+       rcutorture.torture_type= [KNL]
                        Specify the RCU implementation to test.
 
-       rcutorture.verbose= [KNL,BOOT]
+       rcutorture.verbose= [KNL]
                        Enable additional printk() statements.
 
+       rcupdate.rcu_expedited= [KNL]
+                       Use expedited grace-period primitives, for
+                       example, synchronize_rcu_expedited() instead
+                       of synchronize_rcu().  This reduces latency,
+                       but can increase CPU utilization, degrade
+                       real-time latency, and degrade energy efficiency.
+
+       rcupdate.rcu_cpu_stall_suppress= [KNL]
+                       Suppress RCU CPU stall warning messages.
+
+       rcupdate.rcu_cpu_stall_timeout= [KNL]
+                       Set timeout for RCU CPU stall warning messages.
+
        rdinit=         [KNL]
                        Format: <full_path>
                        Run specified binary instead of /init from the ramdisk,
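
The new rcupdate.rcu_expedited= entry above controls which update-side wait is
used. Below is a hypothetical sketch of the two variants (struct foo, slot and
retire_object() are illustrative, not from the patch); setting the boot
parameter makes a plain synchronize_rcu() behave like the expedited call, hence
the latency/CPU/energy trade-off noted in the text:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/types.h>

struct foo {                            /* illustrative type */
        int val;
};

static struct foo __rcu *slot;

static void retire_object(struct foo *newp, bool expedite)
{
        struct foo *old = rcu_dereference_protected(slot, 1);

        rcu_assign_pointer(slot, newp);
        if (expedite)
                synchronize_rcu_expedited();    /* low latency, more CPU/IPI work */
        else
                synchronize_rcu();              /* normal grace period */
        kfree(old);
}
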
index 32351bfabf2038617c35326f4545ac9f33b02073..827104fb9364cf60df2359e45c3336f61d4568c5 100644 (file)
@@ -181,12 +181,17 @@ To reduce its OS jitter, do any of the following:
                make sure that this is safe on your particular system.
        d.      It is not possible to entirely get rid of OS jitter
                from vmstat_update() on CONFIG_SMP=y systems, but you
-               can decrease its frequency by writing a large value to
-               /proc/sys/vm/stat_interval.  The default value is HZ,
-               for an interval of one second.  Of course, larger values
-               will make your virtual-memory statistics update more
-               slowly.  Of course, you can also run your workload at
-               a real-time priority, thus preempting vmstat_update().
+               can decrease its frequency by writing a large value
+               to /proc/sys/vm/stat_interval.  The default value is
+               HZ, for an interval of one second.  Of course, larger
+               values will make your virtual-memory statistics update
+               more slowly.  Of course, you can also run your workload
+               at a real-time priority, thus preempting vmstat_update(),
+               but if your workload is CPU-bound, this is a bad idea.
+               However, there is an RFC patch from Christoph Lameter
+               (based on an earlier one from Gilad Ben-Yossef) that
+               reduces or even eliminates vmstat overhead for some
+               workloads at https://lkml.org/lkml/2013/9/4/379.
        e.      If running on high-end powerpc servers, build with
                CONFIG_PPC_RTAS_DAEMON=n.  This prevents the RTAS
                daemon from running on each CPU every second or so.
index dd2f7b26ca3077737dd93ee97217248216b1878d..72d010689751b3cc7c60349342ede7cd3f8faf49 100644 (file)
@@ -46,16 +46,14 @@ With these hooks we provide the following statistics:
  contentions       - number of lock acquisitions that had to wait
  wait time min     - shortest (non-0) time we ever had to wait for a lock
            max     - longest time we ever had to wait for a lock
-           total   - total time we spend waiting on this lock
+          total   - total time we spend waiting on this lock
+          avg     - average time spent waiting on this lock
  acq-bounces       - number of lock acquisitions that involved x-cpu data
  acquisitions      - number of times we took the lock
  hold time min     - shortest (non-0) time we ever held the lock
-           max     - longest time we ever held the lock
-           total   - total time this lock was held
-
-From these number various other statistics can be derived, such as:
-
- hold time average = hold time total / acquisitions
+          max     - longest time we ever held the lock
+          total   - total time this lock was held
+          avg     - average time this lock was held
 
 These numbers are gathered per lock class, per read/write state (when
 applicable).
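
The new avg columns simply print the derivation that the removed paragraph used
to spell out, for both wait and hold time. A hypothetical sketch of that
derivation (the kernel's real bookkeeping differs; the report prints times in
microseconds):

#include <linux/types.h>

struct lockstat_line {                  /* illustrative, not the kernel's struct */
        u64 waittime_total, holdtime_total;     /* same time unit as the report */
        unsigned long contentions, acquisitions;
};

static u64 waittime_avg(const struct lockstat_line *s)
{
        return s->contentions ? s->waittime_total / s->contentions : 0;
}

static u64 holdtime_avg(const struct lockstat_line *s)
{
        return s->acquisitions ? s->holdtime_total / s->acquisitions : 0;
}

The excerpt that follows bears this out: for unix_table_lock on line 21,
163.91/112 gives the 1.46 waittime-avg and 31589.81/66312 gives the 0.48
holdtime-avg.
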
@@ -84,37 +82,38 @@ Look at the current lock statistics:
 
 # less /proc/lock_stat
 
-01 lock_stat version 0.3
-02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-03                               class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
-04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+01 lock_stat version 0.4
+02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
+04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 05
-06                          &mm->mmap_sem-W:           233            538 18446744073708       22924.27      607243.51           1342          45806           1.71        8595.89     1180582.34
-07                          &mm->mmap_sem-R:           205            587 18446744073708       28403.36      731975.00           1940         412426           0.58      187825.45     6307502.88
-08                          ---------------
-09                            &mm->mmap_sem            487          [<ffffffff8053491f>] do_page_fault+0x466/0x928
-10                            &mm->mmap_sem            179          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
-11                            &mm->mmap_sem            279          [<ffffffff80210a57>] sys_mmap+0x75/0xce
-12                            &mm->mmap_sem             76          [<ffffffff802a490b>] sys_munmap+0x32/0x59
-13                          ---------------
-14                            &mm->mmap_sem            270          [<ffffffff80210a57>] sys_mmap+0x75/0xce
-15                            &mm->mmap_sem            431          [<ffffffff8053491f>] do_page_fault+0x466/0x928
-16                            &mm->mmap_sem            138          [<ffffffff802a490b>] sys_munmap+0x32/0x59
-17                            &mm->mmap_sem            145          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
+07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
+08                         ---------------
+09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+19                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
+13                         ---------------
+14                           &mm->mmap_sem              1          [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+15                           &mm->mmap_sem             60          [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+16                           &mm->mmap_sem             41          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+17                           &mm->mmap_sem             68          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
 18
-19 ...............................................................................................................................................................................................
+19.............................................................................................................................................................................................................................
 20
-21                              dcache_lock:           621            623           0.52         118.26        1053.02           6745          91930           0.29         316.29      118423.41
-22                              -----------
-23                              dcache_lock            179          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
-24                              dcache_lock            113          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
-25                              dcache_lock             99          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
-26                              dcache_lock            104          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
-27                              -----------
-28                              dcache_lock            192          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
-29                              dcache_lock             98          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
-30                              dcache_lock             72          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
-31                              dcache_lock            112          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
+21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
+22                         ---------------
+23                         unix_table_lock             45          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+24                         unix_table_lock             47          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+25                         unix_table_lock             15          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+26                         unix_table_lock              5          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+27                         ---------------
+28                         unix_table_lock             39          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+29                         unix_table_lock             49          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+30                         unix_table_lock             20          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+31                         unix_table_lock              4          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
 
 This excerpt shows the first two lock class statistics. Line 01 shows the
 output version - each time the format changes this will be updated. Line 02-04
@@ -131,30 +130,30 @@ The integer part of the time values is in us.
 
 Dealing with nested locks, subclasses may appear:
 
-32...............................................................................................................................................................................................
+32...........................................................................................................................................................................................................................
 33
-34                               &rq->lock:         13128          13128           0.43         190.53      103881.26          97454        3453404           0.00         401.11    13224683.11
+34                               &rq->lock:       13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
 35                               ---------
-36                               &rq->lock            645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-37                               &rq->lock            297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-38                               &rq->lock            360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
-39                               &rq->lock            428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+36                               &rq->lock          645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+37                               &rq->lock          297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+38                               &rq->lock          360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+39                               &rq->lock          428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
 40                               ---------
-41                               &rq->lock             77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-42                               &rq->lock            174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-43                               &rq->lock           4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-44                               &rq->lock            893          [<ffffffff81340524>] schedule+0x157/0x7b8
+41                               &rq->lock           77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+42                               &rq->lock          174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+43                               &rq->lock         4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+44                               &rq->lock          893          [<ffffffff81340524>] schedule+0x157/0x7b8
 45
-46...............................................................................................................................................................................................
+46...........................................................................................................................................................................................................................
 47
-48                             &rq->lock/1:         11526          11488           0.33         388.73      136294.31          21461          38404           0.00          37.93      109388.53
+48                             &rq->lock/1:        1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
 49                             -----------
-50                             &rq->lock/1          11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+50                             &rq->lock/1        11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
 51                             -----------
-52                             &rq->lock/1           5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-53                             &rq->lock/1           1224          [<ffffffff81340524>] schedule+0x157/0x7b8
-54                             &rq->lock/1           4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-55                             &rq->lock/1            181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+52                             &rq->lock/1         5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+53                             &rq->lock/1         1224          [<ffffffff81340524>] schedule+0x157/0x7b8
+54                             &rq->lock/1         4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+55                             &rq->lock/1          181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
 
 Line 48 shows statistics for the second subclass (/1) of &rq->lock class
 (subclass starts from 0), since in this case, as line 50 suggests,
@@ -163,16 +162,16 @@ double_rq_lock actually acquires a nested lock of two spinlocks.
 View the top contending locks:
 
 # grep : /proc/lock_stat | head
-              &inode->i_data.tree_lock-W:            15          21657           0.18     1093295.30 11547131054.85             58          10415           0.16          87.51        6387.60
-              &inode->i_data.tree_lock-R:             0              0           0.00           0.00           0.00          23302         231198           0.25           8.45       98023.38
-                             dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
-                         &inode->i_mutex:           161            286 18446744073709       62882.54     1244614.55           3653          20598 18446744073709       62318.60     1693822.74
-                         &zone->lru_lock:            94             94           0.53           7.33          92.10           4366          32690           0.29          59.81       16350.06
-              &inode->i_data.i_mmap_mutex:            79             79           0.40           3.77          53.03          11779          87755           0.28         116.93       29898.44
-                        &q->__queue_lock:            48             50           0.52          31.62          86.31            774          13131           0.17         113.08       12277.52
-                        &rq->rq_lock_key:            43             47           0.74          68.50         170.63           3706          33929           0.22         107.99       17460.62
-                      &rq->rq_lock_key#2:            39             46           0.75           6.68          49.03           2979          32292           0.17         125.17       17137.63
-                         tasklist_lock-W:            15             15           1.45          10.87          32.70           1201           7390           0.58          62.55       13648.47
+                       clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
+                    tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
+                 &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
+                              &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
+              &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
+                        tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
+                        tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
+                             rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
+       &(&dentry->d_lockref.lock)->rlock:         39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
+                             rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
 
 Clear the statistics:
 
index dcd69cb34806fce523bd516b28ad1572f28eeadf..ddabf7042b654d7a06dbe2b67d45706775445ccb 100644 (file)
@@ -6950,7 +6950,7 @@ M:        "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
 F:     Documentation/RCU/torture.txt
-F:     kernel/rcutorture.c
+F:     kernel/rcu/torture.c
 
 RDC R-321X SoC
 M:     Florian Fainelli <florian@openwrt.org>
@@ -6977,8 +6977,9 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
 F:     Documentation/RCU/
 X:     Documentation/RCU/torture.txt
 F:     include/linux/rcu*
-F:     kernel/rcu*
-X:     kernel/rcutorture.c
+X:     include/linux/srcu.h
+F:     kernel/rcu/
+X:     kernel/rcu/torture.c
 
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:     Alessandro Zummo <a.zummo@towertech.it>
@@ -7667,8 +7668,8 @@ M:        "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 W:     http://www.rdrop.com/users/paulmck/RCU/
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
-F:     include/linux/srcu*
-F:     kernel/srcu*
+F:     include/linux/srcu.h
+F:     kernel/rcu/srcu.c
 
 SMACK SECURITY MODULE
 M:     Casey Schaufler <casey@schaufler-ca.com>
@@ -8919,61 +8920,14 @@ W:      http://pegasus2.sourceforge.net/
 S:     Maintained
 F:     drivers/net/usb/rtl8150.c
 
-USB SERIAL BELKIN F5U103 DRIVER
-M:     William Greathouse <wgreathouse@smva.com>
+USB SERIAL SUBSYSTEM
+M:     Johan Hovold <jhovold@gmail.com>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
-F:     drivers/usb/serial/belkin_sa.*
-
-USB SERIAL CYPRESS M8 DRIVER
-M:     Lonnie Mendez <dignome@gmail.com>
-L:     linux-usb@vger.kernel.org
-S:     Maintained
-W:     http://geocities.com/i0xox0i
-W:     http://firstlight.net/cvs
-F:     drivers/usb/serial/cypress_m8.*
-
-USB SERIAL CYBERJACK DRIVER
-M:     Matthias Bruestle and Harald Welte <support@reiner-sct.com>
-W:     http://www.reiner-sct.de/support/treiber_cyberjack.php
-S:     Maintained
-F:     drivers/usb/serial/cyberjack.c
-
-USB SERIAL DIGI ACCELEPORT DRIVER
-M:     Peter Berger <pberger@brimson.com>
-M:     Al Borchers <alborchers@steinerpoint.com>
-L:     linux-usb@vger.kernel.org
-S:     Maintained
-F:     drivers/usb/serial/digi_acceleport.c
-
-USB SERIAL DRIVER
-M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     linux-usb@vger.kernel.org
-S:     Supported
 F:     Documentation/usb/usb-serial.txt
-F:     drivers/usb/serial/generic.c
-F:     drivers/usb/serial/usb-serial.c
+F:     drivers/usb/serial/
 F:     include/linux/usb/serial.h
 
-USB SERIAL EMPEG EMPEG-CAR MARK I/II DRIVER
-M:     Gary Brubaker <xavyer@ix.netcom.com>
-L:     linux-usb@vger.kernel.org
-S:     Maintained
-F:     drivers/usb/serial/empeg.c
-
-USB SERIAL KEYSPAN DRIVER
-M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     linux-usb@vger.kernel.org
-S:     Maintained
-F:     drivers/usb/serial/*keyspan*
-
-USB SERIAL WHITEHEAT DRIVER
-M:     Support Department <support@connecttech.com>
-L:     linux-usb@vger.kernel.org
-W:     http://www.connecttech.com
-S:     Supported
-F:     drivers/usb/serial/whiteheat*
-
 USB SMSC75XX ETHERNET DRIVER
 M:     Steve Glendinning <steve.glendinning@shawell.net>
 L:     netdev@vger.kernel.org
index 868c0eb67b081bcedbb9a284c0fdd2ec6f1af8ee..67077ad6edbb26cbe7ea9b54efbc7ad51105548a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = One Giant Leap for Frogkind
 
 # *DOCUMENTATION*
index d63f3de0cd5bf60e209bf00be68fb8c628862126..0c14d8a526833fd3236f680abcad6c5f63279d36 100644 (file)
@@ -17,7 +17,7 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu.h>
 
-static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
+static int handle_vmalloc_fault(unsigned long address)
 {
        /*
         * Synchronize this task's top level page-table
@@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;
 
-       pgd = pgd_offset_fast(mm, address);
+       pgd = pgd_offset_fast(current->active_mm, address);
        pgd_k = pgd_offset_k(address);
 
        if (!pgd_present(*pgd_k))
@@ -72,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
         * nothing more.
         */
        if (address >= VMALLOC_START && address <= VMALLOC_END) {
-               ret = handle_vmalloc_fault(mm, address);
+               ret = handle_vmalloc_fault(address);
                if (unlikely(ret))
                        goto bad_area_nosemaphore;
                else
index 45f1ffcf1a4b6299b181cf99e9d08cac59da6cfb..24cdf64789c39dcf8be4a4c214b02540aa02370b 100644 (file)
@@ -971,11 +971,11 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 [C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x1c, CNTR_ODD, P },
-               [C(RESULT_MISS)]        = { 0x1d, CNTR_EVEN | CNTR_ODD, P },
+               [C(RESULT_MISS)]        = { 0x1d, CNTR_EVEN, P },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0x1c, CNTR_ODD, P },
-               [C(RESULT_MISS)]        = { 0x1d, CNTR_EVEN | CNTR_ODD, P },
+               [C(RESULT_MISS)]        = { 0x1d, CNTR_EVEN, P },
        },
 },
 [C(ITLB)] = {
index c69da37346995e94a5e5d4a33dafce0585b84ed9..5b28e81d94a086ad8a080e727439ffb48fa22ac9 100644 (file)
@@ -473,7 +473,7 @@ static void __init fill_ipi_map(void)
 {
        int cpu;
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+       for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
                fill_ipi_map1(gic_resched_int_base, cpu, GIC_CPU_INT1);
                fill_ipi_map1(gic_call_int_base, cpu, GIC_CPU_INT2);
        }
@@ -574,8 +574,9 @@ void __init arch_init_irq(void)
                /* FIXME */
                int i;
 #if defined(CONFIG_MIPS_MT_SMP)
-               gic_call_int_base = GIC_NUM_INTRS - NR_CPUS;
-               gic_resched_int_base = gic_call_int_base - NR_CPUS;
+               gic_call_int_base = GIC_NUM_INTRS -
+                       (NR_CPUS - nr_cpu_ids) * 2 - nr_cpu_ids;
+               gic_resched_int_base = gic_call_int_base - nr_cpu_ids;
                fill_ipi_map();
 #endif
                gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map,
@@ -599,7 +600,7 @@ void __init arch_init_irq(void)
                printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status());
                write_c0_status(0x1100dc00);
                printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status());
-               for (i = 0; i < NR_CPUS; i++) {
+               for (i = 0; i < nr_cpu_ids; i++) {
                        arch_init_ipiirq(MIPS_GIC_IRQ_BASE +
                                         GIC_RESCHED_INT(i), &irq_resched);
                        arch_init_ipiirq(MIPS_GIC_IRQ_BASE +
index e49241a2c39a3b4d359f8d77eb6063f26ea89be0..202785709441c0a704b7cf1110e151834e9219c3 100644 (file)
@@ -126,7 +126,7 @@ static int rt_timer_probe(struct platform_device *pdev)
                return -ENOENT;
        }
 
-       rt->membase = devm_request_and_ioremap(&pdev->dev, res);
+       rt->membase = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(rt->membase))
                return PTR_ERR(rt->membase);
 
index f67e839f06c845e406d81c8b2be285ced8b57448..e22d7f5f9837f7288d393a9af4a412afa35d49ff 100644 (file)
@@ -638,10 +638,10 @@ config PARAVIRT_SPINLOCKS
          spinlock implementation with something virtualization-friendly
          (for example, block the virtual CPU rather than spinning).
 
-         Unfortunately the downside is an up to 5% performance hit on
-         native kernels, with various workloads.
+         It has a minimal impact on native kernels and gives a nice performance
+         benefit on paravirtualized KVM / Xen kernels.
 
-         If you are unsure how to answer this question, answer N.
+         If you are unsure how to answer this question, answer Y.
 
 source "arch/x86/xen/Kconfig"
 
index 15e8563e5c244e0712c9696a3367067044a300b3..df94598ad05a845902e9897214cdceacc779a80d 100644 (file)
@@ -402,17 +402,6 @@ void alternatives_enable_smp(void)
 {
        struct smp_alt_module *mod;
 
-#ifdef CONFIG_LOCKDEP
-       /*
-        * Older binutils section handling bug prevented
-        * alternatives-replacement from working reliably.
-        *
-        * If this still occurs then you should see a hang
-        * or crash shortly after this line:
-        */
-       pr_info("lockdep: fixing up alternatives\n");
-#endif
-
        /* Why bother if there are no other CPUs? */
        BUG_ON(num_possible_cpus() == 1);
 
index 9d8449158cf989af3009c6606b7a878c827f5d32..8a87a3224121fdeac2d2fd59fb2142f6de8d8a24 100644 (file)
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-       int ret;
        u64 start_clock;
        u64 finish_clock;
+       int ret;
 
        if (!atomic_read(&active_events))
                return NMI_DONE;
 
-       start_clock = local_clock();
+       start_clock = sched_clock();
        ret = x86_pmu.handle_irq(regs);
-       finish_clock = local_clock();
+       finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
 
index ba77ebc2c35321dc4085c06a106f715ec18d9a21..6fcb49ce50a1260d1f4bfdf0b5a065dd4f6d77c7 100644 (file)
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
                u64 before, delta, whole_msecs;
                int remainder_ns, decimal_msecs, thishandled;
 
-               before = local_clock();
+               before = sched_clock();
                thishandled = a->handler(type, regs);
                handled += thishandled;
-               delta = local_clock() - before;
+               delta = sched_clock() - before;
                trace_nmi_handler(a->handler, (int)delta, thishandled);
 
                if (delta < nmi_longest_ns)
index 51410c2ac2cb617b9a98b9c0fd869e1ddd541870..4d978a3c88f743faac72af94b27cd72070507bf9 100644 (file)
  */
 
 #define SRC_CR                 0x00U
+#define SRC_CR_T0_ENSEL                BIT(15)
+#define SRC_CR_T1_ENSEL                BIT(17)
+#define SRC_CR_T2_ENSEL                BIT(19)
+#define SRC_CR_T3_ENSEL                BIT(21)
+#define SRC_CR_T4_ENSEL                BIT(23)
+#define SRC_CR_T5_ENSEL                BIT(25)
+#define SRC_CR_T6_ENSEL                BIT(27)
+#define SRC_CR_T7_ENSEL                BIT(29)
 #define SRC_XTALCR             0x0CU
 #define SRC_XTALCR_XTALTIMEN   BIT(20)
 #define SRC_XTALCR_SXTALDIS    BIT(19)
@@ -543,6 +551,19 @@ void __init nomadik_clk_init(void)
                       __func__, np->name);
                return;
        }
+
+       /* Set all timers to use the 2.4 MHz TIMCLK */
+       val = readl(src_base + SRC_CR);
+       val |= SRC_CR_T0_ENSEL;
+       val |= SRC_CR_T1_ENSEL;
+       val |= SRC_CR_T2_ENSEL;
+       val |= SRC_CR_T3_ENSEL;
+       val |= SRC_CR_T4_ENSEL;
+       val |= SRC_CR_T5_ENSEL;
+       val |= SRC_CR_T6_ENSEL;
+       val |= SRC_CR_T7_ENSEL;
+       writel(val, src_base + SRC_CR);
+
        val = readl(src_base + SRC_XTALCR);
        pr_info("SXTALO is %s\n",
                (val & SRC_XTALCR_SXTALDIS) ? "disabled" : "enabled");
index fc777bdc1886586d2f7e0e253ddc1e78758d4927..81a202d12a7ad89ab56909fce6782baa7fb94ec2 100644 (file)
@@ -39,8 +39,8 @@ static const struct coreclk_ratio a370_coreclk_ratios[] __initconst = {
 };
 
 static const u32 a370_tclk_freqs[] __initconst = {
-       16600000,
-       20000000,
+       166000000,
+       200000000,
 };
 
 static u32 __init a370_get_tclk_freq(void __iomem *sar)
index 5bb848cac6ece40ab0e5bceef82d4facafdbd80d..81dd31a686df9e467b7c111f9c808ee568139551 100644 (file)
@@ -49,7 +49,7 @@
 #define SOCFPGA_L4_SP_CLK              "l4_sp_clk"
 #define SOCFPGA_NAND_CLK               "nand_clk"
 #define SOCFPGA_NAND_X_CLK             "nand_x_clk"
-#define SOCFPGA_MMC_CLK                        "mmc_clk"
+#define SOCFPGA_MMC_CLK                        "sdmmc_clk"
 #define SOCFPGA_DB_CLK                 "gpio_db_clk"
 
 #define div_mask(width)        ((1 << (width)) - 1)
index 67ccf4aa72773520baa89708c30737c3ad13c493..f5e4c21b301f6438c32d3552cae4bb9aef9d2a44 100644 (file)
@@ -107,7 +107,7 @@ static int icst_set_rate(struct clk_hw *hw, unsigned long rate,
 
        vco = icst_hz_to_vco(icst->params, rate);
        icst->rate = icst_hz(icst->params, vco);
-       vco_set(icst->vcoreg, icst->lockreg, vco);
+       vco_set(icst->lockreg, icst->vcoreg, vco);
        return 0;
 }
 
index db59bb9fbe23f75b6be9e182937b4ff6588cfafe..10d1de5bce6ff7a35921fa19d1327863b85ab86d 100644 (file)
@@ -107,8 +107,6 @@ static unsigned int intel_crt_get_flags(struct intel_encoder *encoder)
 static void intel_crt_get_config(struct intel_encoder *encoder,
                                 struct intel_crtc_config *pipe_config)
 {
-       struct drm_device *dev = encoder->base.dev;
-
        pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder);
 }
 
index c45f9c0a1b3493f8f5566c87d6b8702f9fc8116c..b21d553c245b51e3c6582485d8feb50f35cffee4 100644 (file)
@@ -904,6 +904,7 @@ static struct usb_device_id id_table_combined [] = {
        { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) },
        /* Crucible Devices */
        { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) },
        { }                                     /* Terminating entry */
 };
 
index 1b8af461b522c65a111983226ba1e704f012ac8c..a7019d1e305814867bb43792ebe55eb00dd7c3fd 100644 (file)
  * Manufacturer: Crucible Technologies
  */
 #define FTDI_CT_COMET_PID      0x8e08
+
+/*
+ * Product: Z3X Box
+ * Manufacturer: Smart GSM Team
+ */
+#define FTDI_Z3X_PID           0x0011
index bedf8e47713be02dfc80b8a2e3512cb24a455a15..1e6de4cd079d6a2c0e3b4b3c7679d6145bcf6f2c 100644 (file)
@@ -4,11 +4,6 @@
  * Copyright (C) 2001-2007 Greg Kroah-Hartman (greg@kroah.com)
  * Copyright (C) 2003 IBM Corp.
  *
- * Copyright (C) 2009, 2013 Frank Schäfer <fschaefer.oss@googlemail.com>
- *  - fixes, improvements and documentation for the baud rate encoding methods
- * Copyright (C) 2013 Reinhard Max <max@suse.de>
- *  - fixes and improvements for the divisor based baud rate encoding method
- *
  * Original driver for 2.2.x by anonymous
  *
  *     This program is free software; you can redistribute it and/or
@@ -134,18 +129,10 @@ MODULE_DEVICE_TABLE(usb, id_table);
 
 
 enum pl2303_type {
-       type_0,         /* H version ? */
-       type_1,         /* H version ? */
-       HX_TA,          /* HX(A) / X(A) / TA version  */ /* TODO: improve */
-       HXD_EA_RA_SA,   /* HXD / EA / RA / SA version */ /* TODO: improve */
-       TB,             /* TB version */
-       HX_CLONE,       /* Cheap and less functional clone of the HX chip */
+       type_0,         /* don't know the difference between type 0 and */
+       type_1,         /* type 1, until someone from prolific tells us... */
+       HX,             /* HX version of the pl2303 chip */
 };
-/*
- * NOTE: don't know the difference between type 0 and type 1,
- * until someone from Prolific tells us...
- * TODO: distinguish between X/HX, TA and HXD, EA, RA, SA variants
- */
 
 struct pl2303_serial_private {
        enum pl2303_type type;
@@ -185,7 +172,6 @@ static int pl2303_startup(struct usb_serial *serial)
 {
        struct pl2303_serial_private *spriv;
        enum pl2303_type type = type_0;
-       char *type_str = "unknown (treating as type_0)";
        unsigned char *buf;
 
        spriv = kzalloc(sizeof(*spriv), GFP_KERNEL);
@@ -198,53 +184,15 @@ static int pl2303_startup(struct usb_serial *serial)
                return -ENOMEM;
        }
 
-       if (serial->dev->descriptor.bDeviceClass == 0x02) {
+       if (serial->dev->descriptor.bDeviceClass == 0x02)
                type = type_0;
-               type_str = "type_0";
-       } else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) {
-               /*
-                * NOTE: The bcdDevice version is the only difference between
-                * the device descriptors of the X/HX, HXD, EA, RA, SA, TA, TB
-                */
-               if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x300) {
-                       /* Check if the device is a clone */
-                       pl2303_vendor_read(0x9494, 0, serial, buf);
-                       /*
-                        * NOTE: Not sure if this read is really needed.
-                        * The HX returns 0x00, the clone 0x02, but the Windows
-                        * driver seems to ignore the value and continues.
-                        */
-                       pl2303_vendor_write(0x0606, 0xaa, serial);
-                       pl2303_vendor_read(0x8686, 0, serial, buf);
-                       if (buf[0] != 0xaa) {
-                               type = HX_CLONE;
-                               type_str = "X/HX clone (limited functionality)";
-                       } else {
-                               type = HX_TA;
-                               type_str = "X/HX/TA";
-                       }
-                       pl2303_vendor_write(0x0606, 0x00, serial);
-               } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice)
-                                                                    == 0x400) {
-                       type = HXD_EA_RA_SA;
-                       type_str = "HXD/EA/RA/SA";
-               } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice)
-                                                                    == 0x500) {
-                       type = TB;
-                       type_str = "TB";
-               } else {
-                       dev_info(&serial->interface->dev,
-                                          "unknown/unsupported device type\n");
-                       kfree(spriv);
-                       kfree(buf);
-                       return -ENODEV;
-               }
-       } else if (serial->dev->descriptor.bDeviceClass == 0x00
-                  || serial->dev->descriptor.bDeviceClass == 0xFF) {
+       else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40)
+               type = HX;
+       else if (serial->dev->descriptor.bDeviceClass == 0x00)
                type = type_1;
-               type_str = "type_1";
-       }
-       dev_dbg(&serial->interface->dev, "device type: %s\n", type_str);
+       else if (serial->dev->descriptor.bDeviceClass == 0xFF)
+               type = type_1;
+       dev_dbg(&serial->interface->dev, "device type: %d\n", type);
 
        spriv->type = type;
        usb_set_serial_data(serial, spriv);
@@ -259,10 +207,10 @@ static int pl2303_startup(struct usb_serial *serial)
        pl2303_vendor_read(0x8383, 0, serial, buf);
        pl2303_vendor_write(0, 1, serial);
        pl2303_vendor_write(1, 0, serial);
-       if (type == type_0 || type == type_1)
-               pl2303_vendor_write(2, 0x24, serial);
-       else
+       if (type == HX)
                pl2303_vendor_write(2, 0x44, serial);
+       else
+               pl2303_vendor_write(2, 0x24, serial);
 
        kfree(buf);
        return 0;
@@ -316,174 +264,65 @@ static int pl2303_set_control_lines(struct usb_serial_port *port, u8 value)
        return retval;
 }
 
-static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type,
-                                                                     u8 buf[4])
+static void pl2303_encode_baudrate(struct tty_struct *tty,
+                                       struct usb_serial_port *port,
+                                       u8 buf[4])
 {
-       /*
-        * NOTE: Only the values defined in baud_sup are supported !
-        * => if unsupported values are set, the PL2303 uses 9600 baud instead
-        * => HX clones just don't work at unsupported baud rates < 115200 baud,
-        *    for baud rates > 115200 they run at 115200 baud
-        */
        const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600,
-                                4800, 7200, 9600, 14400, 19200, 28800, 38400,
-                                57600, 115200, 230400, 460800, 614400, 921600,
-                                1228800, 2457600, 3000000, 6000000, 12000000 };
+                                4800, 7200, 9600, 14400, 19200, 28800, 38400,
+                                57600, 115200, 230400, 460800, 500000, 614400,
+                                921600, 1228800, 2457600, 3000000, 6000000 };
+
+       struct usb_serial *serial = port->serial;
+       struct pl2303_serial_private *spriv = usb_get_serial_data(serial);
+       int baud;
+       int i;
+
        /*
-        * NOTE: With the exception of type_0/1 devices, the following
-        * additional baud rates are supported (tested with HX rev. 3A only):
-        * 110*, 56000*, 128000, 134400, 161280, 201600, 256000*, 268800,
-        * 403200, 806400.      (*: not HX and HX clones)
-        *
-        * Maximum values: HXD, TB: 12000000; HX, TA: 6000000;
-        *                 type_0+1: 1228800; RA: 921600; HX clones, SA: 115200
-        *
-        * As long as we are not using this encoding method for anything else
-        * than the type_0+1, HX and HX clone chips, there is no point in
-        * complicating the code to support them.
+        * NOTE: Only the values defined in baud_sup are supported!
+        *       => if unsupported values are set, the PL2303 seems to use
+        *          9600 baud (at least my PL2303X always does)
         */
-       int i;
+       baud = tty_get_baud_rate(tty);
+       dev_dbg(&port->dev, "baud requested = %d\n", baud);
+       if (!baud)
+               return;
 
        /* Set baudrate to nearest supported value */
        for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) {
                if (baud_sup[i] > baud)
                        break;
        }
+
        if (i == ARRAY_SIZE(baud_sup))
                baud = baud_sup[i - 1];
        else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1]))
                baud = baud_sup[i - 1];
        else
                baud = baud_sup[i];
-       /* Respect the chip type specific baud rate limits */
-       /*
-        * FIXME: as long as we don't know how to distinguish between the
-        * HXD, EA, RA, and SA chip variants, allow the max. value of 12M.
-        */
-       if (type == HX_TA)
-               baud = min_t(int, baud, 6000000);
-       else if (type == type_0 || type == type_1)
-               baud = min_t(int, baud, 1228800);
-       else if (type == HX_CLONE)
-               baud = min_t(int, baud, 115200);
-       /* Direct (standard) baud rate encoding method */
-       put_unaligned_le32(baud, buf);
-
-       return baud;
-}
 
-static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type,
-                                                                     u8 buf[4])
-{
-       /*
-        * Divisor based baud rate encoding method
-        *
-        * NOTE: HX clones do NOT support this method.
-        * It's not clear if the type_0/1 chips support it.
-        *
-        * divisor = 12MHz * 32 / baudrate = 2^A * B
-        *
-        * with
-        *
-        * A = buf[1] & 0x0e
-        * B = buf[0]  +  (buf[1] & 0x01) << 8
-        *
-        * Special cases:
-        * => 8 < B < 16: device seems to work not properly
-        * => B <= 8: device uses the max. value B = 512 instead
-        */
-       unsigned int A, B;
+       /* type_0, type_1 only support up to 1228800 baud */
+       if (spriv->type != HX)
+               baud = min_t(int, baud, 1228800);
 
-       /*
-        * NOTE: The Windows driver allows maximum baud rates of 110% of the
-        * specified maximium value.
-        * Quick tests with early (2004) HX (rev. A) chips suggest, that even
-        * higher baud rates (up to the maximum of 24M baud !) are working fine,
-        * but that should really be tested carefully in "real life" scenarios
-        * before removing the upper limit completely.
-        * Baud rates smaller than the specified 75 baud are definitely working
-        * fine.
-        */
-       if (type == type_0 || type == type_1)
-               baud = min_t(int, baud, 1228800 * 1.1);
-       else if (type == HX_TA)
-               baud = min_t(int, baud, 6000000 * 1.1);
-       else if (type == HXD_EA_RA_SA)
-               /* HXD, EA: 12Mbps; RA: 1Mbps; SA: 115200 bps */
-               /*
-                * FIXME: as long as we don't know how to distinguish between
-                * these chip variants, allow the max. of these values
-                */
-               baud = min_t(int, baud, 12000000 * 1.1);
-       else if (type == TB)
-               baud = min_t(int, baud, 12000000 * 1.1);
-       /* Determine factors A and B */
-       A = 0;
-       B = 12000000 * 32 / baud;  /* 12MHz */
-       B <<= 1; /* Add one bit for rounding */
-       while (B > (512 << 1) && A <= 14) {
-               A += 2;
-               B >>= 2;
-       }
-       if (A > 14) { /* max. divisor = min. baudrate reached */
-               A = 14;
-               B = 512;
-               /* => ~45.78 baud */
+       if (baud <= 115200) {
+               put_unaligned_le32(baud, buf);
        } else {
-               B = (B + 1) >> 1; /* Round the last bit */
-       }
-       /* Handle special cases */
-       if (B == 512)
-               B = 0; /* also: 1 to 8 */
-       else if (B < 16)
                /*
-                * NOTE: With the current algorithm this happens
-                * only for A=0 and means that the min. divisor
-                * (respectively: the max. baudrate) is reached.
+                * Apparently the formula for higher speeds is:
+                * baudrate = 12M * 32 / (2^buf[1]) / buf[0]
                 */
-               B = 16;         /* => 24 MBaud */
-       /* Encode the baud rate */
-       buf[3] = 0x80;     /* Select divisor encoding method */
-       buf[2] = 0;
-       buf[1] = (A & 0x0e);            /* A */
-       buf[1] |= ((B & 0x100) >> 8);   /* MSB of B */
-       buf[0] = B & 0xff;              /* 8 LSBs of B */
-       /* Calculate the actual/resulting baud rate */
-       if (B <= 8)
-               B = 512;
-       baud = 12000000 * 32 / ((1 << A) * B);
-
-       return baud;
-}
-
-static void pl2303_encode_baudrate(struct tty_struct *tty,
-                                       struct usb_serial_port *port,
-                                       enum pl2303_type type,
-                                       u8 buf[4])
-{
-       int baud;
+               unsigned tmp = 12000000 * 32 / baud;
+               buf[3] = 0x80;
+               buf[2] = 0;
+               buf[1] = (tmp >= 256);
+               while (tmp >= 256) {
+                       tmp >>= 2;
+                       buf[1] <<= 1;
+               }
+               buf[0] = tmp;
+       }
 
-       baud = tty_get_baud_rate(tty);
-       dev_dbg(&port->dev, "baud requested = %d\n", baud);
-       if (!baud)
-               return;
-       /*
-        * There are two methods for setting/encoding the baud rate
-        * 1) Direct method: encodes the baud rate value directly
-        *    => supported by all chip types
-        * 2) Divisor based method: encodes a divisor to a base value (12MHz*32)
-        *    => not supported by HX clones (and likely type_0/1 chips)
-        *
-        * NOTE: Although the divisor based baud rate encoding method is much
-        * more flexible, some of the standard baud rate values can not be
-        * realized exactly. But the difference is very small (max. 0.2%) and
-        * the device likely uses the same baud rate generator for both methods
-        * so that there is likley no difference.
-        */
-       if (type == type_0 || type == type_1 || type == HX_CLONE)
-               baud = pl2303_baudrate_encode_direct(baud, type, buf);
-       else
-               baud = pl2303_baudrate_encode_divisor(baud, type, buf);
        /* Save resulting baud rate */
        tty_encode_baud_rate(tty, baud, baud);
        dev_dbg(&port->dev, "baud set = %d\n", baud);
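
For illustration, a standalone sketch (not part of the patch; ordinary userspace C, with 230400 baud chosen as an arbitrary example) that reproduces the high-speed encoding branch of the new pl2303_encode_baudrate() above:

#include <stdio.h>

int main(void)
{
	unsigned int baud = 230400;			/* example request */
	unsigned int tmp = 12000000 * 32 / baud;	/* 1666 */
	unsigned char buf[4];

	buf[3] = 0x80;			/* select divisor encoding */
	buf[2] = 0;
	buf[1] = (tmp >= 256);
	while (tmp >= 256) {		/* same loop as the driver code above */
		tmp >>= 2;
		buf[1] <<= 1;
	}
	buf[0] = tmp;

	/* Yields buf = { 104, 4, 0, 0x80 }; by the comment's formula,
	 * 12M * 32 / (1 << buf[1]) / buf[0] is roughly 230769 baud. */
	printf("buf = { %u, %u, %u, 0x%02x }\n",
	       buf[0], buf[1], buf[2], buf[3]);
	return 0;
}
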
@@ -540,8 +379,8 @@ static void pl2303_set_termios(struct tty_struct *tty,
                dev_dbg(&port->dev, "data bits = %d\n", buf[6]);
        }
 
-       /* For reference:   buf[0]:buf[3] baud rate value */
-       pl2303_encode_baudrate(tty, port, spriv->type, buf);
+       /* For reference buf[0]:buf[3] baud rate value */
+       pl2303_encode_baudrate(tty, port, &buf[0]);
 
        /* For reference buf[4]=0 is 1 stop bits */
        /* For reference buf[4]=1 is 1.5 stop bits */
@@ -618,10 +457,10 @@ static void pl2303_set_termios(struct tty_struct *tty,
        dev_dbg(&port->dev, "0xa1:0x21:0:0  %d - %7ph\n", i, buf);
 
        if (C_CRTSCTS(tty)) {
-               if (spriv->type == type_0 || spriv->type == type_1)
-                       pl2303_vendor_write(0x0, 0x41, serial);
-               else
+               if (spriv->type == HX)
                        pl2303_vendor_write(0x0, 0x61, serial);
+               else
+                       pl2303_vendor_write(0x0, 0x41, serial);
        } else {
                pl2303_vendor_write(0x0, 0x0, serial);
        }
@@ -658,7 +497,7 @@ static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port)
        struct pl2303_serial_private *spriv = usb_get_serial_data(serial);
        int result;
 
-       if (spriv->type == type_0 || spriv->type == type_1) {
+       if (spriv->type != HX) {
                usb_clear_halt(serial->dev, port->write_urb->pipe);
                usb_clear_halt(serial->dev, port->read_urb->pipe);
        } else {
@@ -833,7 +672,6 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state)
        result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
                                 BREAK_REQUEST, BREAK_REQUEST_TYPE, state,
                                 0, NULL, 0, 100);
-       /* NOTE: HX clones don't support sending breaks, -EPIPE is returned */
        if (result)
                dev_err(&port->dev, "error sending break = %d\n", result);
 }
index 20532cb0b06e7ebfc6887e87fd3781e65eb41999..ae6ebb88ceff15ccef25f26804532d8b89cbe45a 100644 (file)
@@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop);
  * If ref is non-zero, then decrement the refcount too.
  * Returns dentry requiring refcount drop, or NULL if we're done.
  */
-static inline struct dentry *
+static struct dentry *
 dentry_kill(struct dentry *dentry, int unlock_on_failure)
        __releases(dentry->d_lock)
 {
@@ -630,7 +630,8 @@ repeat:
                        goto kill_it;
        }
 
-       dentry->d_flags |= DCACHE_REFERENCED;
+       if (!(dentry->d_flags & DCACHE_REFERENCED))
+               dentry->d_flags |= DCACHE_REFERENCED;
        dentry_lru_add(dentry);
 
        dentry->d_lockref.count--;
index 19c19a5eee293396d0ddec9454e1ecb55adfed5b..f6c82de125413e8b6076ab91d68f64771641bfd1 100644 (file)
@@ -34,9 +34,9 @@ struct ipc_namespace {
        int             sem_ctls[4];
        int             used_sems;
 
-       int             msg_ctlmax;
-       int             msg_ctlmnb;
-       int             msg_ctlmni;
+       unsigned int    msg_ctlmax;
+       unsigned int    msg_ctlmnb;
+       unsigned int    msg_ctlmni;
        atomic_t        msg_bytes;
        atomic_t        msg_hdrs;
        int             auto_msgmni;
index 4106721c4e5e39d3d0a08d44f5d50909da6c9b86..45a0a9e81478c41187088d6ce3fb9762f070e335 100644 (file)
  * be used anywhere you would want to use a list_empty_rcu().
  */
 
+/*
+ * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
+ * @list: list to be initialized
+ *
+ * You should instead use INIT_LIST_HEAD() for normal initialization and
+ * cleanup tasks, when readers have no access to the list being initialized.
+ * However, if the list being initialized is visible to readers, you
+ * need to keep the compiler from being too mischievous.
+ */
+static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
+{
+       ACCESS_ONCE(list->next) = list;
+       ACCESS_ONCE(list->prev) = list;
+}
+
 /*
  * return the ->next pointer of a list_head in an rcu safe
  * way, we must not access it directly
@@ -191,9 +206,13 @@ static inline void list_splice_init_rcu(struct list_head *list,
        if (list_empty(list))
                return;
 
-       /* "first" and "last" tracking list, so initialize it. */
+       /*
+        * "first" and "last" tracking list, so initialize it.  RCU readers
+        * have access to this list, so we must use INIT_LIST_HEAD_RCU()
+        * instead of INIT_LIST_HEAD().
+        */
 
-       INIT_LIST_HEAD(list);
+       INIT_LIST_HEAD_RCU(list);
 
        /*
         * At this point, the list body still points to the source list.
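
As a caller-side sketch of where the new INIT_LIST_HEAD_RCU() matters (the 'events' list and reset_events() helper are made up for illustration; list_splice_init_rcu() and synchronize_rcu() are the real kernel APIs): the splice below empties a list that RCU readers may still be traversing, so the re-initialization it performs must go through the new helper.

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct event {
	struct list_head node;
	int id;
};

static LIST_HEAD(events);	/* walked with list_for_each_entry_rcu() */

/* Detach all queued events for private processing (updater-side
 * locking against other writers is omitted for brevity). */
static void reset_events(struct list_head *harvested)
{
	list_splice_init_rcu(&events, harvested, synchronize_rcu);
}
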
index f1f1bc39346b85c303c227e3137b78e0e87bdcc8..39cbb889e20da020c8936aae8124601b718e05a5 100644 (file)
@@ -261,6 +261,10 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
                rcu_irq_exit(); \
        } while (0)
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
+extern bool __rcu_is_watching(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -297,10 +301,6 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
-extern int rcu_is_cpu_idle(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
-
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -351,7 +351,7 @@ static inline int rcu_read_lock_held(void)
 {
        if (!debug_lockdep_rcu_enabled())
                return 1;
-       if (rcu_is_cpu_idle())
+       if (!rcu_is_watching())
                return 0;
        if (!rcu_lockdep_current_cpu_online())
                return 0;
@@ -402,7 +402,7 @@ static inline int rcu_read_lock_sched_held(void)
 
        if (!debug_lockdep_rcu_enabled())
                return 1;
-       if (rcu_is_cpu_idle())
+       if (!rcu_is_watching())
                return 0;
        if (!rcu_lockdep_current_cpu_online())
                return 0;
@@ -771,7 +771,7 @@ static inline void rcu_read_lock(void)
        __rcu_read_lock();
        __acquire(RCU);
        rcu_lock_acquire(&rcu_lock_map);
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_lock() used illegally while idle");
 }
 
@@ -792,7 +792,7 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_unlock() used illegally while idle");
        rcu_lock_release(&rcu_lock_map);
        __release(RCU);
@@ -821,7 +821,7 @@ static inline void rcu_read_lock_bh(void)
        local_bh_disable();
        __acquire(RCU_BH);
        rcu_lock_acquire(&rcu_bh_lock_map);
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_lock_bh() used illegally while idle");
 }
 
@@ -832,7 +832,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_unlock_bh() used illegally while idle");
        rcu_lock_release(&rcu_bh_lock_map);
        __release(RCU_BH);
@@ -857,7 +857,7 @@ static inline void rcu_read_lock_sched(void)
        preempt_disable();
        __acquire(RCU_SCHED);
        rcu_lock_acquire(&rcu_sched_lock_map);
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_lock_sched() used illegally while idle");
 }
 
@@ -875,7 +875,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+       rcu_lockdep_assert(rcu_is_watching(),
                           "rcu_read_unlock_sched() used illegally while idle");
        rcu_lock_release(&rcu_sched_lock_map);
        __release(RCU_SCHED);
index e31005ee339ebde022bd7dabace245ee79474d9b..09ebcbe9fd780932e2dee223daaa8426395e77d8 100644 (file)
@@ -132,4 +132,21 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
+
+static inline bool rcu_is_watching(void)
+{
+       return __rcu_is_watching();
+}
+
+#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
+
+static inline bool rcu_is_watching(void)
+{
+       return true;
+}
+
+
+#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
+
 #endif /* __LINUX_RCUTINY_H */
index 226169d1bd2bc07fa465d9c4b4b21cf8131de162..4b9c8154874269d66cc3fa1312bc6e53d2584669 100644 (file)
@@ -90,4 +90,6 @@ extern void exit_rcu(void);
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
+extern bool rcu_is_watching(void);
+
 #endif /* __LINUX_RCUTREE_H */
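
A hedged usage sketch of the renamed helper (the probe function is hypothetical; rcu_is_watching(), rcu_read_lock() and rcu_read_unlock() are the kernel primitives): code that may run from the idle loop now asks whether RCU is watching, replacing the old inverted rcu_is_cpu_idle() test.

#include <linux/rcupdate.h>

static void example_idle_safe_probe(void)
{
	if (!rcu_is_watching())
		return;	/* extended quiescent state: RCU readers would be illegal */

	rcu_read_lock();
	/* ... dereference RCU-protected data here ... */
	rcu_read_unlock();
}
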
index 10d16c4fbe89e45e157e173a98607d8e315c0f42..41467f8ff8ec8c7c5766021abe00e358f63e93cc 100644 (file)
@@ -2,8 +2,8 @@
 #define _SCHED_SYSCTL_H
 
 #ifdef CONFIG_DETECT_HUNG_TASK
+extern int          sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
index ee2376cfaab3a668d01374844c6757d07e7e1be8..aca382266411620f6bd7b052311cee6d6b04e8e7 100644 (file)
@@ -39,15 +39,26 @@ TRACE_EVENT(rcu_utilization,
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 
 /*
- * Tracepoint for grace-period events: starting and ending a grace
- * period ("start" and "end", respectively), a CPU noting the start
- * of a new grace period or the end of an old grace period ("cpustart"
- * and "cpuend", respectively), a CPU passing through a quiescent
- * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
- * and "cpuofl", respectively), a CPU being kicked for being too
- * long in dyntick-idle mode ("kick"), a CPU accelerating its new
- * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
- * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
+ * Tracepoint for grace-period events.  Takes a string identifying the
+ * RCU flavor, the grace-period number, and a string identifying the
+ * grace-period-related event as follows:
+ *
+ *     "AccReadyCB": CPU accelerates new callbacks to RCU_NEXT_READY_TAIL.

+ *     "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL.
+ *     "newreq": Request a new grace period.
+ *     "start": Start a grace period.
+ *     "cpustart": CPU first notices a grace-period start.
+ *     "cpuqs": CPU passes through a quiescent state.
+ *     "cpuonl": CPU comes online.
+ *     "cpuofl": CPU goes offline.
+ *     "reqwait": GP kthread sleeps waiting for grace-period request.
+ *     "reqwaitsig": GP kthread awakened by signal from reqwait state.
+ *     "fqswait": GP kthread waiting until time to force quiescent states.
+ *     "fqsstart": GP kthread starts forcing quiescent states.
+ *     "fqsend": GP kthread done forcing quiescent states.
+ *     "fqswaitsig": GP kthread awakened by signal from fqswait state.
+ *     "end": End a grace period.
+ *     "cpuend": CPU first notices a grace-period end.
  */
 TRACE_EVENT(rcu_grace_period,
 
@@ -160,6 +171,46 @@ TRACE_EVENT(rcu_grace_period_init,
                  __entry->grplo, __entry->grphi, __entry->qsmask)
 );
 
+/*
+ * Tracepoint for RCU no-CBs CPU callback handoffs.  This event is intended
+ * to assist debugging of these handoffs.
+ *
+ * The first argument is the name of the RCU flavor, and the second is
+ * the number of the offloaded CPU.  The third and final
+ * argument is a string as follows:
+ *
+ *     "WakeEmpty": Wake rcuo kthread, first CB to empty list.
+ *     "WakeOvf": Wake rcuo kthread, CB list is huge.
+ *     "WakeNot": Don't wake rcuo kthread.
+ *     "WakeNotPoll": Don't wake rcuo kthread because it is polling.
+ *     "Poll": Start of new polling cycle for rcu_nocb_poll.
+ *     "Sleep": Sleep waiting for CBs for !rcu_nocb_poll.
+ *     "WokeEmpty": rcuo kthread woke to find empty list.
+ *     "WokeNonEmpty": rcuo kthread woke to find non-empty list.
+ *     "WaitQueue": Enqueue partially done, timed wait for it to complete.
+ *     "WokeQueue": Partial enqueue now complete.
+ */
+TRACE_EVENT(rcu_nocb_wake,
+
+       TP_PROTO(const char *rcuname, int cpu, const char *reason),
+
+       TP_ARGS(rcuname, cpu, reason),
+
+       TP_STRUCT__entry(
+               __field(const char *, rcuname)
+               __field(int, cpu)
+               __field(const char *, reason)
+       ),
+
+       TP_fast_assign(
+               __entry->rcuname = rcuname;
+               __entry->cpu = cpu;
+               __entry->reason = reason;
+       ),
+
+       TP_printk("%s %d %s", __entry->rcuname, __entry->cpu, __entry->reason)
+);
+
 /*
  * Tracepoint for tasks blocking within preemptible-RCU read-side
  * critical sections.  Track the type of RCU (which one day might
@@ -540,17 +591,17 @@ TRACE_EVENT(rcu_invoke_kfree_callback,
 TRACE_EVENT(rcu_batch_end,
 
        TP_PROTO(const char *rcuname, int callbacks_invoked,
-                bool cb, bool nr, bool iit, bool risk),
+                char cb, char nr, char iit, char risk),
 
        TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk),
 
        TP_STRUCT__entry(
                __field(const char *, rcuname)
                __field(int, callbacks_invoked)
-               __field(bool, cb)
-               __field(bool, nr)
-               __field(bool, iit)
-               __field(bool, risk)
+               __field(char, cb)
+               __field(char, nr)
+               __field(char, iit)
+               __field(char, risk)
        ),
 
        TP_fast_assign(
@@ -656,6 +707,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
                                      level, grplo, grphi, event) \
                                      do { } while (0)
+#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
index 613381bcde40a8c68a0917cd19d36d6a603c9e2a..04c308413a5dd3b2295b33c8c78bf56a11fa6b20 100644 (file)
@@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio,
                        __entry->oldprio, __entry->newprio)
 );
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+TRACE_EVENT(sched_process_hang,
+       TP_PROTO(struct task_struct *tsk),
+       TP_ARGS(tsk),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid = tsk->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+);
+#endif /* CONFIG_DETECT_HUNG_TASK */
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
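
For illustration only (probe_hang(), hangmon_init() and the module boilerplate are hypothetical, not part of the patch), a debugging module could hook the new tracepoint roughly like this, given CONFIG_DETECT_HUNG_TASK:

#include <linux/module.h>
#include <linux/sched.h>
#include <trace/events/sched.h>

static void probe_hang(void *ignore, struct task_struct *tsk)
{
	pr_info("hung task detected: %s[%d]\n", tsk->comm, tsk->pid);
}

static int __init hangmon_init(void)
{
	/* register_trace_sched_process_hang() is generated by TRACE_EVENT */
	return register_trace_sched_process_hang(probe_hang, NULL);
}
module_init(hangmon_init);
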
index 009a655a5d354c51e20fb34cb12ca653e94f9b71..2fc1602e23bb041b9087597370096fcc90e84527 100644 (file)
@@ -456,13 +456,15 @@ struct perf_event_mmap_page {
        /*
         * Control data for the mmap() data buffer.
         *
-        * User-space reading the @data_head value should issue an rmb(), on
-        * SMP capable platforms, after reading this value -- see
-        * perf_event_wakeup().
+        * User-space reading the @data_head value should issue an smp_rmb(),
+        * after reading this value.
         *
         * When the mapping is PROT_WRITE the @data_tail value should be
-        * written by userspace to reflect the last read data. In this case
-        * the kernel will not over-write unread data.
+        * written by userspace to reflect the last read data, after issuing
+        * an smp_mb() to separate the data read from the ->data_tail store.
+        * In this case the kernel will not over-write unread data.
+        *
+        * See perf_output_put_handle() for the data ordering.
         */
        __u64   data_head;              /* head in the data section */
        __u64   data_tail;              /* user-space written tail */
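
To make the documented ordering concrete, a hedged userspace-consumer sketch (perf_event_mmap_page is the real UAPI structure; the barrier macros below are stand-ins for whatever arch-specific smp_rmb()/smp_mb() equivalents a real tool provides):

#include <linux/perf_event.h>
#include <stdint.h>

/* Stand-in barriers; a real profiler uses proper arch-specific ones. */
#define read_barrier()	__sync_synchronize()
#define full_barrier()	__sync_synchronize()

static void drain(volatile struct perf_event_mmap_page *pg)
{
	uint64_t head = pg->data_head;
	read_barrier();		/* pairs with the kernel barrier issued
				 * before it publishes data_head */

	/* ... consume every record between pg->data_tail and head ... */

	full_barrier();		/* order the data reads before the tail store */
	pg->data_tail = head;	/* publish how far we have read */
}
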
index 130dfece27ac7cc74c40a60c6b8eeceb97963deb..b0e99deb6d05330482c8ec98f1a511f07f9fa5f1 100644 (file)
@@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
        return err;
 }
 
-static int proc_ipc_callback_dointvec(ctl_table *table, int write,
+static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write,
        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
@@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
+       rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
 
        if (write && !rc && lenp_bef == *lenp)
                /*
@@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 #define proc_ipc_dointvec         NULL
 #define proc_ipc_dointvec_minmax   NULL
 #define proc_ipc_dointvec_minmax_orphans   NULL
-#define proc_ipc_callback_dointvec NULL
+#define proc_ipc_callback_dointvec_minmax  NULL
 #define proc_ipcauto_dointvec_minmax NULL
 #endif
 
 static int zero;
 static int one = 1;
-#ifdef CONFIG_CHECKPOINT_RESTORE
 static int int_max = INT_MAX;
-#endif
 
 static struct ctl_table ipc_kern_table[] = {
        {
@@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = {
                .data           = &init_ipc_ns.msg_ctlmax,
                .maxlen         = sizeof (init_ipc_ns.msg_ctlmax),
                .mode           = 0644,
-               .proc_handler   = proc_ipc_dointvec,
+               .proc_handler   = proc_ipc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &int_max,
        },
        {
                .procname       = "msgmni",
                .data           = &init_ipc_ns.msg_ctlmni,
                .maxlen         = sizeof (init_ipc_ns.msg_ctlmni),
                .mode           = 0644,
-               .proc_handler   = proc_ipc_callback_dointvec,
+               .proc_handler   = proc_ipc_callback_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &int_max,
        },
        {
                .procname       =  "msgmnb",
                .data           = &init_ipc_ns.msg_ctlmnb,
                .maxlen         = sizeof (init_ipc_ns.msg_ctlmnb),
                .mode           = 0644,
-               .proc_handler   = proc_ipc_dointvec,
+               .proc_handler   = proc_ipc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &int_max,
        },
        {
                .procname       = "sem",
index b3d51e229356db26978e2244053ba93ebf686c3c..a4d1aa8da9bc7180ad279a709e4d22cc5627543d 100644 (file)
@@ -6,9 +6,9 @@ obj-y     = fork.o exec_domain.o panic.o \
            cpu.o exit.o itimer.o time.o softirq.o resource.o \
            sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
            signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
-           rcupdate.o extable.o params.o posix-timers.o \
+           extable.o params.o posix-timers.o \
            kthread.o sys_ni.o posix-cpu-timers.o mutex.o \
-           hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
+           hrtimer.o rwsem.o nsproxy.o semaphore.o \
            notifier.o ksysfs.o cred.o reboot.o \
            async.o range.o groups.o lglock.o smpboot.o
 
@@ -27,6 +27,7 @@ obj-y += power/
 obj-y += printk/
 obj-y += cpu/
 obj-y += irq/
+obj-y += rcu/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
-obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
-obj-$(CONFIG_TREE_RCU) += rcutree.o
-obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
-obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
-obj-$(CONFIG_TINY_RCU) += rcutiny.o
-obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
index cd55144270b5401030f4b5ce5576f97b6b976b63..9c2ddfbf452554902d9ad1c23deba5794d0ed664 100644 (file)
@@ -87,10 +87,31 @@ again:
                goto out;
 
        /*
-        * Publish the known good head. Rely on the full barrier implied
-        * by atomic_dec_and_test() order the rb->head read and this
-        * write.
+        * Since the mmap() consumer (userspace) can run on a different CPU:
+        *
+        *   kernel                             user
+        *
+        *   READ ->data_tail                   READ ->data_head
+        *   smp_mb()   (A)                     smp_rmb()       (C)
+        *   WRITE $data                        READ $data
+        *   smp_wmb()  (B)                     smp_mb()        (D)
+        *   STORE ->data_head                  WRITE ->data_tail
+        *
+        * Where A pairs with D, and B pairs with C.
+        *
+        * I don't think A needs to be a full barrier because we won't in fact
+        * write data until we see the store from userspace. So we simply don't
+        * issue the data WRITE until we observe it. Be conservative for now.
+        *
+        * OTOH, D needs to be a full barrier since it separates the data READ
+        * from the tail WRITE.
+        *
+        * For B a WMB is sufficient since it separates two WRITEs, and for C
+        * an RMB is sufficient since it separates two READs.
+        *
+        * See perf_output_begin().
         */
+       smp_wmb();
        rb->user_page->data_head = head;
 
        /*
@@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle,
                 * Userspace could choose to issue a mb() before updating the
                 * tail pointer. So that all reads will be completed before the
                 * write is issued.
+                *
+                * See perf_output_put_handle().
                 */
                tail = ACCESS_ONCE(rb->user_page->data_tail);
-               smp_rmb();
+               smp_mb();
                offset = head = local_read(&rb->head);
                head += size;
                if (unlikely(!perf_output_space(rb, tail, offset, head)))
index 3e97fb126e6b9255a852887923f39230cd52c513..8807061ca004cc0a532460244e01a56391eb0469 100644 (file)
 #include <linux/export.h>
 #include <linux/sysctl.h>
 #include <linux/utsname.h>
+#include <trace/events/sched.h>
 
 /*
  * The number of tasks checked:
  */
-unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
 
 /*
  * Limit number of tasks checked in a batch.
@@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
                t->last_switch_count = switch_count;
                return;
        }
+
+       trace_sched_process_hang(t);
+
        if (!sysctl_hung_task_warnings)
                return;
        sysctl_hung_task_warnings--;
index e16c45b9ee77054f80becd37a51da00776f796c1..4e8e14c34e428d6a75580ec862c4975b0425dfc0 100644 (file)
@@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
        printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
               !rcu_lockdep_current_cpu_online()
                        ? "RCU used illegally from offline CPU!\n"
-                       : rcu_is_cpu_idle()
+                       : !rcu_is_watching()
                                ? "RCU used illegally from idle CPU!\n"
                                : "",
               rcu_scheduler_active, debug_locks);
@@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
         * So complain bitterly if someone does call rcu_read_lock(),
         * rcu_read_lock_bh() and so on from extended quiescent states.
         */
-       if (rcu_is_cpu_idle())
+       if (!rcu_is_watching())
                printk("RCU used illegally from extended quiescent state!\n");
 
        lockdep_print_held_locks(curr);
index b2c71c5873e441ae1d9a89d8caa7517eb5bcf976..09220656d888b64f9415368218a3323147c4967f 100644 (file)
@@ -421,6 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
        seq_time(m, lt->min);
        seq_time(m, lt->max);
        seq_time(m, lt->total);
+       seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0);
 }
 
 static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
@@ -518,20 +519,20 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
        }
        if (i) {
                seq_puts(m, "\n");
-               seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
+               seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1));
                seq_puts(m, "\n");
        }
 }
 
 static void seq_header(struct seq_file *m)
 {
-       seq_printf(m, "lock_stat version 0.3\n");
+       seq_puts(m, "lock_stat version 0.4\n");
 
        if (unlikely(!debug_locks))
                seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
 
-       seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+       seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
+       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
                        "%14s %14s\n",
                        "class name",
                        "con-bounces",
@@ -539,12 +540,14 @@ static void seq_header(struct seq_file *m)
                        "waittime-min",
                        "waittime-max",
                        "waittime-total",
+                       "waittime-avg",
                        "acq-bounces",
                        "acquisitions",
                        "holdtime-min",
                        "holdtime-max",
-                       "holdtime-total");
-       seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+                       "holdtime-total",
+                       "holdtime-avg");
+       seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
        seq_printf(m, "\n");
 }
 
diff --git a/kernel/rcu.h b/kernel/rcu.h
deleted file mode 100644 (file)
index 7713196..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Read-Copy Update definitions shared among RCU implementations.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2011
- *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#ifndef __LINUX_RCU_H
-#define __LINUX_RCU_H
-
-#ifdef CONFIG_RCU_TRACE
-#define RCU_TRACE(stmt) stmt
-#else /* #ifdef CONFIG_RCU_TRACE */
-#define RCU_TRACE(stmt)
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
-/*
- * Process-level increment to ->dynticks_nesting field.  This allows for
- * architectures that use half-interrupts and half-exceptions from
- * process context.
- *
- * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
- * that counts the number of process-based reasons why RCU cannot
- * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
- * is the value used to increment or decrement this field.
- *
- * The rest of the bits could in principle be used to count interrupts,
- * but this would mean that a negative-one value in the interrupt
- * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
- * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
- * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
- * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
- * initial exit from idle.
- */
-#define DYNTICK_TASK_NEST_WIDTH 7
-#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
-#define DYNTICK_TASK_NEST_MASK  (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
-#define DYNTICK_TASK_FLAG         ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
-#define DYNTICK_TASK_MASK         ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
-#define DYNTICK_TASK_EXIT_IDLE    (DYNTICK_TASK_NEST_VALUE + \
-                                   DYNTICK_TASK_FLAG)
-
-/*
- * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
- * by call_rcu() and rcu callback execution, and are therefore not part of the
- * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
- */
-
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-# define STATE_RCU_HEAD_READY  0
-# define STATE_RCU_HEAD_QUEUED 1
-
-extern struct debug_obj_descr rcuhead_debug_descr;
-
-static inline int debug_rcu_head_queue(struct rcu_head *head)
-{
-       int r1;
-
-       r1 = debug_object_activate(head, &rcuhead_debug_descr);
-       debug_object_active_state(head, &rcuhead_debug_descr,
-                                 STATE_RCU_HEAD_READY,
-                                 STATE_RCU_HEAD_QUEUED);
-       return r1;
-}
-
-static inline void debug_rcu_head_unqueue(struct rcu_head *head)
-{
-       debug_object_active_state(head, &rcuhead_debug_descr,
-                                 STATE_RCU_HEAD_QUEUED,
-                                 STATE_RCU_HEAD_READY);
-       debug_object_deactivate(head, &rcuhead_debug_descr);
-}
-#else  /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-static inline int debug_rcu_head_queue(struct rcu_head *head)
-{
-       return 0;
-}
-
-static inline void debug_rcu_head_unqueue(struct rcu_head *head)
-{
-}
-#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-
-extern void kfree(const void *);
-
-static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
-{
-       unsigned long offset = (unsigned long)head->func;
-
-       if (__is_kfree_rcu_offset(offset)) {
-               RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
-               kfree((void *)head - offset);
-               return 1;
-       } else {
-               RCU_TRACE(trace_rcu_invoke_callback(rn, head));
-               head->func(head);
-               return 0;
-       }
-}
-
-extern int rcu_expedited;
-
-#ifdef CONFIG_RCU_STALL_COMMON
-
-extern int rcu_cpu_stall_suppress;
-int rcu_jiffies_till_stall_check(void);
-
-#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
-
-#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
new file mode 100644 (file)
index 0000000..01e9ec3
--- /dev/null
@@ -0,0 +1,6 @@
+obj-y += update.o srcu.o
+obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o
+obj-$(CONFIG_TREE_RCU) += tree.o
+obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o
+obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
+obj-$(CONFIG_TINY_RCU) += tiny.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
new file mode 100644 (file)
index 0000000..7859a0a
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Read-Copy Update definitions shared among RCU implementations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2011
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_RCU_H
+#define __LINUX_RCU_H
+
+#ifdef CONFIG_RCU_TRACE
+#define RCU_TRACE(stmt) stmt
+#else /* #ifdef CONFIG_RCU_TRACE */
+#define RCU_TRACE(stmt)
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * Process-level increment to ->dynticks_nesting field.  This allows for
+ * architectures that use half-interrupts and half-exceptions from
+ * process context.
+ *
+ * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
+ * that counts the number of process-based reasons why RCU cannot
+ * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
+ * is the value used to increment or decrement this field.
+ *
+ * The rest of the bits could in principle be used to count interrupts,
+ * but this would mean that a negative-one value in the interrupt
+ * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
+ * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
+ * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
+ * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
+ * initial exit from idle.
+ */
+#define DYNTICK_TASK_NEST_WIDTH 7
+#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
+#define DYNTICK_TASK_NEST_MASK  (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
+#define DYNTICK_TASK_FLAG         ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
+#define DYNTICK_TASK_MASK         ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
+#define DYNTICK_TASK_EXIT_IDLE    (DYNTICK_TASK_NEST_VALUE + \
+                                   DYNTICK_TASK_FLAG)
+
+/*
+ * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
+ * by call_rcu() and rcu callback execution, and are therefore not part of the
+ * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
+ */
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+# define STATE_RCU_HEAD_READY  0
+# define STATE_RCU_HEAD_QUEUED 1
+
+extern struct debug_obj_descr rcuhead_debug_descr;
+
+static inline int debug_rcu_head_queue(struct rcu_head *head)
+{
+       int r1;
+
+       r1 = debug_object_activate(head, &rcuhead_debug_descr);
+       debug_object_active_state(head, &rcuhead_debug_descr,
+                                 STATE_RCU_HEAD_READY,
+                                 STATE_RCU_HEAD_QUEUED);
+       return r1;
+}
+
+static inline void debug_rcu_head_unqueue(struct rcu_head *head)
+{
+       debug_object_active_state(head, &rcuhead_debug_descr,
+                                 STATE_RCU_HEAD_QUEUED,
+                                 STATE_RCU_HEAD_READY);
+       debug_object_deactivate(head, &rcuhead_debug_descr);
+}
+#else  /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+static inline int debug_rcu_head_queue(struct rcu_head *head)
+{
+       return 0;
+}
+
+static inline void debug_rcu_head_unqueue(struct rcu_head *head)
+{
+}
+#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+
+extern void kfree(const void *);
+
+static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
+{
+       unsigned long offset = (unsigned long)head->func;
+
+       if (__is_kfree_rcu_offset(offset)) {
+               RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
+               kfree((void *)head - offset);
+               return 1;
+       } else {
+               RCU_TRACE(trace_rcu_invoke_callback(rn, head));
+               head->func(head);
+               return 0;
+       }
+}
+
+extern int rcu_expedited;
+
+#ifdef CONFIG_RCU_STALL_COMMON
+
+extern int rcu_cpu_stall_suppress;
+int rcu_jiffies_till_stall_check(void);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x)  tracepoint_string(x)
+
+#endif /* __LINUX_RCU_H */
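
A brief sketch of the intended TPS() usage (the rcu_state fields and the helper below are assumed kernel internals shown only for illustration; trace_rcu_grace_period() and the "newreq" event string are documented in the trace header hunks above):

/* Constant tracepoint strings go through TPS() so perf/trace-cmd can
 * translate the recorded pointer back into readable text. */
static void example_note_gp_request(struct rcu_state *rsp)
{
	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("newreq"));
}
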
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
new file mode 100644 (file)
index 0000000..01d5ccb
--- /dev/null
@@ -0,0 +1,651 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *        Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *             Documentation/RCU/ *.txt
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+
+#include <trace/events/rcu.h>
+
+#include "rcu.h"
+
+/*
+ * Initialize an rcu_batch structure to empty.
+ */
+static inline void rcu_batch_init(struct rcu_batch *b)
+{
+       b->head = NULL;
+       b->tail = &b->head;
+}
+
+/*
+ * Enqueue a callback onto the tail of the specified rcu_batch structure.
+ */
+static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
+{
+       *b->tail = head;
+       b->tail = &head->next;
+}
+
+/*
+ * Is the specified rcu_batch structure empty?
+ */
+static inline bool rcu_batch_empty(struct rcu_batch *b)
+{
+       return b->tail == &b->head;
+}
+
+/*
+ * Remove the callback at the head of the specified rcu_batch structure
+ * and return a pointer to it, or return NULL if the structure is empty.
+ */
+static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
+{
+       struct rcu_head *head;
+
+       if (rcu_batch_empty(b))
+               return NULL;
+
+       head = b->head;
+       b->head = head->next;
+       if (b->tail == &head->next)
+               rcu_batch_init(b);
+
+       return head;
+}
+
+/*
+ * Move all callbacks from the rcu_batch structure specified by "from" to
+ * the structure specified by "to".
+ */
+static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
+{
+       if (!rcu_batch_empty(from)) {
+               *to->tail = from->head;
+               to->tail = from->tail;
+               rcu_batch_init(from);
+       }
+}
+
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+       sp->completed = 0;
+       spin_lock_init(&sp->queue_lock);
+       sp->running = false;
+       rcu_batch_init(&sp->batch_queue);
+       rcu_batch_init(&sp->batch_check0);
+       rcu_batch_init(&sp->batch_check1);
+       rcu_batch_init(&sp->batch_done);
+       INIT_DELAYED_WORK(&sp->work, process_srcu);
+       sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+       return sp->per_cpu_ref ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+                      struct lock_class_key *key)
+{
+       /* Don't re-initialize a lock while it is held. */
+       debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+       lockdep_init_map(&sp->dep_map, name, key, 0);
+       return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+       return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * Returns approximate total of the readers' ->seq[] values for the
+ * rank of per-CPU counters specified by idx.
+ */
+static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
+{
+       int cpu;
+       unsigned long sum = 0;
+       unsigned long t;
+
+       for_each_possible_cpu(cpu) {
+               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
+               sum += t;
+       }
+       return sum;
+}
+
+/*
+ * Returns approximate number of readers active on the specified rank
+ * of the per-CPU ->c[] counters.
+ */
+static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
+{
+       int cpu;
+       unsigned long sum = 0;
+       unsigned long t;
+
+       for_each_possible_cpu(cpu) {
+               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
+               sum += t;
+       }
+       return sum;
+}
+
+/*
+ * Return true if the number of pre-existing readers is determined to
+ * be stably zero.  An example unstable zero can occur if the call
+ * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
+ * but due to task migration, sees the corresponding __srcu_read_unlock()
+ * decrement.  This can happen because srcu_readers_active_idx() takes
+ * time to sum the array, and might in fact be interrupted or preempted
+ * partway through the summation.
+ */
+static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
+{
+       unsigned long seq;
+
+       seq = srcu_readers_seq_idx(sp, idx);
+
+       /*
+        * The following smp_mb() A pairs with the smp_mb() B located in
+        * __srcu_read_lock().  This pairing ensures that if an
+        * __srcu_read_lock() increments its counter after the summation
+        * in srcu_readers_active_idx(), then the corresponding SRCU read-side
+        * critical section will see any changes made prior to the start
+        * of the current SRCU grace period.
+        *
+        * Also, if the above call to srcu_readers_seq_idx() saw the
+        * increment of ->seq[], then the call to srcu_readers_active_idx()
+        * must see the increment of ->c[].
+        */
+       smp_mb(); /* A */
+
+       /*
+        * Note that srcu_readers_active_idx() can incorrectly return
+        * zero even though there is a pre-existing reader throughout.
+        * To see this, suppose that task A is in a very long SRCU
+        * read-side critical section that started on CPU 0, and that
+        * no other reader exists, so that the sum of the counters
+        * is equal to one.  Then suppose that task B starts executing
+        * srcu_readers_active_idx(), summing up to CPU 1, and then that
+        * task C starts reading on CPU 0, so that its increment is not
+        * summed, but finishes reading on CPU 2, so that its decrement
+        * -is- summed.  Then when task B completes its sum, it will
+        * incorrectly get zero, despite the fact that task A has been
+        * in its SRCU read-side critical section the whole time.
+        *
+        * We therefore do a validation step should srcu_readers_active_idx()
+        * return zero.
+        */
+       if (srcu_readers_active_idx(sp, idx) != 0)
+               return false;
+
+       /*
+        * The remainder of this function is the validation step.
+        * The following smp_mb() D pairs with the smp_mb() C in
+        * __srcu_read_unlock().  If the __srcu_read_unlock() was seen
+        * by srcu_readers_active_idx() above, then any destructive
+        * operation performed after the grace period will happen after
+        * the corresponding SRCU read-side critical section.
+        *
+        * Note that there can be at most NR_CPUS worth of readers using
+        * the old index, which is not enough to overflow even a 32-bit
+        * integer.  (Yes, this does mean that systems having more than
+        * a billion or so CPUs need to be 64-bit systems.)  Therefore,
+        * the sum of the ->seq[] counters cannot possibly overflow.
+        * Therefore, the only way that the return values of the two
+        * calls to srcu_readers_seq_idx() can be equal is if there were
+        * no increments of the corresponding rank of ->seq[] counts
+        * in the interim.  But the missed-increment scenario laid out
+        * above includes an increment of the ->seq[] counter by
+        * the corresponding __srcu_read_lock().  Therefore, if this
+        * scenario occurs, the return values from the two calls to
+        * srcu_readers_seq_idx() will differ, and thus the validation
+        * step below suffices.
+        */
+       smp_mb(); /* D */
+
+       return srcu_readers_seq_idx(sp, idx) == seq;
+}
+
+/**
+ * srcu_readers_active - returns approximate number of readers.
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct.  That said, it
+ * can be useful as an error check at cleanup time.
+ */
+static int srcu_readers_active(struct srcu_struct *sp)
+{
+       int cpu;
+       unsigned long sum = 0;
+
+       for_each_possible_cpu(cpu) {
+               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
+               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
+       }
+       return sum;
+}
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+       if (WARN_ON(srcu_readers_active(sp)))
+               return; /* Leakage unless caller handles error. */
+       free_percpu(sp->per_cpu_ref);
+       sp->per_cpu_ref = NULL;
+}
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock(struct srcu_struct *sp)
+{
+       int idx;
+
+       idx = ACCESS_ONCE(sp->completed) & 0x1;
+       preempt_disable();
+       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
+       smp_mb(); /* B */  /* Avoid leaking the critical section. */
+       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
+       preempt_enable();
+       return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ * Must be called from process context.
+ */
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+       smp_mb(); /* C */  /* Avoid leaking the critical section. */
+       this_cpu_dec(sp->per_cpu_ref->c[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
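
For orientation, a minimal reader/updater pairing built on the primitives above (my_srcu, struct my_data and global_ptr are made-up names; srcu_read_lock(), srcu_read_unlock(), srcu_dereference() and synchronize_srcu() are the public SRCU API, and updater-side mutual exclusion is omitted for brevity):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct my_data { int val; };

DEFINE_SRCU(my_srcu);
static struct my_data __rcu *global_ptr;

static void reader(void)
{
	int idx = srcu_read_lock(&my_srcu);	/* readers may sleep */
	struct my_data *p = srcu_dereference(global_ptr, &my_srcu);

	if (p)
		pr_info("val = %d\n", p->val);
	srcu_read_unlock(&my_srcu, idx);
}

static void updater(struct my_data *newp)
{
	/* "1" stands in for the caller's lockdep_is_held() condition. */
	struct my_data *old = rcu_dereference_protected(global_ptr, 1);

	rcu_assign_pointer(global_ptr, newp);
	synchronize_srcu(&my_srcu);	/* wait for pre-existing readers */
	kfree(old);
}
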
+
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below) to allow SRCU readers to exit their read-side critical
+ * sections.  If there are still some readers after 10 microseconds,
+ * we repeatedly block for 1-millisecond time periods.  This approach
+ * has done well in testing, so there is no need for a config parameter.
+ */
+#define SRCU_RETRY_CHECK_DELAY         5
+#define SYNCHRONIZE_SRCU_TRYCOUNT      2
+#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT  12
+
+/*
+ * @@@ Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ * The caller should ensure that ->completed is not changed while checking
+ * and that idx = (->completed & 1) ^ 1.
+ */
+static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
+{
+       for (;;) {
+               if (srcu_readers_active_idx_check(sp, idx))
+                       return true;
+               if (--trycount <= 0)
+                       return false;
+               udelay(SRCU_RETRY_CHECK_DELAY);
+       }
+}
+
+/*
+ * Increment the ->completed counter so that future SRCU readers will
+ * use the other rank of the ->c[] and ->seq[] arrays.  This allows
+ * us to wait for pre-existing readers in a starvation-free manner.
+ */
+static void srcu_flip(struct srcu_struct *sp)
+{
+       sp->completed++;
+}
+
+/*
+ * Enqueue an SRCU callback on the specified srcu_struct structure,
+ * initiating grace-period processing if it is not already running.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+               void (*func)(struct rcu_head *head))
+{
+       unsigned long flags;
+
+       head->next = NULL;
+       head->func = func;
+       spin_lock_irqsave(&sp->queue_lock, flags);
+       rcu_batch_queue(&sp->batch_queue, head);
+       if (!sp->running) {
+               sp->running = true;
+               schedule_delayed_work(&sp->work, 0);
+       }
+       spin_unlock_irqrestore(&sp->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(call_srcu);
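+
+/*
+ * Illustrative sketch (not part of this implementation): queueing an SRCU
+ * callback to free a structure once all pre-existing SRCU readers have
+ * finished with it.  "struct example", example_srcu, example_ptr, old_p
+ * and new_p are hypothetical.
+ *
+ *	struct example {
+ *		struct rcu_head rh;
+ *		int data;
+ *	};
+ *
+ *	static void example_free_cb(struct rcu_head *rh)
+ *	{
+ *		kfree(container_of(rh, struct example, rh));
+ *	}
+ *
+ *	rcu_assign_pointer(example_ptr, new_p);
+ *	call_srcu(&example_srcu, &old_p->rh, example_free_cb);
+ */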
+
+struct rcu_synchronize {
+       struct rcu_head head;
+       struct completion completion;
+};
+
+/*
+ * Awaken the corresponding synchronize_srcu() instance now that a
+ * grace period has elapsed.
+ */
+static void wakeme_after_rcu(struct rcu_head *head)
+{
+       struct rcu_synchronize *rcu;
+
+       rcu = container_of(head, struct rcu_synchronize, head);
+       complete(&rcu->completion);
+}
+
+static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
+static void srcu_reschedule(struct srcu_struct *sp);
+
+/*
+ * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
+ */
+static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
+{
+       struct rcu_synchronize rcu;
+       struct rcu_head *head = &rcu.head;
+       bool done = false;
+
+       rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
+                          !lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
+
+       might_sleep();
+       init_completion(&rcu.completion);
+
+       head->next = NULL;
+       head->func = wakeme_after_rcu;
+       spin_lock_irq(&sp->queue_lock);
+       if (!sp->running) {
+               /* steal the processing owner */
+               sp->running = true;
+               rcu_batch_queue(&sp->batch_check0, head);
+               spin_unlock_irq(&sp->queue_lock);
+
+               srcu_advance_batches(sp, trycount);
+               if (!rcu_batch_empty(&sp->batch_done)) {
+                       BUG_ON(sp->batch_done.head != head);
+                       rcu_batch_dequeue(&sp->batch_done);
+                       done = true;
+               }
+               /* give the processing owner to work_struct */
+               srcu_reschedule(sp);
+       } else {
+               rcu_batch_queue(&sp->batch_queue, head);
+               spin_unlock_irq(&sp->queue_lock);
+       }
+
+       if (!done)
+               wait_for_completion(&rcu.completion);
+}
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for the counts of both indexes to drain to zero.  To avoid
+ * starving synchronize_srcu(), it first waits for the count of
+ * index ((->completed & 1) ^ 1) to drain to zero, then flips
+ * ->completed and waits for the count of the other index.
+ *
+ * Can block; must be called from process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+       __synchronize_srcu(sp, rcu_expedited
+                          ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
+                          : SYNCHRONIZE_SRCU_TRYCOUNT);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
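+
+/*
+ * Illustrative sketch (not part of this implementation): the classic
+ * remove-then-synchronize update pattern built on synchronize_srcu().
+ * example_lock, example_ptr and example_srcu are hypothetical; readers
+ * access example_ptr under srcu_read_lock(&example_srcu).
+ *
+ *	spin_lock(&example_lock);
+ *	old_p = rcu_dereference_protected(example_ptr,
+ *					  lockdep_is_held(&example_lock));
+ *	rcu_assign_pointer(example_ptr, new_p);
+ *	spin_unlock(&example_lock);
+ *
+ *	synchronize_srcu(&example_srcu);
+ *	kfree(old_p);
+ */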
+
+/**
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for an SRCU grace period to elapse, but be more aggressive about
+ * spinning rather than blocking when waiting.
+ *
+ * Note that it is also illegal to call synchronize_srcu_expedited()
+ * from the corresponding SRCU read-side critical section;
+ * doing so will result in deadlock.  However, it is perfectly legal
+ * to call synchronize_srcu_expedited() on one srcu_struct from some
+ * other srcu_struct's read-side critical section, as long as
+ * the resulting graph of srcu_structs is acyclic.
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+       __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
+/**
+ * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
+ * @sp: srcu_struct on which to wait for in-flight callbacks.
+ */
+void srcu_barrier(struct srcu_struct *sp)
+{
+       synchronize_srcu(sp);
+}
+EXPORT_SYMBOL_GPL(srcu_barrier);
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+long srcu_batches_completed(struct srcu_struct *sp)
+{
+       return sp->completed;
+}
+EXPORT_SYMBOL_GPL(srcu_batches_completed);
+
+#define SRCU_CALLBACK_BATCH    10
+#define SRCU_INTERVAL          1
+
+/*
+ * Move any new SRCU callbacks to the first stage of the SRCU grace
+ * period pipeline.
+ */
+static void srcu_collect_new(struct srcu_struct *sp)
+{
+       if (!rcu_batch_empty(&sp->batch_queue)) {
+               spin_lock_irq(&sp->queue_lock);
+               rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
+               spin_unlock_irq(&sp->queue_lock);
+       }
+}
+
+/*
+ * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
+ * ->batch_check1 and then to ->batch_done as readers drain.
+ */
+static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
+{
+       int idx = 1 ^ (sp->completed & 1);
+
+       /*
+        * Because readers might be delayed for an extended period after
+        * fetching ->completed for their index, at any point in time there
+        * might well be readers using both idx=0 and idx=1.  We therefore
+        * need to wait for readers to clear from both index values before
+        * invoking a callback.
+        */
+
+       if (rcu_batch_empty(&sp->batch_check0) &&
+           rcu_batch_empty(&sp->batch_check1))
+               return; /* no callbacks need to be advanced */
+
+       if (!try_check_zero(sp, idx, trycount))
+               return; /* failed to advance, will try after SRCU_INTERVAL */
+
+       /*
+        * The callbacks in ->batch_check1 already completed their first
+        * zero check and the counter flip back when they were enqueued on
+        * ->batch_check0 in a previous invocation of srcu_advance_batches().
+        * (Presumably try_check_zero() returned false during that
+        * invocation, leaving the callbacks stranded on ->batch_check1.)
+        * They are therefore ready to invoke, so move them to ->batch_done.
+        */
+       rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+
+       if (rcu_batch_empty(&sp->batch_check0))
+               return; /* no callbacks need to be advanced */
+       srcu_flip(sp);
+
+       /*
+        * The callbacks in ->batch_check0 have just finished their first
+        * zero check and the flip, so move them to ->batch_check1
+        * for future checking on the other idx.
+        */
+       rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
+
+       /*
+        * SRCU read-side critical sections are normally short, so check
+        * at least twice in quick succession after a flip.
+        */
+       trycount = trycount < 2 ? 2 : trycount;
+       if (!try_check_zero(sp, idx^1, trycount))
+               return; /* failed to advance, will try after SRCU_INTERVAL */
+
+       /*
+        * The callbacks in ->batch_check1 have now waited for all
+        * pre-existing readers using both idx values.  They are therefore
+        * ready to invoke, so move them to ->batch_done.
+        */
+       rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+}
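+
+/*
+ * Informal sketch (not part of this implementation) of the callback
+ * pipeline that srcu_advance_batches() drives, one stage per
+ * grace-period step:
+ *
+ *	call_srcu() -> batch_queue -> batch_check0 -> batch_check1 -> batch_done
+ *	                  (new)       (waiting on      (waiting on     (ready to
+ *	                               one index)       other index)    invoke)
+ */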
+
+/*
+ * Invoke a limited number of SRCU callbacks that have passed through
+ * their grace period.  If there are more to do, SRCU will reschedule
+ * the workqueue.
+ */
+static void srcu_invoke_callbacks(struct srcu_struct *sp)
+{
+       int i;
+       struct rcu_head *head;
+
+       for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
+               head = rcu_batch_dequeue(&sp->batch_done);
+               if (!head)
+                       break;
+               local_bh_disable();
+               head->func(head);
+               local_bh_enable();
+       }
+}
+
+/*
+ * Finished one round of SRCU grace period.  Start another if there are
+ * more SRCU callbacks queued, otherwise put SRCU into not-running state.
+ */
+static void srcu_reschedule(struct srcu_struct *sp)
+{
+       bool pending = true;
+
+       if (rcu_batch_empty(&sp->batch_done) &&
+           rcu_batch_empty(&sp->batch_check1) &&
+           rcu_batch_empty(&sp->batch_check0) &&
+           rcu_batch_empty(&sp->batch_queue)) {
+               spin_lock_irq(&sp->queue_lock);
+               if (rcu_batch_empty(&sp->batch_done) &&
+                   rcu_batch_empty(&sp->batch_check1) &&
+                   rcu_batch_empty(&sp->batch_check0) &&
+                   rcu_batch_empty(&sp->batch_queue)) {
+                       sp->running = false;
+                       pending = false;
+               }
+               spin_unlock_irq(&sp->queue_lock);
+       }
+
+       if (pending)
+               schedule_delayed_work(&sp->work, SRCU_INTERVAL);
+}
+
+/*
+ * This is the work-queue function that handles SRCU grace periods.
+ */
+void process_srcu(struct work_struct *work)
+{
+       struct srcu_struct *sp;
+
+       sp = container_of(work, struct srcu_struct, work.work);
+
+       srcu_collect_new(sp);
+       srcu_advance_batches(sp, 1);
+       srcu_invoke_callbacks(sp);
+       srcu_reschedule(sp);
+}
+EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
new file mode 100644 (file)
index 0000000..0c9a934
--- /dev/null
@@ -0,0 +1,388 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *             Documentation/RCU
+ */
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/cpu.h>
+#include <linux/prefetch.h>
+#include <linux/ftrace_event.h>
+
+#ifdef CONFIG_RCU_TRACE
+#include <trace/events/rcu.h>
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+#include "rcu.h"
+
+/* Forward declarations for tiny_plugin.h. */
+struct rcu_ctrlblk;
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static void rcu_process_callbacks(struct softirq_action *unused);
+static void __call_rcu(struct rcu_head *head,
+                      void (*func)(struct rcu_head *rcu),
+                      struct rcu_ctrlblk *rcp);
+
+static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+
+#include "tiny_plugin.h"
+
+/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
+static void rcu_idle_enter_common(long long newval)
+{
+       if (newval) {
+               RCU_TRACE(trace_rcu_dyntick(TPS("--="),
+                                           rcu_dynticks_nesting, newval));
+               rcu_dynticks_nesting = newval;
+               return;
+       }
+       RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
+                                   rcu_dynticks_nesting, newval));
+       if (!is_idle_task(current)) {
+               struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
+
+               RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
+                                           rcu_dynticks_nesting, newval));
+               ftrace_dump(DUMP_ALL);
+               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+                         current->pid, current->comm,
+                         idle->pid, idle->comm); /* must be idle task! */
+       }
+       rcu_sched_qs(0); /* implies rcu_bh_qs(0) */
+       barrier();
+       rcu_dynticks_nesting = newval;
+}
+
+/*
+ * Enter idle, which is an extended quiescent state if we have fully
+ * entered that mode (i.e., if the new value of dynticks_nesting is zero).
+ */
+void rcu_idle_enter(void)
+{
+       unsigned long flags;
+       long long newval;
+
+       local_irq_save(flags);
+       WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
+       if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
+           DYNTICK_TASK_NEST_VALUE)
+               newval = 0;
+       else
+               newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
+       rcu_idle_enter_common(newval);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+/*
+ * Exit an interrupt handler towards idle.
+ */
+void rcu_irq_exit(void)
+{
+       unsigned long flags;
+       long long newval;
+
+       local_irq_save(flags);
+       newval = rcu_dynticks_nesting - 1;
+       WARN_ON_ONCE(newval < 0);
+       rcu_idle_enter_common(newval);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_irq_exit);
+
+/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
+static void rcu_idle_exit_common(long long oldval)
+{
+       if (oldval) {
+               RCU_TRACE(trace_rcu_dyntick(TPS("++="),
+                                           oldval, rcu_dynticks_nesting));
+               return;
+       }
+       RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
+       if (!is_idle_task(current)) {
+               struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
+
+               RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
+                         oldval, rcu_dynticks_nesting));
+               ftrace_dump(DUMP_ALL);
+               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+                         current->pid, current->comm,
+                         idle->pid, idle->comm); /* must be idle task! */
+       }
+}
+
+/*
+ * Exit idle, so that we are no longer in an extended quiescent state.
+ */
+void rcu_idle_exit(void)
+{
+       unsigned long flags;
+       long long oldval;
+
+       local_irq_save(flags);
+       oldval = rcu_dynticks_nesting;
+       WARN_ON_ONCE(rcu_dynticks_nesting < 0);
+       if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
+               rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+       else
+               rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_idle_exit_common(oldval);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+/*
+ * Enter an interrupt handler, moving away from idle.
+ */
+void rcu_irq_enter(void)
+{
+       unsigned long flags;
+       long long oldval;
+
+       local_irq_save(flags);
+       oldval = rcu_dynticks_nesting;
+       rcu_dynticks_nesting++;
+       WARN_ON_ONCE(rcu_dynticks_nesting == 0);
+       rcu_idle_exit_common(oldval);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_irq_enter);
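+
+/*
+ * Informal sketch (not part of this implementation): how
+ * rcu_dynticks_nesting evolves for an interrupt taken while the CPU is
+ * idle; the values are the ones manipulated by the functions above.
+ *
+ *	rcu_idle_enter();	nesting -> 0 (RCU stops watching)
+ *	  ...CPU is idle...
+ *	rcu_irq_enter();	nesting 0 -> 1 (RCU watching again)
+ *	  ...handler may use rcu_read_lock()/call_rcu()...
+ *	rcu_irq_exit();		nesting 1 -> 0 (back to idle)
+ *	rcu_idle_exit();	nesting 0 -> DYNTICK_TASK_EXIT_IDLE
+ */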
+
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
+
+/*
+ * Test whether RCU is watching the current CPU, that is, whether the
+ * CPU is not idle from RCU's perspective.
+ */
+bool __rcu_is_watching(void)
+{
+       return rcu_dynticks_nesting;
+}
+EXPORT_SYMBOL(__rcu_is_watching);
+
+#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
+
+/*
+ * Test whether the current CPU was interrupted from idle.  Nested
+ * interrupts don't count; we must be running at the first interrupt
+ * level.
+ */
+static int rcu_is_cpu_rrupt_from_idle(void)
+{
+       return rcu_dynticks_nesting <= 1;
+}
+
+/*
+ * Helper function for rcu_sched_qs() and rcu_bh_qs().
+ * Callers must have irqs disabled, to avoid confusion due to interrupt
+ * handlers invoking call_rcu().
+ */
+static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
+{
+       RCU_TRACE(reset_cpu_stall_ticks(rcp));
+       if (rcp->rcucblist != NULL &&
+           rcp->donetail != rcp->curtail) {
+               rcp->donetail = rcp->curtail;
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
+ * are at it, given that any rcu quiescent state is also an rcu_bh
+ * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
+ */
+void rcu_sched_qs(int cpu)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
+           rcu_qsctr_help(&rcu_bh_ctrlblk))
+               raise_softirq(RCU_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+/*
+ * Record an rcu_bh quiescent state.
+ */
+void rcu_bh_qs(int cpu)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+               raise_softirq(RCU_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+/*
+ * Check to see if the scheduling-clock interrupt came from an extended
+ * quiescent state, and, if so, tell RCU about it.  This function must
+ * be called from hardirq context.  It is normally called from the
+ * scheduling-clock interrupt.
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+       RCU_TRACE(check_cpu_stalls());
+       if (user || rcu_is_cpu_rrupt_from_idle())
+               rcu_sched_qs(cpu);
+       else if (!in_softirq())
+               rcu_bh_qs(cpu);
+}
+
+/*
+ * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
+ * whose grace period has elapsed.
+ */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+{
+       const char *rn = NULL;
+       struct rcu_head *next, *list;
+       unsigned long flags;
+       RCU_TRACE(int cb_count = 0);
+
+       /* If no RCU callbacks ready to invoke, just return. */
+       if (&rcp->rcucblist == rcp->donetail) {
+               RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
+               RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
+                                             !!ACCESS_ONCE(rcp->rcucblist),
+                                             need_resched(),
+                                             is_idle_task(current),
+                                             false));
+               return;
+       }
+
+       /* Move the ready-to-invoke callbacks to a local list. */
+       local_irq_save(flags);
+       RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
+       list = rcp->rcucblist;
+       rcp->rcucblist = *rcp->donetail;
+       *rcp->donetail = NULL;
+       if (rcp->curtail == rcp->donetail)
+               rcp->curtail = &rcp->rcucblist;
+       rcp->donetail = &rcp->rcucblist;
+       local_irq_restore(flags);
+
+       /* Invoke the callbacks on the local list. */
+       RCU_TRACE(rn = rcp->name);
+       while (list) {
+               next = list->next;
+               prefetch(next);
+               debug_rcu_head_unqueue(list);
+               local_bh_disable();
+               __rcu_reclaim(rn, list);
+               local_bh_enable();
+               list = next;
+               RCU_TRACE(cb_count++);
+       }
+       RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
+       RCU_TRACE(trace_rcu_batch_end(rcp->name,
+                                     cb_count, 0, need_resched(),
+                                     is_idle_task(current),
+                                     false));
+}
+
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+       __rcu_process_callbacks(&rcu_sched_ctrlblk);
+       __rcu_process_callbacks(&rcu_bh_ctrlblk);
+}
+
+/*
+ * Wait for a grace period to elapse.  But it is illegal to invoke
+ * synchronize_sched() from within an RCU read-side critical section.
+ * Therefore, any legal call to synchronize_sched() is a quiescent
+ * state, and so on a UP system, synchronize_sched() need do nothing.
+ * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
+ * benefits of doing might_sleep() to reduce latency.)
+ *
+ * Cool, huh?  (Due to Josh Triplett.)
+ *
+ * But we want to make this a static inline later.  The cond_resched()
+ * currently makes this problematic.
+ */
+void synchronize_sched(void)
+{
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_sched() in RCU read-side critical section");
+       cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+/*
+ * Helper function for call_rcu() and call_rcu_bh().
+ */
+static void __call_rcu(struct rcu_head *head,
+                      void (*func)(struct rcu_head *rcu),
+                      struct rcu_ctrlblk *rcp)
+{
+       unsigned long flags;
+
+       debug_rcu_head_queue(head);
+       head->func = func;
+       head->next = NULL;
+
+       local_irq_save(flags);
+       *rcp->curtail = head;
+       rcp->curtail = &head->next;
+       RCU_TRACE(rcp->qlen++);
+       local_irq_restore(flags);
+}
+
+/*
+ * Post an RCU callback to be invoked after the end of an RCU-sched grace
+ * period.  But since we have but one CPU, that would be after any
+ * quiescent state.
+ */
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_sched_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu_sched);
+
+/*
+ * Post an RCU bottom-half callback to be invoked after any subsequent
+ * quiescent state.
+ */
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_bh_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+void rcu_init(void)
+{
+       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
new file mode 100644 (file)
index 0000000..280d06c
--- /dev/null
@@ -0,0 +1,174 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
+ * Internal non-public definitions that provide either classic
+ * or preemptible semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2010 Linaro
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
+       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
+       struct rcu_head **curtail;      /* ->next pointer of last CB. */
+       RCU_TRACE(long qlen);           /* Number of pending CBs. */
+       RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
+       RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
+       RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
+       RCU_TRACE(const char *name);    /* Name of RCU type. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+       .donetail       = &rcu_sched_ctrlblk.rcucblist,
+       .curtail        = &rcu_sched_ctrlblk.rcucblist,
+       RCU_TRACE(.name = "rcu_sched")
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+       .donetail       = &rcu_bh_ctrlblk.rcucblist,
+       .curtail        = &rcu_bh_ctrlblk.rcucblist,
+       RCU_TRACE(.name = "rcu_bh")
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#include <linux/kernel_stat.h>
+
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
+/*
+ * During boot, we forgive RCU lockdep issues.  After this function is
+ * invoked, we start taking RCU lockdep issues seriously.
+ */
+void __init rcu_scheduler_starting(void)
+{
+       WARN_ON(nr_context_switches() > 0);
+       rcu_scheduler_active = 1;
+}
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_TRACE
+
+static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rcp->qlen -= n;
+       local_irq_restore(flags);
+}
+
+/*
+ * Dump statistics for TINY_RCU, such as they are.
+ */
+static int show_tiny_stats(struct seq_file *m, void *unused)
+{
+       seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
+       seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
+       return 0;
+}
+
+static int show_tiny_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_tiny_stats, NULL);
+}
+
+static const struct file_operations show_tiny_stats_fops = {
+       .owner = THIS_MODULE,
+       .open = show_tiny_stats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+
+static int __init rcutiny_trace_init(void)
+{
+       struct dentry *retval;
+
+       rcudir = debugfs_create_dir("rcu", NULL);
+       if (!rcudir)
+               goto free_out;
+       retval = debugfs_create_file("rcudata", 0444, rcudir,
+                                    NULL, &show_tiny_stats_fops);
+       if (!retval)
+               goto free_out;
+       return 0;
+free_out:
+       debugfs_remove_recursive(rcudir);
+       return 1;
+}
+
+static void __exit rcutiny_trace_cleanup(void)
+{
+       debugfs_remove_recursive(rcudir);
+}
+
+module_init(rcutiny_trace_init);
+module_exit(rcutiny_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
+MODULE_LICENSE("GPL");
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long j;
+       unsigned long js;
+
+       if (rcu_cpu_stall_suppress)
+               return;
+       rcp->ticks_this_gp++;
+       j = jiffies;
+       js = rcp->jiffies_stall;
+       if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
+               pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
+                      rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+                      jiffies - rcp->gp_start, rcp->qlen);
+               dump_stack();
+       }
+       if (*rcp->curtail && ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies +
+                       3 * rcu_jiffies_till_stall_check() + 3;
+       else if (ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+}
+
+static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
+{
+       rcp->ticks_this_gp = 0;
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+}
+
+static void check_cpu_stalls(void)
+{
+       RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
+       RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c
new file mode 100644 (file)
index 0000000..3929cd4
--- /dev/null
@@ -0,0 +1,2145 @@
+/*
+ * Read-Copy Update module-based torture test facility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2005, 2006
+ *
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ *       Josh Triplett <josh@freedesktop.org>
+ *
+ * See also:  Documentation/RCU/torture.txt
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/trace_clock.h>
+#include <asm/byteorder.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
+
+MODULE_ALIAS("rcutorture");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcutorture."
+
+static int fqs_duration;
+module_param(fqs_duration, int, 0444);
+MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable");
+static int fqs_holdoff;
+module_param(fqs_holdoff, int, 0444);
+MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
+static int fqs_stutter = 3;
+module_param(fqs_stutter, int, 0444);
+MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
+static bool gp_exp;
+module_param(gp_exp, bool, 0444);
+MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives");
+static bool gp_normal;
+module_param(gp_normal, bool, 0444);
+MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives");
+static int irqreader = 1;
+module_param(irqreader, int, 0444);
+MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
+static int n_barrier_cbs;
+module_param(n_barrier_cbs, int, 0444);
+MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
+static int nfakewriters = 4;
+module_param(nfakewriters, int, 0444);
+MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
+static int nreaders = -1;
+module_param(nreaders, int, 0444);
+MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
+static int object_debug;
+module_param(object_debug, int, 0444);
+MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing");
+static int onoff_holdoff;
+module_param(onoff_holdoff, int, 0444);
+MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)");
+static int onoff_interval;
+module_param(onoff_interval, int, 0444);
+MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
+static int shuffle_interval = 3;
+module_param(shuffle_interval, int, 0444);
+MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
+static int shutdown_secs;
+module_param(shutdown_secs, int, 0444);
+MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable.");
+static int stall_cpu;
+module_param(stall_cpu, int, 0444);
+MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable.");
+static int stall_cpu_holdoff = 10;
+module_param(stall_cpu_holdoff, int, 0444);
+MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s).");
+static int stat_interval = 60;
+module_param(stat_interval, int, 0644);
+MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
+static int stutter = 5;
+module_param(stutter, int, 0444);
+MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
+static int test_boost = 1;
+module_param(test_boost, int, 0444);
+MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
+static int test_boost_duration = 4;
+module_param(test_boost_duration, int, 0444);
+MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
+static int test_boost_interval = 7;
+module_param(test_boost_interval, int, 0444);
+MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
+static bool test_no_idle_hz = true;
+module_param(test_no_idle_hz, bool, 0444);
+MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
+static char *torture_type = "rcu";
+module_param(torture_type, charp, 0444);
+MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
+static bool verbose;
+module_param(verbose, bool, 0444);
+MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
+
+#define TORTURE_FLAG "-torture:"
+#define PRINTK_STRING(s) \
+       do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
+#define VERBOSE_PRINTK_STRING(s) \
+       do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
+#define VERBOSE_PRINTK_ERRSTRING(s) \
+       do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
+
+static char printk_buf[4096];
+
+static int nrealreaders;
+static struct task_struct *writer_task;
+static struct task_struct **fakewriter_tasks;
+static struct task_struct **reader_tasks;
+static struct task_struct *stats_task;
+static struct task_struct *shuffler_task;
+static struct task_struct *stutter_task;
+static struct task_struct *fqs_task;
+static struct task_struct *boost_tasks[NR_CPUS];
+static struct task_struct *shutdown_task;
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct *onoff_task;
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static struct task_struct *stall_task;
+static struct task_struct **barrier_cbs_tasks;
+static struct task_struct *barrier_task;
+
+#define RCU_TORTURE_PIPE_LEN 10
+
+struct rcu_torture {
+       struct rcu_head rtort_rcu;
+       int rtort_pipe_count;
+       struct list_head rtort_free;
+       int rtort_mbtest;
+};
+
+static LIST_HEAD(rcu_torture_freelist);
+static struct rcu_torture __rcu *rcu_torture_current;
+static unsigned long rcu_torture_current_version;
+static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
+static DEFINE_SPINLOCK(rcu_torture_lock);
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
+       { 0 };
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) =
+       { 0 };
+static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
+static atomic_t n_rcu_torture_alloc;
+static atomic_t n_rcu_torture_alloc_fail;
+static atomic_t n_rcu_torture_free;
+static atomic_t n_rcu_torture_mberror;
+static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_barrier_error;
+static long n_rcu_torture_boost_ktrerror;
+static long n_rcu_torture_boost_rterror;
+static long n_rcu_torture_boost_failure;
+static long n_rcu_torture_boosts;
+static long n_rcu_torture_timers;
+static long n_offline_attempts;
+static long n_offline_successes;
+static unsigned long sum_offline;
+static int min_offline = -1;
+static int max_offline;
+static long n_online_attempts;
+static long n_online_successes;
+static unsigned long sum_online;
+static int min_online = -1;
+static int max_online;
+static long n_barrier_attempts;
+static long n_barrier_successes;
+static struct list_head rcu_torture_removed;
+static cpumask_var_t shuffle_tmp_mask;
+
+static int stutter_pause_test;
+
+#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
+#define RCUTORTURE_RUNNABLE_INIT 1
+#else
+#define RCUTORTURE_RUNNABLE_INIT 0
+#endif
+int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
+module_param(rcutorture_runnable, int, 0444);
+MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
+
+#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
+#define rcu_can_boost() 1
+#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
+#define rcu_can_boost() 0
+#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
+
+#ifdef CONFIG_RCU_TRACE
+static u64 notrace rcu_trace_clock_local(void)
+{
+       u64 ts = trace_clock_local();
+       unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
+       return ts;
+}
+#else /* #ifdef CONFIG_RCU_TRACE */
+static u64 notrace rcu_trace_clock_local(void)
+{
+       return 0ULL;
+}
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+static unsigned long shutdown_time;    /* jiffies to system shutdown. */
+static unsigned long boost_starttime;  /* jiffies of next boost test start. */
+DEFINE_MUTEX(boost_mutex);             /* protect setting boost_starttime */
+                                       /*  and boost task create/destroy. */
+static atomic_t barrier_cbs_count;     /* Barrier callbacks registered. */
+static bool barrier_phase;             /* Test phase. */
+static atomic_t barrier_cbs_invoked;   /* Barrier callbacks invoked. */
+static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
+static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
+
+/* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
+
+#define FULLSTOP_DONTSTOP 0    /* Normal operation. */
+#define FULLSTOP_SHUTDOWN 1    /* System shutdown with rcutorture running. */
+#define FULLSTOP_RMMOD    2    /* Normal rmmod of rcutorture. */
+static int fullstop = FULLSTOP_RMMOD;
+/*
+ * Protect fullstop transitions and spawning of kthreads.
+ */
+static DEFINE_MUTEX(fullstop_mutex);
+
+/* Forward reference. */
+static void rcu_torture_cleanup(void);
+
+/*
+ * Detect and respond to a system shutdown.
+ */
+static int
+rcutorture_shutdown_notify(struct notifier_block *unused1,
+                          unsigned long unused2, void *unused3)
+{
+       mutex_lock(&fullstop_mutex);
+       if (fullstop == FULLSTOP_DONTSTOP)
+               fullstop = FULLSTOP_SHUTDOWN;
+       else
+               pr_warn(/* but going down anyway, so... */
+                      "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
+       mutex_unlock(&fullstop_mutex);
+       return NOTIFY_DONE;
+}
+
+/*
+ * Absorb kthreads into a kernel function that won't return, so that
+ * they won't ever access module text or data again.
+ */
+static void rcutorture_shutdown_absorb(const char *title)
+{
+       if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
+               pr_notice(
+                      "rcutorture thread %s parking due to system shutdown\n",
+                      title);
+               schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
+       }
+}
+
+/*
+ * Allocate an element from the rcu_tortures pool.
+ */
+static struct rcu_torture *
+rcu_torture_alloc(void)
+{
+       struct list_head *p;
+
+       spin_lock_bh(&rcu_torture_lock);
+       if (list_empty(&rcu_torture_freelist)) {
+               atomic_inc(&n_rcu_torture_alloc_fail);
+               spin_unlock_bh(&rcu_torture_lock);
+               return NULL;
+       }
+       atomic_inc(&n_rcu_torture_alloc);
+       p = rcu_torture_freelist.next;
+       list_del_init(p);
+       spin_unlock_bh(&rcu_torture_lock);
+       return container_of(p, struct rcu_torture, rtort_free);
+}
+
+/*
+ * Free an element to the rcu_tortures pool.
+ */
+static void
+rcu_torture_free(struct rcu_torture *p)
+{
+       atomic_inc(&n_rcu_torture_free);
+       spin_lock_bh(&rcu_torture_lock);
+       list_add_tail(&p->rtort_free, &rcu_torture_freelist);
+       spin_unlock_bh(&rcu_torture_lock);
+}
+
+struct rcu_random_state {
+       unsigned long rrs_state;
+       long rrs_count;
+};
+
+#define RCU_RANDOM_MULT 39916801  /* prime */
+#define RCU_RANDOM_ADD 479001701 /* prime */
+#define RCU_RANDOM_REFRESH 10000
+
+#define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 }
+
+/*
+ * Crude but fast random-number generator.  Uses a linear congruential
+ * generator, with occasional help from cpu_clock().
+ */
+static unsigned long
+rcu_random(struct rcu_random_state *rrsp)
+{
+       if (--rrsp->rrs_count < 0) {
+               rrsp->rrs_state += (unsigned long)local_clock();
+               rrsp->rrs_count = RCU_RANDOM_REFRESH;
+       }
+       rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
+       return swahw32(rrsp->rrs_state);
+}
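+
+/*
+ * Illustrative sketch (not part of this implementation): typical use of
+ * rcu_random() by the torture kthreads below, e.g. a roughly uniform
+ * pick in [0, n) and a one-in-n event ("n" and do_rare_thing() are
+ * hypothetical):
+ *
+ *	static DEFINE_RCU_RANDOM(rand);
+ *
+ *	delay_us = rcu_random(&rand) % n;
+ *	if (!(rcu_random(&rand) % n))
+ *		do_rare_thing();
+ */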
+
+static void
+rcu_stutter_wait(const char *title)
+{
+       while (stutter_pause_test || !rcutorture_runnable) {
+               if (rcutorture_runnable)
+                       schedule_timeout_interruptible(1);
+               else
+                       schedule_timeout_interruptible(round_jiffies_relative(HZ));
+               rcutorture_shutdown_absorb(title);
+       }
+}
+
+/*
+ * Operations vector for selecting different types of tests.
+ */
+
+struct rcu_torture_ops {
+       void (*init)(void);
+       int (*readlock)(void);
+       void (*read_delay)(struct rcu_random_state *rrsp);
+       void (*readunlock)(int idx);
+       int (*completed)(void);
+       void (*deferred_free)(struct rcu_torture *p);
+       void (*sync)(void);
+       void (*exp_sync)(void);
+       void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+       void (*cb_barrier)(void);
+       void (*fqs)(void);
+       int (*stats)(char *page);
+       int irq_capable;
+       int can_boost;
+       const char *name;
+};
+
+static struct rcu_torture_ops *cur_ops;
+
+/*
+ * Definitions for rcu torture testing.
+ */
+
+static int rcu_torture_read_lock(void) __acquires(RCU)
+{
+       rcu_read_lock();
+       return 0;
+}
+
+static void rcu_read_delay(struct rcu_random_state *rrsp)
+{
+       const unsigned long shortdelay_us = 200;
+       const unsigned long longdelay_ms = 50;
+
+       /* We want a short delay sometimes to make a reader delay the grace
+        * period, and we want a long delay occasionally to trigger
+        * force_quiescent_state. */
+
+       if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms)))
+               mdelay(longdelay_ms);
+       if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
+               udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+       if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000)))
+               preempt_schedule();  /* No QS if preempt_disable() in effect */
+#endif
+}
+
+static void rcu_torture_read_unlock(int idx) __releases(RCU)
+{
+       rcu_read_unlock();
+}
+
+static int rcu_torture_completed(void)
+{
+       return rcu_batches_completed();
+}
+
+static void
+rcu_torture_cb(struct rcu_head *p)
+{
+       int i;
+       struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
+
+       if (fullstop != FULLSTOP_DONTSTOP) {
+               /* Test is ending, just drop callbacks on the floor. */
+               /* The next initialization will pick up the pieces. */
+               return;
+       }
+       i = rp->rtort_pipe_count;
+       if (i > RCU_TORTURE_PIPE_LEN)
+               i = RCU_TORTURE_PIPE_LEN;
+       atomic_inc(&rcu_torture_wcount[i]);
+       if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+               rp->rtort_mbtest = 0;
+               rcu_torture_free(rp);
+       } else {
+               cur_ops->deferred_free(rp);
+       }
+}
+
+static int rcu_no_completed(void)
+{
+       return 0;
+}
+
+static void rcu_torture_deferred_free(struct rcu_torture *p)
+{
+       call_rcu(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static void rcu_sync_torture_init(void)
+{
+       INIT_LIST_HEAD(&rcu_torture_removed);
+}
+
+static struct rcu_torture_ops rcu_ops = {
+       .init           = rcu_sync_torture_init,
+       .readlock       = rcu_torture_read_lock,
+       .read_delay     = rcu_read_delay,
+       .readunlock     = rcu_torture_read_unlock,
+       .completed      = rcu_torture_completed,
+       .deferred_free  = rcu_torture_deferred_free,
+       .sync           = synchronize_rcu,
+       .exp_sync       = synchronize_rcu_expedited,
+       .call           = call_rcu,
+       .cb_barrier     = rcu_barrier,
+       .fqs            = rcu_force_quiescent_state,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
+       .name           = "rcu"
+};
+
+/*
+ * Definitions for rcu_bh torture testing.
+ */
+
+static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH)
+{
+       rcu_read_lock_bh();
+       return 0;
+}
+
+static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH)
+{
+       rcu_read_unlock_bh();
+}
+
+static int rcu_bh_torture_completed(void)
+{
+       return rcu_batches_completed_bh();
+}
+
+static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
+{
+       call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops rcu_bh_ops = {
+       .init           = rcu_sync_torture_init,
+       .readlock       = rcu_bh_torture_read_lock,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = rcu_bh_torture_read_unlock,
+       .completed      = rcu_bh_torture_completed,
+       .deferred_free  = rcu_bh_torture_deferred_free,
+       .sync           = synchronize_rcu_bh,
+       .exp_sync       = synchronize_rcu_bh_expedited,
+       .call           = call_rcu_bh,
+       .cb_barrier     = rcu_barrier_bh,
+       .fqs            = rcu_bh_force_quiescent_state,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "rcu_bh"
+};
+
+/*
+ * Definitions for srcu torture testing.
+ */
+
+DEFINE_STATIC_SRCU(srcu_ctl);
+
+static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
+{
+       return srcu_read_lock(&srcu_ctl);
+}
+
+static void srcu_read_delay(struct rcu_random_state *rrsp)
+{
+       long delay;
+       const long uspertick = 1000000 / HZ;
+       const long longdelay = 10;
+
+       /* We want there to be long-running readers, but not all the time. */
+
+       delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
+       if (!delay)
+               schedule_timeout_interruptible(longdelay);
+       else
+               rcu_read_delay(rrsp);
+}
+
+static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
+{
+       srcu_read_unlock(&srcu_ctl, idx);
+}
+
+static int srcu_torture_completed(void)
+{
+       return srcu_batches_completed(&srcu_ctl);
+}
+
+static void srcu_torture_deferred_free(struct rcu_torture *rp)
+{
+       call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
+}
+
+static void srcu_torture_synchronize(void)
+{
+       synchronize_srcu(&srcu_ctl);
+}
+
+static void srcu_torture_call(struct rcu_head *head,
+                             void (*func)(struct rcu_head *head))
+{
+       call_srcu(&srcu_ctl, head, func);
+}
+
+static void srcu_torture_barrier(void)
+{
+       srcu_barrier(&srcu_ctl);
+}
+
+static int srcu_torture_stats(char *page)
+{
+       int cnt = 0;
+       int cpu;
+       int idx = srcu_ctl.completed & 0x1;
+
+       cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
+                      torture_type, TORTURE_FLAG, idx);
+       for_each_possible_cpu(cpu) {
+               cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
+                              per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
+                              per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
+       }
+       cnt += sprintf(&page[cnt], "\n");
+       return cnt;
+}
+
+static void srcu_torture_synchronize_expedited(void)
+{
+       synchronize_srcu_expedited(&srcu_ctl);
+}
+
+static struct rcu_torture_ops srcu_ops = {
+       .init           = rcu_sync_torture_init,
+       .readlock       = srcu_torture_read_lock,
+       .read_delay     = srcu_read_delay,
+       .readunlock     = srcu_torture_read_unlock,
+       .completed      = srcu_torture_completed,
+       .deferred_free  = srcu_torture_deferred_free,
+       .sync           = srcu_torture_synchronize,
+       .exp_sync       = srcu_torture_synchronize_expedited,
+       .call           = srcu_torture_call,
+       .cb_barrier     = srcu_torture_barrier,
+       .stats          = srcu_torture_stats,
+       .name           = "srcu"
+};
+
+/*
+ * Definitions for sched torture testing.
+ */
+
+static int sched_torture_read_lock(void)
+{
+       preempt_disable();
+       return 0;
+}
+
+static void sched_torture_read_unlock(int idx)
+{
+       preempt_enable();
+}
+
+static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
+{
+       call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops sched_ops = {
+       .init           = rcu_sync_torture_init,
+       .readlock       = sched_torture_read_lock,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = sched_torture_read_unlock,
+       .completed      = rcu_no_completed,
+       .deferred_free  = rcu_sched_torture_deferred_free,
+       .sync           = synchronize_sched,
+       .exp_sync       = synchronize_sched_expedited,
+       .call           = call_rcu_sched,
+       .cb_barrier     = rcu_barrier_sched,
+       .fqs            = rcu_sched_force_quiescent_state,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "sched"
+};
+
+/*
+ * RCU torture priority-boost testing.  Runs one real-time thread per
+ * CPU for moderate bursts, repeatedly registering RCU callbacks and
+ * spinning waiting for them to be invoked.  If a given callback takes
+ * too long to be invoked, we assume that priority inversion has occurred.
+ */
+
+struct rcu_boost_inflight {
+       struct rcu_head rcu;
+       int inflight;
+};
+
+static void rcu_torture_boost_cb(struct rcu_head *head)
+{
+       struct rcu_boost_inflight *rbip =
+               container_of(head, struct rcu_boost_inflight, rcu);
+
+       smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
+       rbip->inflight = 0;
+}
+
+static int rcu_torture_boost(void *arg)
+{
+       unsigned long call_rcu_time;
+       unsigned long endtime;
+       unsigned long oldstarttime;
+       struct rcu_boost_inflight rbi = { .inflight = 0 };
+       struct sched_param sp;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_boost started");
+
+       /* Set real-time priority. */
+       sp.sched_priority = 1;
+       if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
+               n_rcu_torture_boost_rterror++;
+       }
+
+       init_rcu_head_on_stack(&rbi.rcu);
+       /* Each pass through the following loop does one boost-test cycle. */
+       do {
+               /* Wait for the next test interval. */
+               oldstarttime = boost_starttime;
+               while (ULONG_CMP_LT(jiffies, oldstarttime)) {
+                       schedule_timeout_interruptible(oldstarttime - jiffies);
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /* Do one boost-test interval. */
+               endtime = oldstarttime + test_boost_duration * HZ;
+               call_rcu_time = jiffies;
+               while (ULONG_CMP_LT(jiffies, endtime)) {
+                       /* If we don't have a callback in flight, post one. */
+                       if (!rbi.inflight) {
+                               smp_mb(); /* RCU core before ->inflight = 1. */
+                               rbi.inflight = 1;
+                               call_rcu(&rbi.rcu, rcu_torture_boost_cb);
+                               if (jiffies - call_rcu_time >
+                                        test_boost_duration * HZ - HZ / 2) {
+                                       VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
+                                       n_rcu_torture_boost_failure++;
+                               }
+                               call_rcu_time = jiffies;
+                       }
+                       cond_resched();
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /*
+                * Set the start time of the next test interval.
+                * Yes, this is vulnerable to long delays, but such
+                * delays simply cause a false negative for the next
+                * interval.  Besides, we are running at RT priority,
+                * so delays should be relatively rare.
+                */
+               while (oldstarttime == boost_starttime &&
+                      !kthread_should_stop()) {
+                       if (mutex_trylock(&boost_mutex)) {
+                               boost_starttime = jiffies +
+                                                 test_boost_interval * HZ;
+                               n_rcu_torture_boosts++;
+                               mutex_unlock(&boost_mutex);
+                               break;
+                       }
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* Go do the stutter. */
+checkwait:     rcu_stutter_wait("rcu_torture_boost");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+
+       /* Clean up and exit. */
+       VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_boost");
+       while (!kthread_should_stop() || rbi.inflight)
+               schedule_timeout_uninterruptible(1);
+       smp_mb(); /* order accesses to ->inflight before stack-frame death. */
+       destroy_rcu_head_on_stack(&rbi.rcu);
+       return 0;
+}
+
+/*
+ * RCU torture force-quiescent-state kthread.  Repeatedly induces
+ * bursts of calls to force_quiescent_state(), increasing the probability
+ * of occurrence of some important types of race conditions.
+ */
+static int
+rcu_torture_fqs(void *arg)
+{
+       unsigned long fqs_resume_time;
+       int fqs_burst_remaining;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
+       do {
+               fqs_resume_time = jiffies + fqs_stutter * HZ;
+               while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
+                      !kthread_should_stop()) {
+                       schedule_timeout_interruptible(1);
+               }
+               fqs_burst_remaining = fqs_duration;
+               while (fqs_burst_remaining > 0 &&
+                      !kthread_should_stop()) {
+                       cur_ops->fqs();
+                       udelay(fqs_holdoff);
+                       fqs_burst_remaining -= fqs_holdoff;
+               }
+               rcu_stutter_wait("rcu_torture_fqs");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+       VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_fqs");
+       while (!kthread_should_stop())
+               schedule_timeout_uninterruptible(1);
+       return 0;
+}
+
+/*
+ * RCU torture writer kthread.  Repeatedly substitutes a new structure
+ * for that pointed to by rcu_torture_current, freeing the old structure
+ * after a series of grace periods (the "pipeline").
+ */
+static int
+rcu_torture_writer(void *arg)
+{
+       bool exp;
+       int i;
+       struct rcu_torture *rp;
+       struct rcu_torture *rp1;
+       struct rcu_torture *old_rp;
+       static DEFINE_RCU_RANDOM(rand);
+
+       VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
+       set_user_nice(current, 19);
+
+       do {
+               schedule_timeout_uninterruptible(1);
+               rp = rcu_torture_alloc();
+               if (rp == NULL)
+                       continue;
+               rp->rtort_pipe_count = 0;
+               udelay(rcu_random(&rand) & 0x3ff);
+               old_rp = rcu_dereference_check(rcu_torture_current,
+                                              current == writer_task);
+               rp->rtort_mbtest = 1;
+               rcu_assign_pointer(rcu_torture_current, rp);
+               smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
+               if (old_rp) {
+                       i = old_rp->rtort_pipe_count;
+                       if (i > RCU_TORTURE_PIPE_LEN)
+                               i = RCU_TORTURE_PIPE_LEN;
+                       atomic_inc(&rcu_torture_wcount[i]);
+                       old_rp->rtort_pipe_count++;
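+                       /*
+                        * If gp_normal and gp_exp agree (e.g., both left
+                        * at their defaults), choose randomly between
+                        * normal and expedited grace periods; otherwise
+                        * honor the explicit setting.
+                        */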
+                       if (gp_normal == gp_exp)
+                               exp = !!(rcu_random(&rand) & 0x80);
+                       else
+                               exp = gp_exp;
+                       if (!exp) {
+                               cur_ops->deferred_free(old_rp);
+                       } else {
+                               cur_ops->exp_sync();
+                               list_add(&old_rp->rtort_free,
+                                        &rcu_torture_removed);
+                               list_for_each_entry_safe(rp, rp1,
+                                                        &rcu_torture_removed,
+                                                        rtort_free) {
+                                       i = rp->rtort_pipe_count;
+                                       if (i > RCU_TORTURE_PIPE_LEN)
+                                               i = RCU_TORTURE_PIPE_LEN;
+                                       atomic_inc(&rcu_torture_wcount[i]);
+                                       if (++rp->rtort_pipe_count >=
+                                           RCU_TORTURE_PIPE_LEN) {
+                                               rp->rtort_mbtest = 0;
+                                               list_del(&rp->rtort_free);
+                                               rcu_torture_free(rp);
+                                       }
+                                }
+                       }
+               }
+               rcutorture_record_progress(++rcu_torture_current_version);
+               rcu_stutter_wait("rcu_torture_writer");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+       VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_writer");
+       while (!kthread_should_stop())
+               schedule_timeout_uninterruptible(1);
+       return 0;
+}
+
+/*
+ * RCU torture fake writer kthread.  Repeatedly calls sync, with a random
+ * delay between calls.
+ */
+static int
+rcu_torture_fakewriter(void *arg)
+{
+       DEFINE_RCU_RANDOM(rand);
+
+       VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
+       set_user_nice(current, 19);
+
+       do {
+               schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
+               udelay(rcu_random(&rand) & 0x3ff);
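+               /*
+                * Roughly one pass in nfakewriters * 8 exercises
+                * cur_ops->cb_barrier(); the remainder wait for a normal
+                * or expedited grace period below.
+                */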
+               if (cur_ops->cb_barrier != NULL &&
+                   rcu_random(&rand) % (nfakewriters * 8) == 0) {
+                       cur_ops->cb_barrier();
+               } else if (gp_normal == gp_exp) {
+                       if (rcu_random(&rand) & 0x80)
+                               cur_ops->sync();
+                       else
+                               cur_ops->exp_sync();
+               } else if (gp_normal) {
+                       cur_ops->sync();
+               } else {
+                       cur_ops->exp_sync();
+               }
+               rcu_stutter_wait("rcu_torture_fakewriter");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+
+       VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_fakewriter");
+       while (!kthread_should_stop())
+               schedule_timeout_uninterruptible(1);
+       return 0;
+}
+
+void rcutorture_trace_dump(void)
+{
+       static atomic_t beenhere = ATOMIC_INIT(0);
+
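+       /*
+        * The atomic_read() is a cheap fast path; the atomic_xchg() ensures
+        * that only one caller ever reaches ftrace_dump(), even if several
+        * race past the first check.
+        */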
+       if (atomic_read(&beenhere))
+               return;
+       if (atomic_xchg(&beenhere, 1) != 0)
+               return;
+       ftrace_dump(DUMP_ALL);
+}
+
+/*
+ * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
+ * incrementing the corresponding element of the pipeline array.  The
+ * counter in the element should never be greater than 1; otherwise, the
+ * RCU implementation is broken.
+ */
+static void rcu_torture_timer(unsigned long unused)
+{
+       int idx;
+       int completed;
+       int completed_end;
+       static DEFINE_RCU_RANDOM(rand);
+       static DEFINE_SPINLOCK(rand_lock);
+       struct rcu_torture *p;
+       int pipe_count;
+       unsigned long long ts;
+
+       idx = cur_ops->readlock();
+       completed = cur_ops->completed();
+       ts = rcu_trace_clock_local();
+       p = rcu_dereference_check(rcu_torture_current,
+                                 rcu_read_lock_bh_held() ||
+                                 rcu_read_lock_sched_held() ||
+                                 srcu_read_lock_held(&srcu_ctl));
+       if (p == NULL) {
+               /* Leave because rcu_torture_writer is not yet underway */
+               cur_ops->readunlock(idx);
+               return;
+       }
+       if (p->rtort_mbtest == 0)
+               atomic_inc(&n_rcu_torture_mberror);
+       spin_lock(&rand_lock);
+       cur_ops->read_delay(&rand);
+       n_rcu_torture_timers++;
+       spin_unlock(&rand_lock);
+       preempt_disable();
+       pipe_count = p->rtort_pipe_count;
+       if (pipe_count > RCU_TORTURE_PIPE_LEN) {
+               /* Should not happen, but... */
+               pipe_count = RCU_TORTURE_PIPE_LEN;
+       }
+       completed_end = cur_ops->completed();
+       if (pipe_count > 1) {
+               do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
+                                         completed, completed_end);
+               rcutorture_trace_dump();
+       }
+       __this_cpu_inc(rcu_torture_count[pipe_count]);
+       completed = completed_end - completed;
+       if (completed > RCU_TORTURE_PIPE_LEN) {
+               /* Should not happen, but... */
+               completed = RCU_TORTURE_PIPE_LEN;
+       }
+       __this_cpu_inc(rcu_torture_batch[completed]);
+       preempt_enable();
+       cur_ops->readunlock(idx);
+}
+
+/*
+ * RCU torture reader kthread.  Repeatedly dereferences rcu_torture_current,
+ * incrementing the corresponding element of the pipeline array.  The
+ * counter in the element should never be greater than 1; otherwise, the
+ * RCU implementation is broken.
+ */
+static int
+rcu_torture_reader(void *arg)
+{
+       int completed;
+       int completed_end;
+       int idx;
+       DEFINE_RCU_RANDOM(rand);
+       struct rcu_torture *p;
+       int pipe_count;
+       struct timer_list t;
+       unsigned long long ts;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
+       set_user_nice(current, 19);
+       if (irqreader && cur_ops->irq_capable)
+               setup_timer_on_stack(&t, rcu_torture_timer, 0);
+
+       do {
+               if (irqreader && cur_ops->irq_capable) {
+                       if (!timer_pending(&t))
+                               mod_timer(&t, jiffies + 1);
+               }
+               idx = cur_ops->readlock();
+               completed = cur_ops->completed();
+               ts = rcu_trace_clock_local();
+               p = rcu_dereference_check(rcu_torture_current,
+                                         rcu_read_lock_bh_held() ||
+                                         rcu_read_lock_sched_held() ||
+                                         srcu_read_lock_held(&srcu_ctl));
+               if (p == NULL) {
+                       /* Wait for rcu_torture_writer to get underway */
+                       cur_ops->readunlock(idx);
+                       schedule_timeout_interruptible(HZ);
+                       continue;
+               }
+               if (p->rtort_mbtest == 0)
+                       atomic_inc(&n_rcu_torture_mberror);
+               cur_ops->read_delay(&rand);
+               preempt_disable();
+               pipe_count = p->rtort_pipe_count;
+               if (pipe_count > RCU_TORTURE_PIPE_LEN) {
+                       /* Should not happen, but... */
+                       pipe_count = RCU_TORTURE_PIPE_LEN;
+               }
+               completed_end = cur_ops->completed();
+               if (pipe_count > 1) {
+                       do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
+                                                 ts, completed, completed_end);
+                       rcutorture_trace_dump();
+               }
+               __this_cpu_inc(rcu_torture_count[pipe_count]);
+               completed = completed_end - completed;
+               if (completed > RCU_TORTURE_PIPE_LEN) {
+                       /* Should not happen, but... */
+                       completed = RCU_TORTURE_PIPE_LEN;
+               }
+               __this_cpu_inc(rcu_torture_batch[completed]);
+               preempt_enable();
+               cur_ops->readunlock(idx);
+               schedule();
+               rcu_stutter_wait("rcu_torture_reader");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+       VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_reader");
+       if (irqreader && cur_ops->irq_capable)
+               del_timer_sync(&t);
+       while (!kthread_should_stop())
+               schedule_timeout_uninterruptible(1);
+       return 0;
+}
+
+/*
+ * Create an RCU-torture statistics message in the specified buffer.
+ */
+static int
+rcu_torture_printk(char *page)
+{
+       int cnt = 0;
+       int cpu;
+       int i;
+       long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+       long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+
+       for_each_possible_cpu(cpu) {
+               for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
+                       pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i];
+                       batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i];
+               }
+       }
+       for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) {
+               if (pipesummary[i] != 0)
+                       break;
+       }
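+       /*
+        * Abbreviations in the lines built below: rtc = rcu_torture_current,
+        * ver = current update version, tfle = torture free list empty,
+        * rta/rtaf/rtf = allocations/allocation failures/frees,
+        * rtmbe = memory-barrier-test errors, rtbke/rtbre = boost
+        * kthread/RT-priority errors, rtbf = boost failures, rtb = boosts,
+        * nt = timer-handler reads.
+        */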
+       cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
+       cnt += sprintf(&page[cnt],
+                      "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
+                      rcu_torture_current,
+                      rcu_torture_current_version,
+                      list_empty(&rcu_torture_freelist),
+                      atomic_read(&n_rcu_torture_alloc),
+                      atomic_read(&n_rcu_torture_alloc_fail),
+                      atomic_read(&n_rcu_torture_free));
+       cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
+                      atomic_read(&n_rcu_torture_mberror),
+                      n_rcu_torture_boost_ktrerror,
+                      n_rcu_torture_boost_rterror);
+       cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
+                      n_rcu_torture_boost_failure,
+                      n_rcu_torture_boosts,
+                      n_rcu_torture_timers);
+       cnt += sprintf(&page[cnt],
+                      "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
+                      n_online_successes, n_online_attempts,
+                      n_offline_successes, n_offline_attempts,
+                      min_online, max_online,
+                      min_offline, max_offline,
+                      sum_online, sum_offline, HZ);
+       cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
+                      n_barrier_successes,
+                      n_barrier_attempts,
+                      n_rcu_torture_barrier_error);
+       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
+       if (atomic_read(&n_rcu_torture_mberror) != 0 ||
+           n_rcu_torture_barrier_error != 0 ||
+           n_rcu_torture_boost_ktrerror != 0 ||
+           n_rcu_torture_boost_rterror != 0 ||
+           n_rcu_torture_boost_failure != 0 ||
+           i > 1) {
+               cnt += sprintf(&page[cnt], "!!! ");
+               atomic_inc(&n_rcu_torture_error);
+               WARN_ON_ONCE(1);
+       }
+       cnt += sprintf(&page[cnt], "Reader Pipe: ");
+       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
+               cnt += sprintf(&page[cnt], " %ld", pipesummary[i]);
+       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
+       cnt += sprintf(&page[cnt], "Reader Batch: ");
+       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
+               cnt += sprintf(&page[cnt], " %ld", batchsummary[i]);
+       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
+       cnt += sprintf(&page[cnt], "Free-Block Circulation: ");
+       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
+               cnt += sprintf(&page[cnt], " %d",
+                              atomic_read(&rcu_torture_wcount[i]));
+       }
+       cnt += sprintf(&page[cnt], "\n");
+       if (cur_ops->stats)
+               cnt += cur_ops->stats(&page[cnt]);
+       return cnt;
+}
+
+/*
+ * Print torture statistics.  Caller must ensure that there is only
+ * one call to this function at a given time!!!  This is normally
+ * accomplished by relying on the module system to allow only one copy
+ * of the module to be loaded, and then by giving the rcu_torture_stats
+ * kthread full control (or to the init/cleanup functions when the
+ * rcu_torture_stats kthread is not running).
+ */
+static void
+rcu_torture_stats_print(void)
+{
+       int cnt;
+
+       cnt = rcu_torture_printk(printk_buf);
+       pr_alert("%s", printk_buf);
+}
+
+/*
+ * Periodically prints torture statistics, if periodic statistics printing
+ * was specified via the stat_interval module parameter.
+ *
+ * No need to worry about fullstop here, since this one doesn't reference
+ * volatile state or register callbacks.
+ */
+static int
+rcu_torture_stats(void *arg)
+{
+       VERBOSE_PRINTK_STRING("rcu_torture_stats task started");
+       do {
+               schedule_timeout_interruptible(stat_interval * HZ);
+               rcu_torture_stats_print();
+               rcutorture_shutdown_absorb("rcu_torture_stats");
+       } while (!kthread_should_stop());
+       VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
+       return 0;
+}
+
+static int rcu_idle_cpu;       /* Force all torture tasks off this CPU */
+
+/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle.  A special
+ * case is @rcu_idle_cpu = -1, in which case the tasks may run on all CPUs.
+ */
+static void rcu_torture_shuffle_tasks(void)
+{
+       int i;
+
+       cpumask_setall(shuffle_tmp_mask);
+       get_online_cpus();
+
+       /* No point in shuffling if there is only one online CPU (ex: UP) */
+       if (num_online_cpus() == 1) {
+               put_online_cpus();
+               return;
+       }
+
+       if (rcu_idle_cpu != -1)
+               cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask);
+
+       set_cpus_allowed_ptr(current, shuffle_tmp_mask);
+
+       if (reader_tasks) {
+               for (i = 0; i < nrealreaders; i++)
+                       if (reader_tasks[i])
+                               set_cpus_allowed_ptr(reader_tasks[i],
+                                                    shuffle_tmp_mask);
+       }
+       if (fakewriter_tasks) {
+               for (i = 0; i < nfakewriters; i++)
+                       if (fakewriter_tasks[i])
+                               set_cpus_allowed_ptr(fakewriter_tasks[i],
+                                                    shuffle_tmp_mask);
+       }
+       if (writer_task)
+               set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
+       if (stats_task)
+               set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
+       if (stutter_task)
+               set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
+       if (fqs_task)
+               set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
+       if (shutdown_task)
+               set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
+#ifdef CONFIG_HOTPLUG_CPU
+       if (onoff_task)
+               set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+       if (stall_task)
+               set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
+       if (barrier_cbs_tasks)
+               for (i = 0; i < n_barrier_cbs; i++)
+                       if (barrier_cbs_tasks[i])
+                               set_cpus_allowed_ptr(barrier_cbs_tasks[i],
+                                                    shuffle_tmp_mask);
+       if (barrier_task)
+               set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
+
+       if (rcu_idle_cpu == -1)
+               rcu_idle_cpu = num_online_cpus() - 1;
+       else
+               rcu_idle_cpu--;
+
+       put_online_cpus();
+}
+
+/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
+ * system to become idle in turn and shut off its timer tick.  This is meant
+ * to test RCU's support for such tickless-idle CPUs.
+ */
+static int
+rcu_torture_shuffle(void *arg)
+{
+       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started");
+       do {
+               schedule_timeout_interruptible(shuffle_interval * HZ);
+               rcu_torture_shuffle_tasks();
+               rcutorture_shutdown_absorb("rcu_torture_shuffle");
+       } while (!kthread_should_stop());
+       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
+       return 0;
+}
+
+/* Cause the rcutorture test to "stutter", starting and stopping all
+ * threads periodically.
+ */
+static int
+rcu_torture_stutter(void *arg)
+{
+       VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
+       do {
+               schedule_timeout_interruptible(stutter * HZ);
+               stutter_pause_test = 1;
+               if (!kthread_should_stop())
+                       schedule_timeout_interruptible(stutter * HZ);
+               stutter_pause_test = 0;
+               rcutorture_shutdown_absorb("rcu_torture_stutter");
+       } while (!kthread_should_stop());
+       VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
+       return 0;
+}
+
+static inline void
+rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
+{
+       pr_alert("%s" TORTURE_FLAG
+                "--- %s: nreaders=%d nfakewriters=%d "
+                "stat_interval=%d verbose=%d test_no_idle_hz=%d "
+                "shuffle_interval=%d stutter=%d irqreader=%d "
+                "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
+                "test_boost=%d/%d test_boost_interval=%d "
+                "test_boost_duration=%d shutdown_secs=%d "
+                "stall_cpu=%d stall_cpu_holdoff=%d "
+                "n_barrier_cbs=%d "
+                "onoff_interval=%d onoff_holdoff=%d\n",
+                torture_type, tag, nrealreaders, nfakewriters,
+                stat_interval, verbose, test_no_idle_hz, shuffle_interval,
+                stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
+                test_boost, cur_ops->can_boost,
+                test_boost_interval, test_boost_duration, shutdown_secs,
+                stall_cpu, stall_cpu_holdoff,
+                n_barrier_cbs,
+                onoff_interval, onoff_holdoff);
+}
+
+static struct notifier_block rcutorture_shutdown_nb = {
+       .notifier_call = rcutorture_shutdown_notify,
+};
+
+static void rcutorture_booster_cleanup(int cpu)
+{
+       struct task_struct *t;
+
+       if (boost_tasks[cpu] == NULL)
+               return;
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
+       t = boost_tasks[cpu];
+       boost_tasks[cpu] = NULL;
+       mutex_unlock(&boost_mutex);
+
+       /* This must be outside of the mutex, otherwise deadlock! */
+       kthread_stop(t);
+       boost_tasks[cpu] = NULL;
+}
+
+static int rcutorture_booster_init(int cpu)
+{
+       int retval;
+
+       if (boost_tasks[cpu] != NULL)
+               return 0;  /* Already created, nothing more to do. */
+
+       /* Don't allow time recalculation while creating a new task. */
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
+       boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
+                                                 cpu_to_node(cpu),
+                                                 "rcu_torture_boost");
+       if (IS_ERR(boost_tasks[cpu])) {
+               retval = PTR_ERR(boost_tasks[cpu]);
+               VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
+               n_rcu_torture_boost_ktrerror++;
+               boost_tasks[cpu] = NULL;
+               mutex_unlock(&boost_mutex);
+               return retval;
+       }
+       kthread_bind(boost_tasks[cpu], cpu);
+       wake_up_process(boost_tasks[cpu]);
+       mutex_unlock(&boost_mutex);
+       return 0;
+}
+
+/*
+ * Cause the rcutorture test to shut down the system after the test has
+ * run for the time specified by the shutdown_secs module parameter.
+ */
+static int
+rcu_torture_shutdown(void *arg)
+{
+       long delta;
+       unsigned long jiffies_snap;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
+       jiffies_snap = ACCESS_ONCE(jiffies);
+       while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
+              !kthread_should_stop()) {
+               delta = shutdown_time - jiffies_snap;
+               if (verbose)
+                       pr_alert("%s" TORTURE_FLAG
+                                "rcu_torture_shutdown task: %lu jiffies remaining\n",
+                                torture_type, delta);
+               schedule_timeout_interruptible(delta);
+               jiffies_snap = ACCESS_ONCE(jiffies);
+       }
+       if (kthread_should_stop()) {
+               VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
+               return 0;
+       }
+
+       /* OK, shut down the system. */
+
+       VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
+       shutdown_task = NULL;   /* Avoid self-kill deadlock. */
+       rcu_torture_cleanup();  /* Get the success/failure message. */
+       kernel_power_off();     /* Shut down the system. */
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Execute random CPU-hotplug operations at the interval specified
+ * by the onoff_interval.
+ */
+static int
+rcu_torture_onoff(void *arg)
+{
+       int cpu;
+       unsigned long delta;
+       int maxcpu = -1;
+       DEFINE_RCU_RANDOM(rand);
+       int ret;
+       unsigned long starttime;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
+       for_each_online_cpu(cpu)
+               maxcpu = cpu;
+       WARN_ON(maxcpu < 0);
+       if (onoff_holdoff > 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff");
+               schedule_timeout_interruptible(onoff_holdoff * HZ);
+               VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff");
+       }
+       while (!kthread_should_stop()) {
+               cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
+               if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
+                       if (verbose)
+                               pr_alert("%s" TORTURE_FLAG
+                                        "rcu_torture_onoff task: offlining %d\n",
+                                        torture_type, cpu);
+                       starttime = jiffies;
+                       n_offline_attempts++;
+                       ret = cpu_down(cpu);
+                       if (ret) {
+                               if (verbose)
+                                       pr_alert("%s" TORTURE_FLAG
+                                                "rcu_torture_onoff task: offline %d failed: errno %d\n",
+                                                torture_type, cpu, ret);
+                       } else {
+                               if (verbose)
+                                       pr_alert("%s" TORTURE_FLAG
+                                                "rcu_torture_onoff task: offlined %d\n",
+                                                torture_type, cpu);
+                               n_offline_successes++;
+                               delta = jiffies - starttime;
+                               sum_offline += delta;
+                               if (min_offline < 0) {
+                                       min_offline = delta;
+                                       max_offline = delta;
+                               }
+                               if (min_offline > delta)
+                                       min_offline = delta;
+                               if (max_offline < delta)
+                                       max_offline = delta;
+                       }
+               } else if (cpu_is_hotpluggable(cpu)) {
+                       if (verbose)
+                               pr_alert("%s" TORTURE_FLAG
+                                        "rcu_torture_onoff task: onlining %d\n",
+                                        torture_type, cpu);
+                       starttime = jiffies;
+                       n_online_attempts++;
+                       ret = cpu_up(cpu);
+                       if (ret) {
+                               if (verbose)
+                                       pr_alert("%s" TORTURE_FLAG
+                                                "rcu_torture_onoff task: online %d failed: errno %d\n",
+                                                torture_type, cpu, ret);
+                       } else {
+                               if (verbose)
+                                       pr_alert("%s" TORTURE_FLAG
+                                                "rcu_torture_onoff task: onlined %d\n",
+                                                torture_type, cpu);
+                               n_online_successes++;
+                               delta = jiffies - starttime;
+                               sum_online += delta;
+                               if (min_online < 0) {
+                                       min_online = delta;
+                                       max_online = delta;
+                               }
+                               if (min_online > delta)
+                                       min_online = delta;
+                               if (max_online < delta)
+                                       max_online = delta;
+                       }
+               }
+               schedule_timeout_interruptible(onoff_interval * HZ);
+       }
+       VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
+       return 0;
+}
+
+static int
+rcu_torture_onoff_init(void)
+{
+       int ret;
+
+       if (onoff_interval <= 0)
+               return 0;
+       onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
+       if (IS_ERR(onoff_task)) {
+               ret = PTR_ERR(onoff_task);
+               onoff_task = NULL;
+               return ret;
+       }
+       return 0;
+}
+
+static void rcu_torture_onoff_cleanup(void)
+{
+       if (onoff_task == NULL)
+               return;
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
+       kthread_stop(onoff_task);
+       onoff_task = NULL;
+}
+
+#else /* #ifdef CONFIG_HOTPLUG_CPU */
+
+static int
+rcu_torture_onoff_init(void)
+{
+       return 0;
+}
+
+static void rcu_torture_onoff_cleanup(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * CPU-stall kthread.  It waits as specified by stall_cpu_holdoff, then
+ * induces a CPU stall for the time specified by stall_cpu.
+ */
+static int rcu_torture_stall(void *args)
+{
+       unsigned long stop_at;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_stall task started");
+       if (stall_cpu_holdoff > 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff");
+               schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
+               VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff");
+       }
+       if (!kthread_should_stop()) {
+               stop_at = get_seconds() + stall_cpu;
+               /* RCU CPU stall is expected behavior in following code. */
+               pr_alert("rcu_torture_stall start.\n");
+               rcu_read_lock();
+               preempt_disable();
+               while (ULONG_CMP_LT(get_seconds(), stop_at))
+                       continue;  /* Induce RCU CPU stall warning. */
+               preempt_enable();
+               rcu_read_unlock();
+               pr_alert("rcu_torture_stall end.\n");
+       }
+       rcutorture_shutdown_absorb("rcu_torture_stall");
+       while (!kthread_should_stop())
+               schedule_timeout_interruptible(10 * HZ);
+       return 0;
+}
+
+/* Spawn CPU-stall kthread, if stall_cpu specified. */
+static int __init rcu_torture_stall_init(void)
+{
+       int ret;
+
+       if (stall_cpu <= 0)
+               return 0;
+       stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall");
+       if (IS_ERR(stall_task)) {
+               ret = PTR_ERR(stall_task);
+               stall_task = NULL;
+               return ret;
+       }
+       return 0;
+}
+
+/* Clean up after the CPU-stall kthread, if one was spawned. */
+static void rcu_torture_stall_cleanup(void)
+{
+       if (stall_task == NULL)
+               return;
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
+       kthread_stop(stall_task);
+       stall_task = NULL;
+}
+
+/* Callback function for RCU barrier testing. */
+void rcu_torture_barrier_cbf(struct rcu_head *rcu)
+{
+       atomic_inc(&barrier_cbs_invoked);
+}
+
+/* kthread function to register callbacks used to test RCU barriers. */
+static int rcu_torture_barrier_cbs(void *arg)
+{
+       long myid = (long)arg;
+       bool lastphase = false;
+       struct rcu_head rcu;
+
+       init_rcu_head_on_stack(&rcu);
+       VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started");
+       set_user_nice(current, 19);
+       do {
+               wait_event(barrier_cbs_wq[myid],
+                          barrier_phase != lastphase ||
+                          kthread_should_stop() ||
+                          fullstop != FULLSTOP_DONTSTOP);
+               lastphase = barrier_phase;
+               smp_mb(); /* ensure barrier_phase load before ->call(). */
+               if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
+                       break;
+               cur_ops->call(&rcu, rcu_torture_barrier_cbf);
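+               /* Last kthread to post its callback wakes up rcu_torture_barrier(). */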
+               if (atomic_dec_and_test(&barrier_cbs_count))
+                       wake_up(&barrier_wq);
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+       VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
+       while (!kthread_should_stop())
+               schedule_timeout_interruptible(1);
+       cur_ops->cb_barrier();
+       destroy_rcu_head_on_stack(&rcu);
+       return 0;
+}
+
+/* kthread function to drive and coordinate RCU barrier testing. */
+static int rcu_torture_barrier(void *arg)
+{
+       int i;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting");
+       do {
+               atomic_set(&barrier_cbs_invoked, 0);
+               atomic_set(&barrier_cbs_count, n_barrier_cbs);
+               smp_mb(); /* Ensure barrier_phase after prior assignments. */
+               barrier_phase = !barrier_phase;
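+               /*
+                * The phase flip is what satisfies the wait_event() in
+                * rcu_torture_barrier_cbs() above.
+                */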
+               for (i = 0; i < n_barrier_cbs; i++)
+                       wake_up(&barrier_cbs_wq[i]);
+               wait_event(barrier_wq,
+                          atomic_read(&barrier_cbs_count) == 0 ||
+                          kthread_should_stop() ||
+                          fullstop != FULLSTOP_DONTSTOP);
+               if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
+                       break;
+               n_barrier_attempts++;
+               cur_ops->cb_barrier();
+               if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
+                       n_rcu_torture_barrier_error++;
+                       WARN_ON_ONCE(1);
+               }
+               n_barrier_successes++;
+               schedule_timeout_interruptible(HZ / 10);
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+       VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_barrier");
+       while (!kthread_should_stop())
+               schedule_timeout_interruptible(1);
+       return 0;
+}
+
+/* Initialize RCU barrier testing. */
+static int rcu_torture_barrier_init(void)
+{
+       int i;
+       int ret;
+
+       if (n_barrier_cbs == 0)
+               return 0;
+       if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
+               pr_alert("%s" TORTURE_FLAG
+                        " Call or barrier ops missing for %s,\n",
+                        torture_type, cur_ops->name);
+               pr_alert("%s" TORTURE_FLAG
+                        " RCU barrier testing omitted from run.\n",
+                        torture_type);
+               return 0;
+       }
+       atomic_set(&barrier_cbs_count, 0);
+       atomic_set(&barrier_cbs_invoked, 0);
+       barrier_cbs_tasks =
+               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
+                       GFP_KERNEL);
+       barrier_cbs_wq =
+               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
+                       GFP_KERNEL);
+       if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
+               return -ENOMEM;
+       for (i = 0; i < n_barrier_cbs; i++) {
+               init_waitqueue_head(&barrier_cbs_wq[i]);
+               barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs,
+                                                  (void *)(long)i,
+                                                  "rcu_torture_barrier_cbs");
+               if (IS_ERR(barrier_cbs_tasks[i])) {
+                       ret = PTR_ERR(barrier_cbs_tasks[i]);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
+                       barrier_cbs_tasks[i] = NULL;
+                       return ret;
+               }
+       }
+       barrier_task = kthread_run(rcu_torture_barrier, NULL,
+                                  "rcu_torture_barrier");
+       if (IS_ERR(barrier_task)) {
+               ret = PTR_ERR(barrier_task);
+               VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
+               barrier_task = NULL;
+       }
+       return 0;
+}
+
+/* Clean up after RCU barrier testing. */
+static void rcu_torture_barrier_cleanup(void)
+{
+       int i;
+
+       if (barrier_task != NULL) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
+               kthread_stop(barrier_task);
+               barrier_task = NULL;
+       }
+       if (barrier_cbs_tasks != NULL) {
+               for (i = 0; i < n_barrier_cbs; i++) {
+                       if (barrier_cbs_tasks[i] != NULL) {
+                               VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task");
+                               kthread_stop(barrier_cbs_tasks[i]);
+                               barrier_cbs_tasks[i] = NULL;
+                       }
+               }
+               kfree(barrier_cbs_tasks);
+               barrier_cbs_tasks = NULL;
+       }
+       if (barrier_cbs_wq != NULL) {
+               kfree(barrier_cbs_wq);
+               barrier_cbs_wq = NULL;
+       }
+}
+
+static int rcutorture_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               (void)rcutorture_booster_init(cpu);
+               break;
+       case CPU_DOWN_PREPARE:
+               rcutorture_booster_cleanup(cpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block rcutorture_cpu_nb = {
+       .notifier_call = rcutorture_cpu_notify,
+};
+
+static void
+rcu_torture_cleanup(void)
+{
+       int i;
+
+       mutex_lock(&fullstop_mutex);
+       rcutorture_record_test_transition();
+       if (fullstop == FULLSTOP_SHUTDOWN) {
+               pr_warn(/* but going down anyway, so... */
+                      "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
+               mutex_unlock(&fullstop_mutex);
+               schedule_timeout_uninterruptible(10);
+               if (cur_ops->cb_barrier != NULL)
+                       cur_ops->cb_barrier();
+               return;
+       }
+       fullstop = FULLSTOP_RMMOD;
+       mutex_unlock(&fullstop_mutex);
+       unregister_reboot_notifier(&rcutorture_shutdown_nb);
+       rcu_torture_barrier_cleanup();
+       rcu_torture_stall_cleanup();
+       if (stutter_task) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
+               kthread_stop(stutter_task);
+       }
+       stutter_task = NULL;
+       if (shuffler_task) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
+               kthread_stop(shuffler_task);
+               free_cpumask_var(shuffle_tmp_mask);
+       }
+       shuffler_task = NULL;
+
+       if (writer_task) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
+               kthread_stop(writer_task);
+       }
+       writer_task = NULL;
+
+       if (reader_tasks) {
+               for (i = 0; i < nrealreaders; i++) {
+                       if (reader_tasks[i]) {
+                               VERBOSE_PRINTK_STRING(
+                                       "Stopping rcu_torture_reader task");
+                               kthread_stop(reader_tasks[i]);
+                       }
+                       reader_tasks[i] = NULL;
+               }
+               kfree(reader_tasks);
+               reader_tasks = NULL;
+       }
+       rcu_torture_current = NULL;
+
+       if (fakewriter_tasks) {
+               for (i = 0; i < nfakewriters; i++) {
+                       if (fakewriter_tasks[i]) {
+                               VERBOSE_PRINTK_STRING(
+                                       "Stopping rcu_torture_fakewriter task");
+                               kthread_stop(fakewriter_tasks[i]);
+                       }
+                       fakewriter_tasks[i] = NULL;
+               }
+               kfree(fakewriter_tasks);
+               fakewriter_tasks = NULL;
+       }
+
+       if (stats_task) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
+               kthread_stop(stats_task);
+       }
+       stats_task = NULL;
+
+       if (fqs_task) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
+               kthread_stop(fqs_task);
+       }
+       fqs_task = NULL;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+               unregister_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i)
+                       rcutorture_booster_cleanup(i);
+       }
+       if (shutdown_task != NULL) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
+               kthread_stop(shutdown_task);
+       }
+       shutdown_task = NULL;
+       rcu_torture_onoff_cleanup();
+
+       /* Wait for all RCU callbacks to fire.  */
+
+       if (cur_ops->cb_barrier != NULL)
+               cur_ops->cb_barrier();
+
+       rcu_torture_stats_print();  /* -After- the stats thread is stopped! */
+
+       if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
+               rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
+       else if (n_online_successes != n_online_attempts ||
+                n_offline_successes != n_offline_attempts)
+               rcu_torture_print_module_parms(cur_ops,
+                                              "End of test: RCU_HOTPLUG");
+       else
+               rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
+}
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+static void rcu_torture_leak_cb(struct rcu_head *rhp)
+{
+}
+
+static void rcu_torture_err_cb(struct rcu_head *rhp)
+{
+       /*
+        * This -might- happen due to race conditions, but is unlikely.
+        * The scenario that leads to this happening is that the
+        * first of the pair of duplicate callbacks is queued,
+        * someone else starts a grace period that includes that
+        * callback, then the second of the pair must wait for the
+        * next grace period.  Unlikely, but can happen.  If it
+        * does happen, the debug-objects subsystem won't have splatted.
+        */
+       pr_alert("rcutorture: duplicated callback was invoked.\n");
+}
+#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+
+/*
+ * Verify that a duplicate call_rcu() causes debug-objects to complain,
+ * but only if CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.  Otherwise, say that the
+ * test cannot be carried out.
+ */
+static void rcu_test_debug_objects(void)
+{
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+       struct rcu_head rh1;
+       struct rcu_head rh2;
+
+       init_rcu_head_on_stack(&rh1);
+       init_rcu_head_on_stack(&rh2);
+       pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
+
+       /* Try to queue the rh2 pair of callbacks for the same grace period. */
+       preempt_disable(); /* Prevent preemption from interrupting test. */
+       rcu_read_lock(); /* Make it impossible to finish a grace period. */
+       call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */
+       local_irq_disable(); /* Make it harder to start a new grace period. */
+       call_rcu(&rh2, rcu_torture_leak_cb);
+       call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
+       local_irq_enable();
+       rcu_read_unlock();
+       preempt_enable();
+
+       /* Wait for them all to get done so we can safely return. */
+       rcu_barrier();
+       pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
+       destroy_rcu_head_on_stack(&rh1);
+       destroy_rcu_head_on_stack(&rh2);
+#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+       pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
+#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+}
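+
+/*
+ * The test above is invoked from rcu_torture_init() below when the
+ * object_debug flag is set; assuming the module is loaded as rcutorture,
+ * something like "modprobe rcutorture object_debug=1" would request it.
+ */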
+
+static int __init
+rcu_torture_init(void)
+{
+       int i;
+       int cpu;
+       int firsterr = 0;
+       int retval;
+       static struct rcu_torture_ops *torture_ops[] = {
+               &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+       };
+
+       mutex_lock(&fullstop_mutex);
+
+       /* Process args and tell the world that the torturer is on the job. */
+       for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
+               cur_ops = torture_ops[i];
+               if (strcmp(torture_type, cur_ops->name) == 0)
+                       break;
+       }
+       if (i == ARRAY_SIZE(torture_ops)) {
+               pr_alert("rcu-torture: invalid torture type: \"%s\"\n",
+                        torture_type);
+               pr_alert("rcu-torture types:");
+               for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
+                       pr_alert(" %s", torture_ops[i]->name);
+               pr_alert("\n");
+               mutex_unlock(&fullstop_mutex);
+               return -EINVAL;
+       }
+       if (cur_ops->fqs == NULL && fqs_duration != 0) {
+               pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
+               fqs_duration = 0;
+       }
+       if (cur_ops->init)
+               cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+
+       if (nreaders >= 0)
+               nrealreaders = nreaders;
+       else
+               nrealreaders = 2 * num_online_cpus();
+       rcu_torture_print_module_parms(cur_ops, "Start of test");
+       fullstop = FULLSTOP_DONTSTOP;
+
+       /* Set up the freelist. */
+
+       INIT_LIST_HEAD(&rcu_torture_freelist);
+       for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) {
+               rcu_tortures[i].rtort_mbtest = 0;
+               list_add_tail(&rcu_tortures[i].rtort_free,
+                             &rcu_torture_freelist);
+       }
+
+       /* Initialize the statistics so that each run gets its own numbers. */
+
+       rcu_torture_current = NULL;
+       rcu_torture_current_version = 0;
+       atomic_set(&n_rcu_torture_alloc, 0);
+       atomic_set(&n_rcu_torture_alloc_fail, 0);
+       atomic_set(&n_rcu_torture_free, 0);
+       atomic_set(&n_rcu_torture_mberror, 0);
+       atomic_set(&n_rcu_torture_error, 0);
+       n_rcu_torture_barrier_error = 0;
+       n_rcu_torture_boost_ktrerror = 0;
+       n_rcu_torture_boost_rterror = 0;
+       n_rcu_torture_boost_failure = 0;
+       n_rcu_torture_boosts = 0;
+       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
+               atomic_set(&rcu_torture_wcount[i], 0);
+       for_each_possible_cpu(cpu) {
+               for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
+                       per_cpu(rcu_torture_count, cpu)[i] = 0;
+                       per_cpu(rcu_torture_batch, cpu)[i] = 0;
+               }
+       }
+
+       /* Start up the kthreads. */
+
+       VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task");
+       writer_task = kthread_create(rcu_torture_writer, NULL,
+                                    "rcu_torture_writer");
+       if (IS_ERR(writer_task)) {
+               firsterr = PTR_ERR(writer_task);
+               VERBOSE_PRINTK_ERRSTRING("Failed to create writer");
+               writer_task = NULL;
+               goto unwind;
+       }
+       wake_up_process(writer_task);
+       fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
+                                  GFP_KERNEL);
+       if (fakewriter_tasks == NULL) {
+               VERBOSE_PRINTK_ERRSTRING("out of memory");
+               firsterr = -ENOMEM;
+               goto unwind;
+       }
+       for (i = 0; i < nfakewriters; i++) {
+               VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task");
+               fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL,
+                                                 "rcu_torture_fakewriter");
+               if (IS_ERR(fakewriter_tasks[i])) {
+                       firsterr = PTR_ERR(fakewriter_tasks[i]);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
+                       fakewriter_tasks[i] = NULL;
+                       goto unwind;
+               }
+       }
+       reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
+                              GFP_KERNEL);
+       if (reader_tasks == NULL) {
+               VERBOSE_PRINTK_ERRSTRING("out of memory");
+               firsterr = -ENOMEM;
+               goto unwind;
+       }
+       for (i = 0; i < nrealreaders; i++) {
+               VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task");
+               reader_tasks[i] = kthread_run(rcu_torture_reader, NULL,
+                                             "rcu_torture_reader");
+               if (IS_ERR(reader_tasks[i])) {
+                       firsterr = PTR_ERR(reader_tasks[i]);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create reader");
+                       reader_tasks[i] = NULL;
+                       goto unwind;
+               }
+       }
+       if (stat_interval > 0) {
+               VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task");
+               stats_task = kthread_run(rcu_torture_stats, NULL,
+                                       "rcu_torture_stats");
+               if (IS_ERR(stats_task)) {
+                       firsterr = PTR_ERR(stats_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create stats");
+                       stats_task = NULL;
+                       goto unwind;
+               }
+       }
+       if (test_no_idle_hz) {
+               rcu_idle_cpu = num_online_cpus() - 1;
+
+               if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
+                       firsterr = -ENOMEM;
+                       VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask");
+                       goto unwind;
+               }
+
+               /* Create the shuffler thread */
+               shuffler_task = kthread_run(rcu_torture_shuffle, NULL,
+                                         "rcu_torture_shuffle");
+               if (IS_ERR(shuffler_task)) {
+                       free_cpumask_var(shuffle_tmp_mask);
+                       firsterr = PTR_ERR(shuffler_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler");
+                       shuffler_task = NULL;
+                       goto unwind;
+               }
+       }
+       if (stutter < 0)
+               stutter = 0;
+       if (stutter) {
+               /* Create the stutter thread */
+               stutter_task = kthread_run(rcu_torture_stutter, NULL,
+                                         "rcu_torture_stutter");
+               if (IS_ERR(stutter_task)) {
+                       firsterr = PTR_ERR(stutter_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
+                       stutter_task = NULL;
+                       goto unwind;
+               }
+       }
+       if (fqs_duration < 0)
+               fqs_duration = 0;
+       if (fqs_duration) {
+               /* Create the fqs thread */
+               fqs_task = kthread_run(rcu_torture_fqs, NULL,
+                                      "rcu_torture_fqs");
+               if (IS_ERR(fqs_task)) {
+                       firsterr = PTR_ERR(fqs_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
+                       fqs_task = NULL;
+                       goto unwind;
+               }
+       }
+       if (test_boost_interval < 1)
+               test_boost_interval = 1;
+       if (test_boost_duration < 2)
+               test_boost_duration = 2;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+
+               boost_starttime = jiffies + test_boost_interval * HZ;
+               register_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i) {
+                       if (cpu_is_offline(i))
+                               continue;  /* Heuristic: CPU can go offline. */
+                       retval = rcutorture_booster_init(i);
+                       if (retval < 0) {
+                               firsterr = retval;
+                               goto unwind;
+                       }
+               }
+       }
+       if (shutdown_secs > 0) {
+               shutdown_time = jiffies + shutdown_secs * HZ;
+               shutdown_task = kthread_create(rcu_torture_shutdown, NULL,
+                                              "rcu_torture_shutdown");
+               if (IS_ERR(shutdown_task)) {
+                       firsterr = PTR_ERR(shutdown_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
+                       shutdown_task = NULL;
+                       goto unwind;
+               }
+               wake_up_process(shutdown_task);
+       }
+       i = rcu_torture_onoff_init();
+       if (i != 0) {
+               firsterr = i;
+               goto unwind;
+       }
+       register_reboot_notifier(&rcutorture_shutdown_nb);
+       i = rcu_torture_stall_init();
+       if (i != 0) {
+               firsterr = i;
+               goto unwind;
+       }
+       retval = rcu_torture_barrier_init();
+       if (retval != 0) {
+               firsterr = retval;
+               goto unwind;
+       }
+       if (object_debug)
+               rcu_test_debug_objects();
+       rcutorture_record_test_transition();
+       mutex_unlock(&fullstop_mutex);
+       return 0;
+
+unwind:
+       mutex_unlock(&fullstop_mutex);
+       rcu_torture_cleanup();
+       return firsterr;
+}
+
+module_init(rcu_torture_init);
+module_exit(rcu_torture_cleanup);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
new file mode 100644 (file)
index 0000000..4c06ddf
--- /dev/null
@@ -0,0 +1,3416 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Authors: Dipankar Sarma <dipankar@in.ibm.com>
+ *         Manfred Spraul <manfred@colorfullife.com>
+ *         Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *     Documentation/RCU
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/nmi.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <linux/kernel_stat.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
+#include <linux/prefetch.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
+#include <linux/random.h>
+#include <linux/ftrace_event.h>
+#include <linux/suspend.h>
+
+#include "tree.h"
+#include <trace/events/rcu.h>
+
+#include "rcu.h"
+
+MODULE_ALIAS("rcutree");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcutree."
+
+/* Data structures. */
+
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
+
+/*
+ * In order to export the rcu_state name to the tracing tools, it
+ * needs to be added to the __tracepoint_string section.  This requires
+ * defining a separate variable tp_<sname>_varname that points to the
+ * string being used, which allows the userspace tracing tools to
+ * translate the string address back into the matching string.
+ */
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+static char sname##_varname[] = #sname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
+struct rcu_state sname##_state = { \
+       .level = { &sname##_state.node[0] }, \
+       .call = cr, \
+       .fqs_state = RCU_GP_IDLE, \
+       .gpnum = 0UL - 300UL, \
+       .completed = 0UL - 300UL, \
+       .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
+       .orphan_nxttail = &sname##_state.orphan_nxtlist, \
+       .orphan_donetail = &sname##_state.orphan_donelist, \
+       .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+       .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
+       .name = sname##_varname, \
+       .abbr = sabbr, \
+}; \
+DEFINE_PER_CPU(struct rcu_data, sname##_data)
+
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
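+/*
+ * For example, the first invocation above expands to the definitions of
+ * rcu_sched_state and the per-CPU rcu_sched_data, which are referenced
+ * throughout the rest of this file.
+ */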
+
+static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0444);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+       NUM_RCU_LVL_0,
+       NUM_RCU_LVL_1,
+       NUM_RCU_LVL_2,
+       NUM_RCU_LVL_3,
+       NUM_RCU_LVL_4,
+};
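+/* num_rcu_lvl[0] is always 1: a single root rcu_node covers the entire tree. */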
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
+
+/*
+ * The rcu_scheduler_active variable transitions from zero to one just
+ * before the first task is spawned.  So when this variable is zero, RCU
+ * can assume that there is but one task, allowing RCU to (for example)
+ * optimize synchronize_sched() to a simple barrier().  When this variable
+ * is one, RCU must actually do all the hard work required to detect real
+ * grace periods.  This variable is also used to suppress boot-time false
+ * positives from lockdep-RCU error checking.
+ */
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
+/*
+ * The rcu_scheduler_fully_active variable transitions from zero to one
+ * during the early_initcall() processing, which is after the scheduler
+ * is capable of creating new tasks.  So RCU processing (for example,
+ * creating tasks for RCU priority boosting) must be delayed until after
+ * rcu_scheduler_fully_active transitions from zero to one.  We also
+ * currently delay invocation of any RCU callbacks until after this point.
+ *
+ * It might later prove better for people registering RCU callbacks during
+ * early boot to take responsibility for these callbacks, but one step at
+ * a time.
+ */
+static int rcu_scheduler_fully_active __read_mostly;
+
+#ifdef CONFIG_RCU_BOOST
+
+/*
+ * Control variables for per-CPU and per-rcu_node kthreads.  These
+ * handle all flavors of RCU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
+DEFINE_PER_CPU(char, rcu_cpu_has_work);
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
+static void invoke_rcu_core(void);
+static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
+
+/*
+ * Track the rcutorture test sequence number and the update version
+ * number within a given test.  The rcutorture_testseq is incremented
+ * on every rcutorture module load and unload, so has an odd value
+ * when a test is running.  The rcutorture_vernum is set to zero
+ * when rcutorture starts and is incremented on each rcutorture update.
+ * These variables enable correlating rcutorture output with the
+ * RCU tracing information.
+ */
+unsigned long rcutorture_testseq;
+unsigned long rcutorture_vernum;
+
+/*
+ * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
+ * permit this function to be invoked without holding the root rcu_node
+ * structure's ->lock, but of course results can be subject to change.
+ */
+static int rcu_gp_in_progress(struct rcu_state *rsp)
+{
+       return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
+}
+
+/*
+ * Note a quiescent state.  Because we do not need to know
+ * how many quiescent states passed, just if there was at least
+ * one since the start of the grace period, this just sets a flag.
+ * The caller must have disabled preemption.
+ */
+void rcu_sched_qs(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
+
+       if (rdp->passed_quiesce == 0)
+               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
+       rdp->passed_quiesce = 1;
+}
+
+void rcu_bh_qs(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+
+       if (rdp->passed_quiesce == 0)
+               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
+       rdp->passed_quiesce = 1;
+}
+
+/*
+ * Note a context switch.  This is a quiescent state for RCU-sched,
+ * and requires special handling for preemptible RCU.
+ * The caller must have disabled preemption.
+ */
+void rcu_note_context_switch(int cpu)
+{
+       trace_rcu_utilization(TPS("Start context switch"));
+       rcu_sched_qs(cpu);
+       rcu_preempt_note_context_switch(cpu);
+       trace_rcu_utilization(TPS("End context switch"));
+}
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
+
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+       .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+       .dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+       .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+       .dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+};
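+
+/*
+ * Note the parity convention: ->dynticks is initialized to an odd value
+ * above because an odd value means that the CPU is not in an extended
+ * quiescent state, while an even value means that it is.
+ */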
+
+static long blimit = 10;       /* Maximum callbacks per rcu_do_batch. */
+static long qhimark = 10000;   /* If this many pending, ignore blimit. */
+static long qlowmark = 100;    /* Once only this many pending, use blimit. */
+
+module_param(blimit, long, 0444);
+module_param(qhimark, long, 0444);
+module_param(qlowmark, long, 0444);
+
+static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_next_fqs = ULONG_MAX;
+
+module_param(jiffies_till_first_fqs, ulong, 0644);
+module_param(jiffies_till_next_fqs, ulong, 0644);
+
+static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                                 struct rcu_data *rdp);
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj);
+static void force_quiescent_state(struct rcu_state *rsp);
+static int rcu_pending(int cpu);
+
+/*
+ * Return the number of RCU-sched batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed_sched(void)
+{
+       return rcu_sched_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
+
+/*
+ * Return the number of RCU BH batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed_bh(void)
+{
+       return rcu_bh_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
+
+/*
+ * Force a quiescent state for RCU BH.
+ */
+void rcu_bh_force_quiescent_state(void)
+{
+       force_quiescent_state(&rcu_bh_state);
+}
+EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
+
+/*
+ * Record the number of times rcutorture tests have been initiated and
+ * terminated.  This information allows the debugfs tracing stats to be
+ * correlated to the rcutorture messages, even when the rcutorture module
+ * is being repeatedly loaded and unloaded.  In other words, we cannot
+ * store this state in rcutorture itself.
+ */
+void rcutorture_record_test_transition(void)
+{
+       rcutorture_testseq++;
+       rcutorture_vernum = 0;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
+
+/*
+ * Record the number of writer passes through the current rcutorture test.
+ * This is also used to correlate debugfs tracing stats with the rcutorture
+ * messages.
+ */
+void rcutorture_record_progress(unsigned long vernum)
+{
+       rcutorture_vernum++;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_progress);
+
+/*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+       force_quiescent_state(&rcu_sched_state);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
+/*
+ * Does the CPU have callbacks ready to be invoked?
+ */
+static int
+cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
+{
+       return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
+              rdp->nxttail[RCU_DONE_TAIL] != NULL;
+}
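+
+/*
+ * The rdp->nxtlist/->nxttail[] list examined above is segmented: the
+ * RCU_DONE_TAIL segment holds callbacks that have already waited out
+ * their grace period and are ready to be invoked, while later segments
+ * hold callbacks still waiting on the current or on a future grace period.
+ */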
+
+/*
+ * Does the current CPU require a not-yet-started grace period?
+ * The caller must have disabled interrupts to prevent races with
+ * normal callback registry.
+ */
+static int
+cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       int i;
+
+       if (rcu_gp_in_progress(rsp))
+               return 0;  /* No, a grace period is already in progress. */
+       if (rcu_nocb_needs_gp(rsp))
+               return 1;  /* Yes, a no-CBs CPU needs one. */
+       if (!rdp->nxttail[RCU_NEXT_TAIL])
+               return 0;  /* No, this is a no-CBs (or offline) CPU. */
+       if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+               return 1;  /* Yes, this CPU has newly registered callbacks. */
+       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+               if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
+                   ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
+                                rdp->nxtcompleted[i]))
+                       return 1;  /* Yes, CBs for future grace period. */
+       return 0; /* No grace period needed. */
+}
+
+/*
+ * Return the root node of the specified rcu_state structure.
+ */
+static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+{
+       return &rsp->node[0];
+}
+
+/*
+ * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
+ *
+ * If the new value of the ->dynticks_nesting counter now is zero,
+ * we really have entered idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
+                               bool user)
+{
+       trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
+       if (!user && !is_idle_task(current)) {
+               struct task_struct *idle __maybe_unused =
+                       idle_task(smp_processor_id());
+
+               trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
+               ftrace_dump(DUMP_ORIG);
+               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+                         current->pid, current->comm,
+                         idle->pid, idle->comm); /* must be idle task! */
+       }
+       rcu_prepare_for_idle(smp_processor_id());
+       /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+       smp_mb__before_atomic_inc();  /* See above. */
+       atomic_inc(&rdtp->dynticks);
+       smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+       WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+
+       /*
+        * It is illegal to enter an extended quiescent state while
+        * in an RCU read-side critical section.
+        */
+       rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+                          "Illegal idle entry in RCU read-side critical section.");
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
+                          "Illegal idle entry in RCU-bh read-side critical section.");
+       rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
+                          "Illegal idle entry in RCU-sched read-side critical section.");
+}
+
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_enter(bool user)
+{
+       long long oldval;
+       struct rcu_dynticks *rdtp;
+
+       rdtp = this_cpu_ptr(&rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+               rdtp->dynticks_nesting = 0;
+       else
+               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+       rcu_eqs_enter_common(rdtp, oldval, user);
+}
+
+/**
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
+ *
+ * Enter idle mode, in other words, -leave- the mode in which RCU
+ * read-side critical sections can occur.  (Though RCU read-side
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * We crowbar the ->dynticks_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
+ */
+void rcu_idle_enter(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rcu_eqs_enter(false);
+       rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
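+/*
+ * rcu_idle_enter() is invoked from the generic idle loop just before the
+ * CPU goes idle; rcu_idle_exit() below is its counterpart on the way out.
+ */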
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace.  No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ */
+void rcu_user_enter(void)
+{
+       rcu_eqs_enter(1);
+}
+#endif /* CONFIG_RCU_USER_QS */
+
+/**
+ * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
+ *
+ * Exit from an interrupt handler, which might possibly result in entering
+ * idle mode, in other words, leaving the mode in which read-side critical
+ * sections can occur.
+ *
+ * This code assumes that the idle loop never does anything that might
+ * result in unbalanced calls to irq_enter() and irq_exit().  If your
+ * architecture violates this assumption, RCU will give you what you
+ * deserve, good and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_exit(void)
+{
+       unsigned long flags;
+       long long oldval;
+       struct rcu_dynticks *rdtp;
+
+       local_irq_save(flags);
+       rdtp = this_cpu_ptr(&rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       rdtp->dynticks_nesting--;
+       WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+       if (rdtp->dynticks_nesting)
+               trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
+       else
+               rcu_eqs_enter_common(rdtp, oldval, true);
+       rcu_sysidle_enter(rdtp, 1);
+       local_irq_restore(flags);
+}
+
+/*
+ * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
+ *
+ * If the new value of the ->dynticks_nesting counter was previously zero,
+ * we really have exited idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
+                              int user)
+{
+       smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+       atomic_inc(&rdtp->dynticks);
+       /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+       smp_mb__after_atomic_inc();  /* See above. */
+       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+       rcu_cleanup_after_idle(smp_processor_id());
+       trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
+       if (!user && !is_idle_task(current)) {
+               struct task_struct *idle __maybe_unused =
+                       idle_task(smp_processor_id());
+
+               trace_rcu_dyntick(TPS("Error on exit: not idle task"),
+                                 oldval, rdtp->dynticks_nesting);
+               ftrace_dump(DUMP_ORIG);
+               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+                         current->pid, current->comm,
+                         idle->pid, idle->comm); /* must be idle task! */
+       }
+}
+
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_exit(bool user)
+{
+       struct rcu_dynticks *rdtp;
+       long long oldval;
+
+       rdtp = this_cpu_ptr(&rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       WARN_ON_ONCE(oldval < 0);
+       if (oldval & DYNTICK_TASK_NEST_MASK)
+               rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+       else
+               rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_eqs_exit_common(rdtp, oldval, user);
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_EXIT_IDLE to
+ * allow for the possibility of usermode upcalls messing up our count
+ * of interrupt nesting level during the busy period that is just
+ * now starting.
+ */
+void rcu_idle_exit(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rcu_eqs_exit(false);
+       rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run an RCU read-side critical section at any time.
+ */
+void rcu_user_exit(void)
+{
+       rcu_eqs_exit(1);
+}
+#endif /* CONFIG_RCU_USER_QS */
+
+/**
+ * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
+ *
+ * Enter an interrupt handler, which might possibly result in exiting
+ * idle mode, in other words, entering the mode in which read-side critical
+ * sections can occur.
+ *
+ * Note that the Linux kernel is fully capable of entering an interrupt
+ * handler that it never exits, for example when doing upcalls to
+ * user mode!  This code assumes that the idle loop never does upcalls to
+ * user mode.  If your architecture does do upcalls from the idle loop (or
+ * does anything else that results in unbalanced calls to the irq_enter()
+ * and irq_exit() functions), RCU will give you what you deserve, good
+ * and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_enter(void)
+{
+       unsigned long flags;
+       struct rcu_dynticks *rdtp;
+       long long oldval;
+
+       local_irq_save(flags);
+       rdtp = this_cpu_ptr(&rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       rdtp->dynticks_nesting++;
+       WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+       if (oldval)
+               trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
+       else
+               rcu_eqs_exit_common(rdtp, oldval, true);
+       rcu_sysidle_exit(rdtp, 1);
+       local_irq_restore(flags);
+}
+
+/**
+ * rcu_nmi_enter - inform RCU of entry to NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks and
+ * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling
+ * know that the CPU is active.
+ */
+void rcu_nmi_enter(void)
+{
+       struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+
+       if (rdtp->dynticks_nmi_nesting == 0 &&
+           (atomic_read(&rdtp->dynticks) & 0x1))
+               return;
+       rdtp->dynticks_nmi_nesting++;
+       smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+       atomic_inc(&rdtp->dynticks);
+       /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+       smp_mb__after_atomic_inc();  /* See above. */
+       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+}
+
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks and
+ * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling
+ * know that the CPU is no longer active.
+ */
+void rcu_nmi_exit(void)
+{
+       struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+
+       if (rdtp->dynticks_nmi_nesting == 0 ||
+           --rdtp->dynticks_nmi_nesting != 0)
+               return;
+       /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+       smp_mb__before_atomic_inc();  /* See above. */
+       atomic_inc(&rdtp->dynticks);
+       smp_mb__after_atomic_inc();  /* Force delay to next write. */
+       WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+}
+
+/**
+ * __rcu_is_watching - are RCU read-side critical sections safe?
+ *
+ * Return true if RCU is watching the running CPU, which means that
+ * this CPU can safely enter RCU read-side critical sections.  Unlike
+ * rcu_is_watching(), the caller of __rcu_is_watching() must have at
+ * least disabled preemption.
+ */
+bool __rcu_is_watching(void)
+{
+       return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
+}
+
+/**
+ * rcu_is_watching - see if RCU is watching the current CPU
+ *
+ * Return false only if the current CPU is in an extended quiescent state
+ * (the idle loop or adaptive-tickless usermode execution) with no interrupt
+ * or NMI handler running; otherwise return true, meaning that it is safe
+ * to enter RCU read-side critical sections.
+ */
+bool rcu_is_watching(void)
+{
+       int ret;
+
+       preempt_disable();
+       ret = __rcu_is_watching();
+       preempt_enable();
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_is_watching);
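+/*
+ * A typical use of rcu_is_watching() is an assertion such as
+ * WARN_ON_ONCE(!rcu_is_watching()) before entering an RCU read-side
+ * critical section from an unusual context (for example, tracing or
+ * early-boot code).
+ */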
+
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
+
+/*
+ * Is the current CPU online?  Disable preemption to avoid false positives
+ * that could otherwise happen due to the current CPU number being sampled,
+ * this task being preempted, its old CPU being taken offline, resuming
+ * on some other CPU, then determining that its old CPU is now offline.
+ * It is OK to use RCU on an offline processor during initial boot, hence
+ * the check for rcu_scheduler_fully_active.  Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask.  This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
+ *
+ * Disable checking if in an NMI handler because we cannot safely report
+ * errors from NMI handlers anyway.
+ */
+bool rcu_lockdep_current_cpu_online(void)
+{
+       struct rcu_data *rdp;
+       struct rcu_node *rnp;
+       bool ret;
+
+       if (in_nmi())
+               return 1;
+       preempt_disable();
+       rdp = this_cpu_ptr(&rcu_sched_data);
+       rnp = rdp->mynode;
+       ret = (rdp->grpmask & rnp->qsmaskinit) ||
+             !rcu_scheduler_fully_active;
+       preempt_enable();
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
+
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
+
+/**
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ *
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
+ */
+static int rcu_is_cpu_rrupt_from_idle(void)
+{
+       return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
+}
+
+/*
+ * Snapshot the specified CPU's dynticks counter so that we can later
+ * credit them with an implicit quiescent state.  Return 1 if this CPU
+ * is in dynticks idle mode, which is an extended quiescent state.
+ */
+static int dyntick_save_progress_counter(struct rcu_data *rdp,
+                                        bool *isidle, unsigned long *maxj)
+{
+       rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+       rcu_sysidle_check_cpu(rdp, isidle, maxj);
+       return (rdp->dynticks_snap & 0x1) == 0;
+}
+
+/*
+ * Return true if the specified CPU has passed through a quiescent
+ * state by virtue of being in or having passed through a dynticks
+ * idle state since the last call to dyntick_save_progress_counter()
+ * for this same CPU, or by virtue of having been offline.
+ */
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
+                                   bool *isidle, unsigned long *maxj)
+{
+       unsigned int curr;
+       unsigned int snap;
+
+       curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
+       snap = (unsigned int)rdp->dynticks_snap;
+
+       /*
+        * If the CPU passed through or entered a dynticks idle phase with
+        * no active irq/NMI handlers, then we can safely pretend that the CPU
+        * already acknowledged the request to pass through a quiescent
+        * state.  Either way, that CPU cannot possibly be in an RCU
+        * read-side critical section that started before the beginning
+        * of the current RCU grace period.
+        */
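+       /*
+        * For example, snap == 5 (odd) and curr == 7 imply that the counter
+        * passed through the even value 6, that is, a dyntick-idle sojourn.
+        */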
+       if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
+               rdp->dynticks_fqs++;
+               return 1;
+       }
+
+       /*
+        * Check for the CPU being offline, but only if the grace period
+        * is old enough.  We don't need to worry about the CPU changing
+        * state: If we see it offline even once, it has been through a
+        * quiescent state.
+        *
+        * The reason for insisting that the grace period be at least
+        * one jiffy old is that CPUs that are not quite online and that
+        * have just gone offline can still execute RCU read-side critical
+        * sections.
+        */
+       if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
+               return 0;  /* Grace period is not old enough. */
+       barrier();
+       if (cpu_is_offline(rdp->cpu)) {
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
+               rdp->offline_fqs++;
+               return 1;
+       }
+
+       /*
+        * There is a possibility that a CPU in adaptive-ticks state
+        * might run in the kernel with the scheduling-clock tick disabled
+        * for an extended time period.  Invoke rcu_kick_nohz_cpu() to
+        * force the CPU to restart the scheduling-clock tick if this
+        * CPU is in this state.
+        */
+       rcu_kick_nohz_cpu(rdp->cpu);
+
+       return 0;
+}
+
+static void record_gp_stall_check_time(struct rcu_state *rsp)
+{
+       unsigned long j = ACCESS_ONCE(jiffies);
+
+       rsp->gp_start = j;
+       smp_wmb(); /* Record start time before stall time. */
+       rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
+}
+
+/*
+ * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
+ * for architectures that do not implement trigger_all_cpu_backtrace().
+ * The NMI-triggered stack traces are more accurate because they are
+ * printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+       int cpu;
+       unsigned long flags;
+       struct rcu_node *rnp;
+
+       rcu_for_each_leaf_node(rsp, rnp) {
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               if (rnp->qsmask != 0) {
+                       for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+                               if (rnp->qsmask & (1UL << cpu))
+                                       dump_cpu_task(rnp->grplo + cpu);
+               }
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+}
+
+static void print_other_cpu_stall(struct rcu_state *rsp)
+{
+       int cpu;
+       long delta;
+       unsigned long flags;
+       int ndetected = 0;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+       long totqlen = 0;
+
+       /* Only let one CPU complain about others per time interval. */
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       delta = jiffies - rsp->jiffies_stall;
+       if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;
+       }
+       rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       /*
+        * OK, time to rat on our buddy...
+        * See Documentation/RCU/stallwarn.txt for info on how to debug
+        * RCU CPU stall warnings.
+        */
+       pr_err("INFO: %s detected stalls on CPUs/tasks:",
+              rsp->name);
+       print_cpu_stall_info_begin();
+       rcu_for_each_leaf_node(rsp, rnp) {
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               ndetected += rcu_print_task_stall(rnp);
+               if (rnp->qsmask != 0) {
+                       for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+                               if (rnp->qsmask & (1UL << cpu)) {
+                                       print_cpu_stall_info(rsp,
+                                                            rnp->grplo + cpu);
+                                       ndetected++;
+                               }
+               }
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+
+       /*
+        * Now rat on any tasks that got kicked up to the root rcu_node
+        * due to CPU offlining.
+        */
+       rnp = rcu_get_root(rsp);
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       ndetected += rcu_print_task_stall(rnp);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       print_cpu_stall_info_end();
+       for_each_possible_cpu(cpu)
+               totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+       pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
+              smp_processor_id(), (long)(jiffies - rsp->gp_start),
+              rsp->gpnum, rsp->completed, totqlen);
+       if (ndetected == 0)
+               pr_err("INFO: Stall ended before state dump start\n");
+       else if (!trigger_all_cpu_backtrace())
+               rcu_dump_cpu_stacks(rsp);
+
+       /* Complain about tasks blocking the grace period. */
+
+       rcu_print_detail_task_stall(rsp);
+
+       force_quiescent_state(rsp);  /* Kick them all. */
+}
+
+/*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
+static void print_cpu_stall(struct rcu_state *rsp)
+{
+       int cpu;
+       unsigned long flags;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+       long totqlen = 0;
+
+       /*
+        * OK, time to rat on ourselves...
+        * See Documentation/RCU/stallwarn.txt for info on how to debug
+        * RCU CPU stall warnings.
+        */
+       pr_err("INFO: %s self-detected stall on CPU", rsp->name);
+       print_cpu_stall_info_begin();
+       print_cpu_stall_info(rsp, smp_processor_id());
+       print_cpu_stall_info_end();
+       for_each_possible_cpu(cpu)
+               totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+       pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
+               jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
+       if (!trigger_all_cpu_backtrace())
+               dump_stack();
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
+               rsp->jiffies_stall = jiffies +
+                                    3 * rcu_jiffies_till_stall_check() + 3;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       /*
+        * Attempt to revive the RCU machinery by forcing a context switch.
+        *
+        * A context switch would normally allow the RCU state machine to make
+        * progress and it could be we're stuck in kernel space without context
+        * switches for an entirely unreasonable amount of time.
+        */
+       resched_cpu(smp_processor_id());
+}
+
+static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       unsigned long completed;
+       unsigned long gpnum;
+       unsigned long gps;
+       unsigned long j;
+       unsigned long js;
+       struct rcu_node *rnp;
+
+       if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
+               return;
+       j = ACCESS_ONCE(jiffies);
+
+       /*
+        * Lots of memory barriers to reject false positives.
+        *
+        * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
+        * then rsp->gp_start, and finally rsp->completed.  These values
+        * are updated in the opposite order with memory barriers (or
+        * equivalent) during grace-period initialization and cleanup.
+        * Now, a false positive can occur if we get a new value of
+        * rsp->gp_start and an old value of rsp->jiffies_stall.  But given
+        * the memory barriers, the only way that this can happen is if one
+        * grace period ends and another starts between these two fetches.
+        * Detect this by comparing rsp->completed with the previous fetch
+        * from rsp->gpnum.
+        *
+        * Given this check, comparisons of jiffies, rsp->jiffies_stall,
+        * and rsp->gp_start suffice to forestall false positives.
+        */
+       gpnum = ACCESS_ONCE(rsp->gpnum);
+       smp_rmb(); /* Pick up ->gpnum first... */
+       js = ACCESS_ONCE(rsp->jiffies_stall);
+       smp_rmb(); /* ...then ->jiffies_stall before the rest... */
+       gps = ACCESS_ONCE(rsp->gp_start);
+       smp_rmb(); /* ...and finally ->gp_start before ->completed. */
+       completed = ACCESS_ONCE(rsp->completed);
+       if (ULONG_CMP_GE(completed, gpnum) ||
+           ULONG_CMP_LT(j, js) ||
+           ULONG_CMP_GE(gps, js))
+               return; /* No stall or GP completed since entering function. */
+       rnp = rdp->mynode;
+       if (rcu_gp_in_progress(rsp) &&
+           (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
+
+               /* We haven't checked in, so go dump stack. */
+               print_cpu_stall(rsp);
+
+       } else if (rcu_gp_in_progress(rsp) &&
+                  ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
+
+               /* They had a few time units to dump stack, so complain. */
+               print_other_cpu_stall(rsp);
+       }
+}
+
+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp)
+               rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
+}
+
+/*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+       int i;
+
+       if (init_nocb_callback_list(rdp))
+               return;
+       rdp->nxtlist = NULL;
+       for (i = 0; i < RCU_NEXT_SIZE; i++)
+               rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
+ * Determine the value that ->completed will have at the end of the
+ * next subsequent grace period.  This is used to tag callbacks so that
+ * a CPU can invoke callbacks in a timely fashion even if that CPU has
+ * been dyntick-idle for an extended period with callbacks under the
+ * influence of RCU_FAST_NO_HZ.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
+                                      struct rcu_node *rnp)
+{
+       /*
+        * If RCU is idle, we just wait for the next grace period.
+        * But we can only be sure that RCU is idle if we are looking
+        * at the root rcu_node structure -- otherwise, a new grace
+        * period might have started, but just not yet gotten around
+        * to initializing the current non-root rcu_node structure.
+        */
+       if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
+               return rnp->completed + 1;
+
+       /*
+        * Otherwise, wait for a possible partial grace period and
+        * then the subsequent full grace period.
+        */
+       return rnp->completed + 2;
+}
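+
+/*
+ * For example, if the root rcu_node structure shows ->gpnum == ->completed
+ * == 100, rcu_cbs_completed() returns 101: newly registered callbacks can
+ * be invoked once grace period 101 completes.  A non-root rcu_node (or a
+ * root with a grace period in progress) instead yields 102.
+ */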
+
+/*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+                               unsigned long c, const char *s)
+{
+       trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+                                     rnp->completed, c, rnp->level,
+                                     rnp->grplo, rnp->grphi, s);
+}
+
+/*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks.  The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+static unsigned long __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+       unsigned long c;
+       int i;
+       struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+       /*
+        * Pick up grace-period number for new callbacks.  If this
+        * grace period is already marked as needed, return to the caller.
+        */
+       c = rcu_cbs_completed(rdp->rsp, rnp);
+       trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
+       if (rnp->need_future_gp[c & 0x1]) {
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
+               return c;
+       }
+
+       /*
+        * If either this rcu_node structure or the root rcu_node structure
+        * believes that a grace period is in progress, then we must wait
+        * for the one following, which is in "c".  Because our request
+        * will be noticed at the end of the current grace period, we don't
+        * need to explicitly start one.
+        */
+       if (rnp->gpnum != rnp->completed ||
+           ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+               rnp->need_future_gp[c & 0x1]++;
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
+               return c;
+       }
+
+       /*
+        * There might be no grace period in progress.  If we don't already
+        * hold it, acquire the root rcu_node structure's lock in order to
+        * start one (if needed).
+        */
+       if (rnp != rnp_root)
+               raw_spin_lock(&rnp_root->lock);
+
+       /*
+        * Get a new grace-period number.  If there really is no grace
+        * period in progress, it will be smaller than the one we obtained
+        * earlier.  Adjust callbacks as needed.  Note that even no-CBs
+        * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+        */
+       c = rcu_cbs_completed(rdp->rsp, rnp_root);
+       for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+               if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+                       rdp->nxtcompleted[i] = c;
+
+       /*
+        * If the need for the required grace period is already
+        * recorded, trace and leave.
+        */
+       if (rnp_root->need_future_gp[c & 0x1]) {
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
+               goto unlock_out;
+       }
+
+       /* Record the need for the future grace period. */
+       rnp_root->need_future_gp[c & 0x1]++;
+
+       /* If a grace period is not already in progress, start one. */
+       if (rnp_root->gpnum != rnp_root->completed) {
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
+       } else {
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
+               rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+       }
+unlock_out:
+       if (rnp != rnp_root)
+               raw_spin_unlock(&rnp_root->lock);
+       return c;
+}
+
+/*
+ * Clean up any old requests for the just-ended grace period.  Also return
+ * whether any additional grace periods have been requested.  Also invoke
+ * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+ * waiting for this grace period to complete.
+ */
+static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+       int c = rnp->completed;
+       int needmore;
+       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+
+       rcu_nocb_gp_cleanup(rsp, rnp);
+       rnp->need_future_gp[c & 0x1] = 0;
+       needmore = rnp->need_future_gp[(c + 1) & 0x1];
+       trace_rcu_future_gp(rnp, rdp, c,
+                           needmore ? TPS("CleanupMore") : TPS("Cleanup"));
+       return needmore;
+}
+
+/*
+ * If there is room, assign a ->completed number to any callbacks on
+ * this CPU that have not already been assigned.  Also accelerate any
+ * callbacks that were previously assigned a ->completed number that has
+ * since proven to be too conservative, which can happen if callbacks get
+ * assigned a ->completed number while RCU is idle, but with reference to
+ * a non-root rcu_node structure.  This function is idempotent, so it does
+ * not hurt to call it repeatedly.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+                              struct rcu_data *rdp)
+{
+       unsigned long c;
+       int i;
+
+       /* If the CPU has no callbacks, nothing to do. */
+       if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+               return;
+
+       /*
+        * Starting from the sublist containing the callbacks most
+        * recently assigned a ->completed number and working down, find the
+        * first sublist that is not assignable to an upcoming grace period.
+        * Such a sublist has something in it (first two tests) and has
+        * a ->completed number assigned that will complete sooner than
+        * the ->completed number for newly arrived callbacks (last test).
+        *
+        * The key point is that any later sublist can be assigned the
+        * same ->completed number as the newly arrived callbacks, which
+        * means that the callbacks in any of these later sublists can be
+        * grouped into a single sublist, whether or not they have already
+        * been assigned a ->completed number.
+        */
+       c = rcu_cbs_completed(rsp, rnp);
+       for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
+               if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
+                   !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
+                       break;
+
+       /*
+        * If there is no sublist for unassigned callbacks, leave.
+        * At the same time, advance "i" one sublist, so that "i" will
+        * index into the sublist into which all the remaining callbacks
+        * should be grouped.
+        */
+       if (++i >= RCU_NEXT_TAIL)
+               return;
+
+       /*
+        * Assign all subsequent callbacks' ->completed number to the next
+        * full grace period and group them all in the sublist initially
+        * indexed by "i".
+        */
+       for (; i <= RCU_NEXT_TAIL; i++) {
+               rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
+               rdp->nxtcompleted[i] = c;
+       }
+       /* Record any needed additional grace periods. */
+       rcu_start_future_gp(rnp, rdp);
+
+       /* Trace depending on how much we were able to accelerate. */
+       if (!*rdp->nxttail[RCU_WAIT_TAIL])
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
+       else
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
+}
+
+/*
+ * Move any callbacks whose grace period has completed to the
+ * RCU_DONE_TAIL sublist, then compact the remaining sublists and
+ * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
+ * sublist.  This function is idempotent, so it does not hurt to
+ * invoke it repeatedly.  As long as it is not invoked -too- often...
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+                           struct rcu_data *rdp)
+{
+       int i, j;
+
+       /* If the CPU has no callbacks, nothing to do. */
+       if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+               return;
+
+       /*
+        * Find all callbacks whose ->completed numbers indicate that they
+        * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
+        */
+       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+               if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
+                       break;
+               rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
+       }
+       /* Clean up any sublist tail pointers that were misordered above. */
+       for (j = RCU_WAIT_TAIL; j < i; j++)
+               rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
+
+       /* Copy down callbacks to fill in empty sublists. */
+       for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+               if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
+                       break;
+               rdp->nxttail[j] = rdp->nxttail[i];
+               rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
+       }
+
+       /* Classify any remaining callbacks. */
+       rcu_accelerate_cbs(rsp, rnp, rdp);
+}
+
+/*
+ * Update CPU-local rcu_data state to record the beginnings and ends of
+ * grace periods.  The caller must hold the ->lock of the leaf rcu_node
+ * structure corresponding to the current CPU, and must have irqs disabled.
+ */
+static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+{
+       /* Handle the ends of any preceding grace periods first. */
+       if (rdp->completed == rnp->completed) {
+
+               /* No grace period end, so just accelerate recent callbacks. */
+               rcu_accelerate_cbs(rsp, rnp, rdp);
+
+       } else {
+
+               /* Advance callbacks. */
+               rcu_advance_cbs(rsp, rnp, rdp);
+
+               /* Remember that we saw this grace-period completion. */
+               rdp->completed = rnp->completed;
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
+       }
+
+       if (rdp->gpnum != rnp->gpnum) {
+               /*
+                * If the current grace period is waiting for this CPU,
+                * set up to detect a quiescent state, otherwise don't
+                * go looking for one.
+                */
+               rdp->gpnum = rnp->gpnum;
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
+               rdp->passed_quiesce = 0;
+               rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
+               zero_cpu_stall_ticks(rdp);
+       }
+}
+
+static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       unsigned long flags;
+       struct rcu_node *rnp;
+
+       local_irq_save(flags);
+       rnp = rdp->mynode;
+       if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
+            rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
+           !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+               local_irq_restore(flags);
+               return;
+       }
+       __note_gp_changes(rsp, rnp, rdp);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Initialize a new grace period.  Return 0 if no grace period required.
+ */
+static int rcu_gp_init(struct rcu_state *rsp)
+{
+       struct rcu_data *rdp;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       rcu_bind_gp_kthread();
+       raw_spin_lock_irq(&rnp->lock);
+       if (rsp->gp_flags == 0) {
+               /* Spurious wakeup, tell caller to go back to sleep.  */
+               raw_spin_unlock_irq(&rnp->lock);
+               return 0;
+       }
+       rsp->gp_flags = 0; /* Clear all flags: New grace period. */
+
+       if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
+               /*
+                * Grace period already in progress, don't start another.
+                * Not supposed to be able to happen.
+                */
+               raw_spin_unlock_irq(&rnp->lock);
+               return 0;
+       }
+
+       /* Advance to a new grace period and initialize state. */
+       record_gp_stall_check_time(rsp);
+       smp_wmb(); /* Record GP times before starting GP. */
+       rsp->gpnum++;
+       trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
+       raw_spin_unlock_irq(&rnp->lock);
+
+       /* Exclude any concurrent CPU-hotplug operations. */
+       mutex_lock(&rsp->onoff_mutex);
+
+       /*
+        * Set the quiescent-state-needed bits in all the rcu_node
+        * structures for all currently online CPUs in breadth-first order,
+        * starting from the root rcu_node structure, relying on the layout
+        * of the tree within the rsp->node[] array.  Note that other CPUs
+        * will access only the leaves of the hierarchy, thus seeing that no
+        * grace period is in progress, at least until the corresponding
+        * leaf node has been initialized.  In addition, we have excluded
+        * CPU-hotplug operations.
+        *
+        * The grace period cannot complete until the initialization
+        * process finishes, because this kthread handles both.
+        */
+       rcu_for_each_node_breadth_first(rsp, rnp) {
+               raw_spin_lock_irq(&rnp->lock);
+               rdp = this_cpu_ptr(rsp->rda);
+               rcu_preempt_check_blocked_tasks(rnp);
+               rnp->qsmask = rnp->qsmaskinit;
+               ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
+               WARN_ON_ONCE(rnp->completed != rsp->completed);
+               ACCESS_ONCE(rnp->completed) = rsp->completed;
+               if (rnp == rdp->mynode)
+                       __note_gp_changes(rsp, rnp, rdp);
+               rcu_preempt_boost_start_gp(rnp);
+               trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
+                                           rnp->level, rnp->grplo,
+                                           rnp->grphi, rnp->qsmask);
+               raw_spin_unlock_irq(&rnp->lock);
+#ifdef CONFIG_PROVE_RCU_DELAY
+               if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
+                   system_state == SYSTEM_RUNNING)
+                       udelay(200);
+#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
+               cond_resched();
+       }
+
+       mutex_unlock(&rsp->onoff_mutex);
+       return 1;
+}
+
+/*
+ * Do one round of quiescent-state forcing.
+ */
+static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
+{
+       int fqs_state = fqs_state_in;
+       bool isidle = false;
+       unsigned long maxj;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       rsp->n_force_qs++;
+       if (fqs_state == RCU_SAVE_DYNTICK) {
+               /* Collect dyntick-idle snapshots. */
+               if (is_sysidle_rcu_state(rsp)) {
+                       isidle = 1;
+                       maxj = jiffies - ULONG_MAX / 4;
+               }
+               force_qs_rnp(rsp, dyntick_save_progress_counter,
+                            &isidle, &maxj);
+               rcu_sysidle_report_gp(rsp, isidle, maxj);
+               fqs_state = RCU_FORCE_QS;
+       } else {
+               /* Handle dyntick-idle and offline CPUs. */
+               isidle = 0;
+               force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
+       }
+       /* Clear flag to prevent immediate re-entry. */
+       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
+               raw_spin_lock_irq(&rnp->lock);
+               rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
+               raw_spin_unlock_irq(&rnp->lock);
+       }
+       return fqs_state;
+}
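+
+/*
+ * rcu_gp_fqs() above is invoked repeatedly by rcu_gp_kthread(): the first
+ * pass (RCU_SAVE_DYNTICK) snapshots each CPU's ->dynticks counter via
+ * dyntick_save_progress_counter(), and subsequent passes (RCU_FORCE_QS)
+ * check those snapshots via rcu_implicit_dynticks_qs().
+ */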
+
+/*
+ * Clean up after the old grace period.
+ */
+static void rcu_gp_cleanup(struct rcu_state *rsp)
+{
+       unsigned long gp_duration;
+       int nocb = 0;
+       struct rcu_data *rdp;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       raw_spin_lock_irq(&rnp->lock);
+       gp_duration = jiffies - rsp->gp_start;
+       if (gp_duration > rsp->gp_max)
+               rsp->gp_max = gp_duration;
+
+       /*
+        * We know the grace period is complete, but to everyone else
+        * it appears to still be ongoing.  But it is also the case
+        * that to everyone else it looks like there is nothing that
+        * they can do to advance the grace period.  It is therefore
+        * safe for us to drop the lock in order to mark the grace
+        * period as completed in all of the rcu_node structures.
+        */
+       raw_spin_unlock_irq(&rnp->lock);
+
+       /*
+        * Propagate new ->completed value to rcu_node structures so
+        * that other CPUs don't have to wait until the start of the next
+        * grace period to process their callbacks.  This also avoids
+        * some nasty RCU grace-period initialization races by forcing
+        * the end of the current grace period to be completely recorded in
+        * all of the rcu_node structures before the beginning of the next
+        * grace period is recorded in any of the rcu_node structures.
+        */
+       rcu_for_each_node_breadth_first(rsp, rnp) {
+               raw_spin_lock_irq(&rnp->lock);
+               ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+               rdp = this_cpu_ptr(rsp->rda);
+               if (rnp == rdp->mynode)
+                       __note_gp_changes(rsp, rnp, rdp);
+               nocb += rcu_future_gp_cleanup(rsp, rnp);
+               raw_spin_unlock_irq(&rnp->lock);
+               cond_resched();
+       }
+       rnp = rcu_get_root(rsp);
+       raw_spin_lock_irq(&rnp->lock);
+       rcu_nocb_gp_set(rnp, nocb);
+
+       rsp->completed = rsp->gpnum; /* Declare grace period done. */
+       trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
+       rsp->fqs_state = RCU_GP_IDLE;
+       rdp = this_cpu_ptr(rsp->rda);
+       rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
+       if (cpu_needs_another_gp(rsp, rdp)) {
+               rsp->gp_flags = RCU_GP_FLAG_INIT;
+               trace_rcu_grace_period(rsp->name,
+                                      ACCESS_ONCE(rsp->gpnum),
+                                      TPS("newreq"));
+       }
+       raw_spin_unlock_irq(&rnp->lock);
+}
+
+/*
+ * Body of kthread that handles grace periods.
+ */
+static int __noreturn rcu_gp_kthread(void *arg)
+{
+       int fqs_state;
+       int gf;
+       unsigned long j;
+       int ret;
+       struct rcu_state *rsp = arg;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       for (;;) {
+
+               /* Handle grace-period start. */
+               for (;;) {
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("reqwait"));
+                       wait_event_interruptible(rsp->gp_wq,
+                                                ACCESS_ONCE(rsp->gp_flags) &
+                                                RCU_GP_FLAG_INIT);
+                       if (rcu_gp_init(rsp))
+                               break;
+                       cond_resched();
+                       flush_signals(current);
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("reqwaitsig"));
+               }
+
+               /* Handle quiescent-state forcing. */
+               fqs_state = RCU_SAVE_DYNTICK;
+               j = jiffies_till_first_fqs;
+               if (j > HZ) {
+                       j = HZ;
+                       jiffies_till_first_fqs = HZ;
+               }
+               ret = 0;
+               for (;;) {
+                       if (!ret)
+                               rsp->jiffies_force_qs = jiffies + j;
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("fqswait"));
+                       ret = wait_event_interruptible_timeout(rsp->gp_wq,
+                                       ((gf = ACCESS_ONCE(rsp->gp_flags)) &
+                                        RCU_GP_FLAG_FQS) ||
+                                       (!ACCESS_ONCE(rnp->qsmask) &&
+                                        !rcu_preempt_blocked_readers_cgp(rnp)),
+                                       j);
+                       /* If grace period done, leave loop. */
+                       if (!ACCESS_ONCE(rnp->qsmask) &&
+                           !rcu_preempt_blocked_readers_cgp(rnp))
+                               break;
+                       /* If time for quiescent-state forcing, do it. */
+                       if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
+                           (gf & RCU_GP_FLAG_FQS)) {
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqsstart"));
+                               fqs_state = rcu_gp_fqs(rsp, fqs_state);
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqsend"));
+                               cond_resched();
+                       } else {
+                               /* Deal with stray signal. */
+                               cond_resched();
+                               flush_signals(current);
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqswaitsig"));
+                       }
+                       j = jiffies_till_next_fqs;
+                       if (j > HZ) {
+                               j = HZ;
+                               jiffies_till_next_fqs = HZ;
+                       } else if (j < 1) {
+                               j = 1;
+                               jiffies_till_next_fqs = 1;
+                       }
+               }
+
+               /* Handle grace-period end. */
+               rcu_gp_cleanup(rsp);
+       }
+}
+
+static void rsp_wakeup(struct irq_work *work)
+{
+       struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
+
+       /* Wake up rcu_gp_kthread() to start the grace period. */
+       wake_up(&rsp->gp_wq);
+}
+
+/*
+ * Start a new RCU grace period if warranted, re-initializing the hierarchy
+ * in preparation for detecting the next grace period.  The caller must hold
+ * the root node's ->lock and hard irqs must be disabled.
+ *
+ * Note that it is legal for a dying CPU (which is marked as offline) to
+ * invoke this function.  This can happen when the dying CPU reports its
+ * quiescent state.
+ */
+static void
+rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                     struct rcu_data *rdp)
+{
+       if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
+               /*
+                * Either we have not yet spawned the grace-period
+                * task, this CPU does not need another grace period,
+                * or a grace period is already in progress.
+                * Either way, don't start a new grace period.
+                */
+               return;
+       }
+       rsp->gp_flags = RCU_GP_FLAG_INIT;
+       trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
+                              TPS("newreq"));
+
+       /*
+        * We can't do wakeups while holding the rnp->lock, as that
+        * could cause possible deadlocks with the rq->lock. Defer
+        * the wakeup to interrupt context.  And don't bother waking
+        * up the running kthread.
+        */
+       if (current != rsp->gp_kthread)
+               irq_work_queue(&rsp->wakeup_work);
+}
+
+/*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp)
+{
+       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       /*
+        * If there is no grace period in progress right now, any
+        * callbacks we have up to this point will be satisfied by the
+        * next grace period.  Also, advancing the callbacks reduces the
+        * probability of false positives from cpu_needs_another_gp()
+        * resulting in pointless grace periods.  So, advance callbacks
+        * then start the grace period!
+        */
+       rcu_advance_cbs(rsp, rnp, rdp);
+       rcu_start_gp_advanced(rsp, rnp, rdp);
+}
+
+/*
+ * Report a full set of quiescent states to the specified rcu_state
+ * data structure.  This involves cleaning up after the prior grace
+ * period and letting rcu_start_gp() start up the next grace period
+ * if one is needed.  Note that the caller must hold rnp->lock, which
+ * is released before return.
+ */
+static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
+       __releases(rcu_get_root(rsp)->lock)
+{
+       WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+       raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+}
+
+/*
+ * Similar to rcu_report_qs_rdp(), for which it is a helper function.
+ * Allows quiescent states for a group of CPUs to be reported at one go
+ * to the specified rcu_node structure, though all the CPUs in the group
+ * must be represented by the same rcu_node structure (which need not be
+ * a leaf rcu_node structure, though it often will be).  That structure's
+ * lock must be held upon entry, and it is released before return.
+ */
+static void
+rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
+                 struct rcu_node *rnp, unsigned long flags)
+       __releases(rnp->lock)
+{
+       struct rcu_node *rnp_c;
+
+       /* Walk up the rcu_node hierarchy. */
+       for (;;) {
+               if (!(rnp->qsmask & mask)) {
+
+                       /* Our bit has already been cleared, so done. */
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                       return;
+               }
+               rnp->qsmask &= ~mask;
+               trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
+                                                mask, rnp->qsmask, rnp->level,
+                                                rnp->grplo, rnp->grphi,
+                                                !!rnp->gp_tasks);
+               if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
+
+                       /* Other bits still set at this level, so done. */
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                       return;
+               }
+               mask = rnp->grpmask;
+               if (rnp->parent == NULL) {
+
+                       /* No more levels.  Exit loop holding root lock. */
+
+                       break;
+               }
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               rnp_c = rnp;
+               rnp = rnp->parent;
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               WARN_ON_ONCE(rnp_c->qsmask);
+       }
+
+       /*
+        * Get here if we are the last CPU to pass through a quiescent
+        * state for this grace period.  Invoke rcu_report_qs_rsp()
+        * to clean up and start the next grace period if one is needed.
+        */
+       rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
+}
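+
+/*
+ * A walk-through of rcu_report_qs_rnp() for the common two-level case:
+ * if CPU 5's bit is the last one set in its leaf rcu_node structure's
+ * ->qsmask, the first pass of the loop clears that bit, finds the leaf's
+ * ->qsmask now zero with no blocked readers, and moves up to clear the
+ * leaf's ->grpmask bit in the root's ->qsmask.  Once the root's ->qsmask
+ * reaches zero, the loop exits holding the root's ->lock and
+ * rcu_report_qs_rsp() is invoked to wake the grace-period kthread.
+ */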
+
+/*
+ * Record a quiescent state for the specified CPU to that CPU's rcu_data
+ * structure.  This must be either called from the specified CPU, or
+ * called when the specified CPU is known to be offline (and when it is
+ * also known that no other CPU is concurrently trying to help the offline
+ * CPU).  The lastcomp argument is used to make sure we are still in the
+ * grace period of interest.  We don't want to end the current grace period
+ * based on quiescent states detected in an earlier grace period!
+ */
+static void
+rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_node *rnp;
+
+       rnp = rdp->mynode;
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
+           rnp->completed == rnp->gpnum) {
+
+               /*
+                * The grace period in which this quiescent state was
+                * recorded has ended, so don't report it upwards.
+                * We will instead need a new quiescent state that lies
+                * within the current grace period.
+                */
+               rdp->passed_quiesce = 0;        /* need qs for new gp. */
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;
+       }
+       mask = rdp->grpmask;
+       if ((rnp->qsmask & mask) == 0) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       } else {
+               rdp->qs_pending = 0;
+
+               /*
+                * This GP can't end until this CPU checks in, so all of our
+                * callbacks can be processed during the next GP.
+                */
+               rcu_accelerate_cbs(rsp, rnp, rdp);
+
+               rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
+       }
+}
+
+/*
+ * Check to see if there is a new grace period of which this CPU
+ * is not yet aware, and if so, set up local rcu_data state for it.
+ * Otherwise, see if this CPU has just passed through its first
+ * quiescent state for this grace period, and record that fact if so.
+ */
+static void
+rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       /* Check for grace-period ends and beginnings. */
+       note_gp_changes(rsp, rdp);
+
+       /*
+        * Does this CPU still need to do its part for current grace period?
+        * If no, return and let the other CPUs do their part as well.
+        */
+       if (!rdp->qs_pending)
+               return;
+
+       /*
+        * Was there a quiescent state since the beginning of the grace
+        * period? If no, then exit and wait for the next call.
+        */
+       if (!rdp->passed_quiesce)
+               return;
+
+       /*
+        * Tell RCU we are done (but rcu_report_qs_rdp() will be the
+        * judge of that).
+        */
+       rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Send the specified CPU's RCU callbacks to the orphanage.  The
+ * specified CPU must be offline, and the caller must hold the
+ * ->orphan_lock.
+ */
+static void
+rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
+                         struct rcu_node *rnp, struct rcu_data *rdp)
+{
+       /* No-CBs CPUs do not have orphanable callbacks. */
+       if (rcu_is_nocb_cpu(rdp->cpu))
+               return;
+
+       /*
+        * Orphan the callbacks.  First adjust the counts.  This is safe
+        * because _rcu_barrier() excludes CPU-hotplug operations, so it
+        * cannot be running now.  Thus no memory barrier is required.
+        */
+       if (rdp->nxtlist != NULL) {
+               rsp->qlen_lazy += rdp->qlen_lazy;
+               rsp->qlen += rdp->qlen;
+               rdp->n_cbs_orphaned += rdp->qlen;
+               rdp->qlen_lazy = 0;
+               ACCESS_ONCE(rdp->qlen) = 0;
+       }
+
+       /*
+        * Next, move those callbacks still needing a grace period to
+        * the orphanage, where some other CPU will pick them up.
+        * Some of the callbacks might have gone partway through a grace
+        * period, but that is too bad.  They get to start over because we
+        * cannot assume that grace periods are synchronized across CPUs.
+        * We don't bother updating the ->nxttail[] array yet; instead
+        * we just reset the whole thing later on.
+        */
+       if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
+               *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
+               rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
+               *rdp->nxttail[RCU_DONE_TAIL] = NULL;
+       }
+
+       /*
+        * Then move the ready-to-invoke callbacks to the orphanage,
+        * where some other CPU will pick them up.  These will not be
+        * required to pass through another grace period: They are done.
+        */
+       if (rdp->nxtlist != NULL) {
+               *rsp->orphan_donetail = rdp->nxtlist;
+               rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
+       }
+
+       /* Finally, initialize the rcu_data structure's list to empty.  */
+       init_callback_list(rdp);
+}
+
+/*
+ * Adopt the RCU callbacks from the specified rcu_state structure's
+ * orphanage.  The caller must hold the ->orphan_lock.
+ */
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+       int i;
+       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+
+       /* No-CBs CPUs are handled specially. */
+       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+               return;
+
+       /* Do the accounting first. */
+       rdp->qlen_lazy += rsp->qlen_lazy;
+       rdp->qlen += rsp->qlen;
+       rdp->n_cbs_adopted += rsp->qlen;
+       if (rsp->qlen_lazy != rsp->qlen)
+               rcu_idle_count_callbacks_posted();
+       rsp->qlen_lazy = 0;
+       rsp->qlen = 0;
+
+       /*
+        * We do not need a memory barrier here because the only way we
+        * can get here if there is an rcu_barrier() in flight is if
+        * we are the task doing the rcu_barrier().
+        */
+
+       /* First adopt the ready-to-invoke callbacks. */
+       if (rsp->orphan_donelist != NULL) {
+               *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
+               *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
+               for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
+                       if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+                               rdp->nxttail[i] = rsp->orphan_donetail;
+               rsp->orphan_donelist = NULL;
+               rsp->orphan_donetail = &rsp->orphan_donelist;
+       }
+
+       /* And then adopt the callbacks that still need a grace period. */
+       if (rsp->orphan_nxtlist != NULL) {
+               *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
+               rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
+               rsp->orphan_nxtlist = NULL;
+               rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+       }
+}
+
+/*
+ * Trace the fact that this CPU is going offline.
+ */
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
+{
+       RCU_TRACE(unsigned long mask);
+       RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
+       RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
+
+       RCU_TRACE(mask = rdp->grpmask);
+       trace_rcu_grace_period(rsp->name,
+                              rnp->gpnum + 1 - !!(rnp->qsmask & mask),
+                              TPS("cpuofl"));
+}
+
+/*
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context.  Do the remainder of the cleanup,
+ * including orphaning the outgoing CPU's RCU callbacks, and also
+ * adopting them.  There can only be one CPU hotplug operation at a time,
+ * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ */
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
+{
+       unsigned long flags;
+       unsigned long mask;
+       int need_report = 0;
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
+
+       /* Adjust any no-longer-needed kthreads. */
+       rcu_boost_kthread_setaffinity(rnp, -1);
+
+       /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
+
+       /* Exclude any attempts to start a new grace period. */
+       mutex_lock(&rsp->onoff_mutex);
+       raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
+
+       /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
+       rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
+       rcu_adopt_orphan_cbs(rsp);
+
+       /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+       mask = rdp->grpmask;    /* rnp->grplo is constant. */
+       do {
+               raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+               rnp->qsmaskinit &= ~mask;
+               if (rnp->qsmaskinit != 0) {
+                       if (rnp != rdp->mynode)
+                               raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+                       break;
+               }
+               if (rnp == rdp->mynode)
+                       need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+               else
+                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+               mask = rnp->grpmask;
+               rnp = rnp->parent;
+       } while (rnp != NULL);
+
+       /*
+        * We still hold the leaf rcu_node structure lock here, and
+        * irqs are still disabled.  The reason for this subterfuge is that
+        * invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
+        * held leads to deadlock.
+        */
+       raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
+       rnp = rdp->mynode;
+       if (need_report & RCU_OFL_TASKS_NORM_GP)
+               rcu_report_unblock_qs_rnp(rnp, flags);
+       else
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       if (need_report & RCU_OFL_TASKS_EXP_GP)
+               rcu_report_exp_rnp(rsp, rnp, true);
+       WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+                 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+                 cpu, rdp->qlen, rdp->nxtlist);
+       init_callback_list(rdp);
+       /* Disallow further callbacks on this CPU. */
+       rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+       mutex_unlock(&rsp->onoff_mutex);
+}
+
+#else /* #ifdef CONFIG_HOTPLUG_CPU */
+
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
+{
+}
+
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Invoke any RCU callbacks that have made it to the end of their grace
+ * period.  Throttle as specified by rdp->blimit.
+ */
+static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       unsigned long flags;
+       struct rcu_head *next, *list, **tail;
+       long bl, count, count_lazy;
+       int i;
+
+       /* If no callbacks are ready, just return. */
+       if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
+               trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
+               trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
+                                   need_resched(), is_idle_task(current),
+                                   rcu_is_callbacks_kthread());
+               return;
+       }
+
+       /*
+        * Extract the list of ready callbacks, disabling to prevent
+        * races with call_rcu() from interrupt handlers.
+        */
+       local_irq_save(flags);
+       WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+       bl = rdp->blimit;
+       trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
+       list = rdp->nxtlist;
+       rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+       *rdp->nxttail[RCU_DONE_TAIL] = NULL;
+       tail = rdp->nxttail[RCU_DONE_TAIL];
+       for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
+               if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+                       rdp->nxttail[i] = &rdp->nxtlist;
+       local_irq_restore(flags);
+
+       /* Invoke callbacks. */
+       count = count_lazy = 0;
+       while (list) {
+               next = list->next;
+               prefetch(next);
+               debug_rcu_head_unqueue(list);
+               if (__rcu_reclaim(rsp->name, list))
+                       count_lazy++;
+               list = next;
+               /* Stop only if limit reached and CPU has something to do. */
+               if (++count >= bl &&
+                   (need_resched() ||
+                    (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
+                       break;
+       }
+
+       local_irq_save(flags);
+       trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
+                           is_idle_task(current),
+                           rcu_is_callbacks_kthread());
+
+       /* Update count, and requeue any remaining callbacks. */
+       if (list != NULL) {
+               *tail = rdp->nxtlist;
+               rdp->nxtlist = list;
+               for (i = 0; i < RCU_NEXT_SIZE; i++)
+                       if (&rdp->nxtlist == rdp->nxttail[i])
+                               rdp->nxttail[i] = tail;
+                       else
+                               break;
+       }
+       smp_mb(); /* List handling before counting for rcu_barrier(). */
+       rdp->qlen_lazy -= count_lazy;
+       ACCESS_ONCE(rdp->qlen) -= count;
+       rdp->n_cbs_invoked += count;
+
+       /* Reinstate batch limit if we have worked down the excess. */
+       if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+               rdp->blimit = blimit;
+
+       /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
+       if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
+               rdp->qlen_last_fqs_check = 0;
+               rdp->n_force_qs_snap = rsp->n_force_qs;
+       } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
+               rdp->qlen_last_fqs_check = rdp->qlen;
+       WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
+
+       local_irq_restore(flags);
+
+       /* Re-invoke RCU core processing if there are callbacks remaining. */
+       if (cpu_has_callbacks_ready_to_invoke(rdp))
+               invoke_rcu_core();
+}
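+
+/*
+ * Callbacks beyond the ->blimit batch are simply left queued, and
+ * rcu_do_batch() re-raises RCU_SOFTIRQ via invoke_rcu_core(), so the
+ * remainder are invoked on a later pass.  When a CPU's queue grows past
+ * qhimark, __call_rcu_core() below raises ->blimit to LONG_MAX to work
+ * off the backlog, and rcu_do_batch() restores the normal limit once the
+ * queue has drained below qlowmark.
+ */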
+
+/*
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule RCU core processing.
+ *
+ * This function must be called from hardirq context.  It is normally
+ * invoked from the scheduling-clock interrupt.  If rcu_pending returns
+ * false, there is no point in invoking rcu_check_callbacks().
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+       trace_rcu_utilization(TPS("Start scheduler-tick"));
+       increment_cpu_stall_ticks();
+       if (user || rcu_is_cpu_rrupt_from_idle()) {
+
+               /*
+                * Get here if this CPU took its interrupt from user
+                * mode or from the idle loop, and if this is not a
+                * nested interrupt.  In this case, the CPU is in
+                * a quiescent state, so note it.
+                *
+                * No memory barrier is required here because both
+                * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
+                * variables that other CPUs neither access nor modify,
+                * at least not while the corresponding CPU is online.
+                */
+
+               rcu_sched_qs(cpu);
+               rcu_bh_qs(cpu);
+
+       } else if (!in_softirq()) {
+
+               /*
+                * Get here if this CPU did not take its interrupt from
+                * softirq, in other words, if it is not interrupting
+                * an rcu_bh read-side critical section.  This is therefore
+                * a quiescent state for rcu_bh, so note it.
+                */
+
+               rcu_bh_qs(cpu);
+       }
+       rcu_preempt_check_callbacks(cpu);
+       if (rcu_pending(cpu))
+               invoke_rcu_core();
+       trace_rcu_utilization(TPS("End scheduler-tick"));
+}
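+
+/*
+ * For reference, a typical caller (sketched for illustration; the exact
+ * tick path varies by architecture and configuration) looks like:
+ *
+ *	void update_process_times(int user_tick)
+ *	{
+ *		int cpu = smp_processor_id();
+ *
+ *		...
+ *		rcu_check_callbacks(cpu, user_tick);
+ *		...
+ *	}
+ */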
+
+/*
+ * Scan the leaf rcu_node structures, processing dyntick state for any that
+ * have not yet encountered a quiescent state, using the function specified.
+ * Also initiate boosting for any threads blocked on the root rcu_node.
+ *
+ * The caller must have suppressed start of new grace periods.
+ */
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj)
+{
+       unsigned long bit;
+       int cpu;
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_node *rnp;
+
+       rcu_for_each_leaf_node(rsp, rnp) {
+               cond_resched();
+               mask = 0;
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               if (!rcu_gp_in_progress(rsp)) {
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                       return;
+               }
+               if (rnp->qsmask == 0) {
+                       rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
+                       continue;
+               }
+               cpu = rnp->grplo;
+               bit = 1;
+               for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
+                       if ((rnp->qsmask & bit) != 0) {
+                               if ((rnp->qsmaskinit & bit) != 0)
+                                       *isidle = 0;
+                               if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+                                       mask |= bit;
+                       }
+               }
+               if (mask != 0) {
+
+                       /* rcu_report_qs_rnp() releases rnp->lock. */
+                       rcu_report_qs_rnp(mask, rsp, rnp, flags);
+                       continue;
+               }
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+       rnp = rcu_get_root(rsp);
+       if (rnp->qsmask == 0) {
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
+       }
+}
+
+/*
+ * Force quiescent states on reluctant CPUs, and also detect which
+ * CPUs are in dyntick-idle mode.
+ */
+static void force_quiescent_state(struct rcu_state *rsp)
+{
+       unsigned long flags;
+       bool ret;
+       struct rcu_node *rnp;
+       struct rcu_node *rnp_old = NULL;
+
+       /* Funnel through hierarchy to reduce memory contention. */
+       rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+       for (; rnp != NULL; rnp = rnp->parent) {
+               ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
+                     !raw_spin_trylock(&rnp->fqslock);
+               if (rnp_old != NULL)
+                       raw_spin_unlock(&rnp_old->fqslock);
+               if (ret) {
+                       rsp->n_force_qs_lh++;
+                       return;
+               }
+               rnp_old = rnp;
+       }
+       /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
+
+       /* Reached the root of the rcu_node tree, acquire lock. */
+       raw_spin_lock_irqsave(&rnp_old->lock, flags);
+       raw_spin_unlock(&rnp_old->fqslock);
+       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
+               rsp->n_force_qs_lh++;
+               raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+               return;  /* Someone beat us to it. */
+       }
+       rsp->gp_flags |= RCU_GP_FLAG_FQS;
+       raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+}
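+
+/*
+ * The funnel above keeps lock contention bounded when many CPUs invoke
+ * force_quiescent_state() at once: a CPU that fails to acquire its leaf
+ * rcu_node structure's ->fqslock, or that sees RCU_GP_FLAG_FQS already
+ * set, simply returns and lets whoever holds the lock carry the request
+ * up the tree, so only a few CPUs at a time contend for the root's locks.
+ */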
+
+/*
+ * This does the RCU core processing work for the specified rcu_state
+ * and rcu_data structures.  This may be called only from the CPU to
+ * which the rdp belongs.
+ */
+static void
+__rcu_process_callbacks(struct rcu_state *rsp)
+{
+       unsigned long flags;
+       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+
+       WARN_ON_ONCE(rdp->beenonline == 0);
+
+       /* Update RCU state based on any recent quiescent states. */
+       rcu_check_quiescent_state(rsp, rdp);
+
+       /* Does this CPU require a not-yet-started grace period? */
+       local_irq_save(flags);
+       if (cpu_needs_another_gp(rsp, rdp)) {
+               raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+               rcu_start_gp(rsp);
+               raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+       } else {
+               local_irq_restore(flags);
+       }
+
+       /* If there are callbacks ready, invoke them. */
+       if (cpu_has_callbacks_ready_to_invoke(rdp))
+               invoke_rcu_callbacks(rsp, rdp);
+}
+
+/*
+ * Do RCU core processing for the current CPU.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+       struct rcu_state *rsp;
+
+       if (cpu_is_offline(smp_processor_id()))
+               return;
+       trace_rcu_utilization(TPS("Start RCU core"));
+       for_each_rcu_flavor(rsp)
+               __rcu_process_callbacks(rsp);
+       trace_rcu_utilization(TPS("End RCU core"));
+}
+
+/*
+ * Schedule RCU callback invocation.  If the specified type of RCU
+ * does not support RCU priority boosting, just do a direct call,
+ * otherwise wake up the per-CPU kernel kthread.  Note that because we
+ * are running on the current CPU with interrupts disabled, the
+ * rcu_cpu_kthread_task cannot disappear out from under us.
+ */
+static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
+               return;
+       if (likely(!rsp->boost)) {
+               rcu_do_batch(rsp, rdp);
+               return;
+       }
+       invoke_rcu_callbacks_kthread();
+}
+
+static void invoke_rcu_core(void)
+{
+       if (cpu_online(smp_processor_id()))
+               raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+                           struct rcu_head *head, unsigned long flags)
+{
+       /*
+        * If called from an extended quiescent state, invoke the RCU
+        * core in order to force a re-evaluation of RCU's idleness.
+        */
+       if (!rcu_is_watching() && cpu_online(smp_processor_id()))
+               invoke_rcu_core();
+
+       /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+       if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
+               return;
+
+       /*
+        * Force the grace period if too many callbacks or too long waiting.
+        * Enforce hysteresis, and don't invoke force_quiescent_state()
+        * if some other CPU has recently done so.  Also, don't bother
+        * invoking force_quiescent_state() if the newly enqueued callback
+        * is the only one waiting for a grace period to complete.
+        */
+       if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+
+               /* Are we ignoring a completed grace period? */
+               note_gp_changes(rsp, rdp);
+
+               /* Start a new grace period if one not already started. */
+               if (!rcu_gp_in_progress(rsp)) {
+                       struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+                       raw_spin_lock(&rnp_root->lock);
+                       rcu_start_gp(rsp);
+                       raw_spin_unlock(&rnp_root->lock);
+               } else {
+                       /* Give the grace period a kick. */
+                       rdp->blimit = LONG_MAX;
+                       if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+                           *rdp->nxttail[RCU_DONE_TAIL] != head)
+                               force_quiescent_state(rsp);
+                       rdp->n_force_qs_snap = rsp->n_force_qs;
+                       rdp->qlen_last_fqs_check = rdp->qlen;
+               }
+       }
+}
+
+/*
+ * RCU callback function to leak a callback.
+ */
+static void rcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
+/*
+ * Helper function for call_rcu() and friends.  The cpu argument will
+ * normally be -1, indicating "currently running CPU".  It may specify
+ * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
+ * is expected to specify a CPU.
+ */
+static void
+__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+          struct rcu_state *rsp, int cpu, bool lazy)
+{
+       unsigned long flags;
+       struct rcu_data *rdp;
+
+       WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
+       if (debug_rcu_head_queue(head)) {
+               /* Probable double call_rcu(), so leak the callback. */
+               ACCESS_ONCE(head->func) = rcu_leak_callback;
+               WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
+               return;
+       }
+       head->func = func;
+       head->next = NULL;
+
+       /*
+        * Opportunistically note grace-period endings and beginnings.
+        * Note that we might see a beginning right after we see an
+        * end, but never vice versa, since this CPU has to pass through
+        * a quiescent state betweentimes.
+        */
+       local_irq_save(flags);
+       rdp = this_cpu_ptr(rsp->rda);
+
+       /* Add the callback to our list. */
+       if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+               int offline;
+
+               if (cpu != -1)
+                       rdp = per_cpu_ptr(rsp->rda, cpu);
+               offline = !__call_rcu_nocb(rdp, head, lazy);
+               WARN_ON_ONCE(offline);
+               /* _call_rcu() is illegal on offline CPU; leak the callback. */
+               local_irq_restore(flags);
+               return;
+       }
+       ACCESS_ONCE(rdp->qlen)++;
+       if (lazy)
+               rdp->qlen_lazy++;
+       else
+               rcu_idle_count_callbacks_posted();
+       smp_mb();  /* Count before adding callback for rcu_barrier(). */
+       *rdp->nxttail[RCU_NEXT_TAIL] = head;
+       rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
+
+       if (__is_kfree_rcu_offset((unsigned long)func))
+               trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
+                                        rdp->qlen_lazy, rdp->qlen);
+       else
+               trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
+
+       /* Go handle any RCU core processing required. */
+       __call_rcu_core(rsp, rdp, head, flags);
+       local_irq_restore(flags);
+}
+
+/*
+ * Queue an RCU-sched callback for invocation after a grace period.
+ */
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_sched_state, -1, 0);
+}
+EXPORT_SYMBOL_GPL(call_rcu_sched);
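+
+/*
+ * Usage sketch for call_rcu_sched() (illustrative only; "struct foo",
+ * foo_reclaim(), and "p" are made-up names):
+ *
+ *	struct foo {
+ *		struct rcu_head rh;
+ *		int data;
+ *	};
+ *
+ *	static void foo_reclaim(struct rcu_head *rhp)
+ *	{
+ *		kfree(container_of(rhp, struct foo, rh));
+ *	}
+ *
+ * After removing a struct foo instance "p" from all RCU-sched-protected
+ * reachability, defer its freeing past a grace period:
+ *
+ *	call_rcu_sched(&p->rh, foo_reclaim);
+ */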
+
+/*
+ * Queue an RCU callback for invocation after a quicker grace period.
+ */
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_bh_state, -1, 0);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+/*
+ * Because a context switch is a grace period for RCU-sched and RCU-bh,
+ * any blocking grace-period wait automatically implies a grace period
+ * if there is only one CPU online at any point in time during execution
+ * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
+ * occasionally incorrectly indicate that there are multiple CPUs online
+ * when there was in fact only one the whole time, as this just adds
+ * some overhead: RCU still operates correctly.
+ */
+static inline int rcu_blocking_is_gp(void)
+{
+       int ret;
+
+       might_sleep();  /* Check for RCU read-side critical section. */
+       preempt_disable();
+       ret = num_online_cpus() <= 1;
+       preempt_enable();
+       return ret;
+}
+
+/**
+ * synchronize_sched - wait until an rcu-sched grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-sched
+ * grace period has elapsed, in other words after all currently executing
+ * rcu-sched read-side critical sections have completed.   These read-side
+ * critical sections are delimited by rcu_read_lock_sched() and
+ * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
+ * local_irq_disable(), and so on may be used in place of
+ * rcu_read_lock_sched().
+ *
+ * This means that all preempt_disable code sequences, including NMI and
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns.  However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched().  In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section.  Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
+ *
+ * This primitive provides the guarantees made by the (now removed)
+ * synchronize_kernel() API.  In contrast, synchronize_rcu() only
+ * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
+ */
+void synchronize_sched(void)
+{
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_sched() in RCU-sched read-side critical section");
+       if (rcu_blocking_is_gp())
+               return;
+       if (rcu_expedited)
+               synchronize_sched_expedited();
+       else
+               wait_rcu_gp(call_rcu_sched);
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
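+
+/*
+ * Usage sketch for synchronize_sched() (illustrative only; "gp",
+ * "gp_lock", and "p" are made-up names):
+ *
+ *	spin_lock(&gp_lock);
+ *	p = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
+ *	rcu_assign_pointer(gp, NULL);
+ *	spin_unlock(&gp_lock);
+ *	synchronize_sched();
+ *	kfree(p);
+ *
+ * Once synchronize_sched() returns, no preempt-disabled reader can still
+ * hold a reference obtained from "gp", so freeing "p" is safe.
+ */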
+
+/**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu_bh grace
+ * period has elapsed, in other words after all currently executing rcu_bh
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ * and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
+ */
+void synchronize_rcu_bh(void)
+{
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
+       if (rcu_blocking_is_gp())
+               return;
+       if (rcu_expedited)
+               synchronize_rcu_bh_expedited();
+       else
+               wait_rcu_gp(call_rcu_bh);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+       /*
+        * There must be a full memory barrier on each affected CPU
+        * between the time that try_stop_cpus() is called and the
+        * time that it returns.
+        *
+        * In the current initial implementation of cpu_stop, the
+        * above condition is already met when the control reaches
+        * this point and the following smp_mb() is not strictly
+        * necessary.  Do smp_mb() anyway for documentation and
+        * robustness against future implementation changes.
+        */
+       smp_mb(); /* See above comment block. */
+       return 0;
+}
+
+/**
+ * synchronize_sched_expedited - Brute-force RCU-sched grace period
+ *
+ * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_sched_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_sched() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with ->expedited_start and ->expedited_done taking on
+ * the roles of the halves of the ticket-lock word.  Each task atomically
+ * increments ->expedited_start upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_long_cmpxchg() to
+ * update ->expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of ->expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+       long firstsnap, s, snap;
+       int trycount = 0;
+       struct rcu_state *rsp = &rcu_sched_state;
+
+       /*
+        * If we are in danger of counter wrap, just do synchronize_sched().
+        * By allowing ->expedited_start to advance no more than
+        * ULONG_MAX/8 ahead of ->expedited_done, we are ensuring
+        * that more than 3.5 billion CPUs would be required to force a
+        * counter wrap on a 32-bit system.  Quite a few more CPUs would of
+        * course be required on a 64-bit system.
+        */
+       if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
+                        (ulong)atomic_long_read(&rsp->expedited_done) +
+                        ULONG_MAX / 8)) {
+               synchronize_sched();
+               atomic_long_inc(&rsp->expedited_wrap);
+               return;
+       }
+
+       /*
+        * Take a ticket.  Note that atomic_long_inc_return() implies a
+        * full memory barrier.
+        */
+       snap = atomic_long_inc_return(&rsp->expedited_start);
+       firstsnap = snap;
+       get_online_cpus();
+       WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
+
+       /*
+        * Each pass through the following loop attempts to force a
+        * context switch on each CPU.
+        */
+       while (try_stop_cpus(cpu_online_mask,
+                            synchronize_sched_expedited_cpu_stop,
+                            NULL) == -EAGAIN) {
+               put_online_cpus();
+               atomic_long_inc(&rsp->expedited_tryfail);
+
+               /* Check to see if someone else did our work for us. */
+               s = atomic_long_read(&rsp->expedited_done);
+               if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+                       /* ensure test happens before caller kfree */
+                       smp_mb__before_atomic_inc(); /* ^^^ */
+                       atomic_long_inc(&rsp->expedited_workdone1);
+                       return;
+               }
+
+               /* No joy, try again later.  Or just synchronize_sched(). */
+               if (trycount++ < 10) {
+                       udelay(trycount * num_online_cpus());
+               } else {
+                       wait_rcu_gp(call_rcu_sched);
+                       atomic_long_inc(&rsp->expedited_normal);
+                       return;
+               }
+
+               /* Recheck to see if someone else did our work for us. */
+               s = atomic_long_read(&rsp->expedited_done);
+               if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+                       /* ensure test happens before caller kfree */
+                       smp_mb__before_atomic_inc(); /* ^^^ */
+                       atomic_long_inc(&rsp->expedited_workdone2);
+                       return;
+               }
+
+               /*
+                * Refetching ->expedited_start allows later
+                * callers to piggyback on our grace period.  We retry
+                * after they started, so our grace period works for them,
+                * and they started after our first try, so their grace
+                * period works for us.
+                */
+               get_online_cpus();
+               snap = atomic_long_read(&rsp->expedited_start);
+               smp_mb(); /* ensure read is before try_stop_cpus(). */
+       }
+       atomic_long_inc(&rsp->expedited_stoppedcpus);
+
+       /*
+        * Everyone up to our most recent fetch is covered by our grace
+        * period.  Update the counter, but only if our work is still
+        * relevant -- which it won't be if someone who started later
+        * than we did already did their update.
+        */
+       do {
+               atomic_long_inc(&rsp->expedited_done_tries);
+               s = atomic_long_read(&rsp->expedited_done);
+               if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
+                       /* ensure test happens before caller kfree */
+                       smp_mb__before_atomic_inc(); /* ^^^ */
+                       atomic_long_inc(&rsp->expedited_done_lost);
+                       break;
+               }
+       } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
+       atomic_long_inc(&rsp->expedited_done_exit);
+
+       put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
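+
+/*
+ * Worked example of the ticketing (illustrative numbers): suppose
+ * ->expedited_start is 41 and ->expedited_done is 40 on entry.  We take
+ * ticket 42 (snap == firstsnap == 42).  If try_stop_cpus() succeeds on
+ * the first attempt, we try to advance ->expedited_done to 42; should a
+ * later caller have already pushed it to 44, the cmpxchg loop sees
+ * 44 >= 42 and exits without moving the counter backwards.  If instead
+ * try_stop_cpus() keeps failing but we observe ->expedited_done >= 42,
+ * someone else's expedited grace period covered us and we return early.
+ */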
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, for the specified type of RCU, returning 1 if so.
+ * The checks are in order of increasing expense: checks that can be
+ * carried out against CPU-local state are performed first.  However,
+ * we must check for CPU stalls first, else we might not get a chance.
+ */
+static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+       struct rcu_node *rnp = rdp->mynode;
+
+       rdp->n_rcu_pending++;
+
+       /* Check for CPU stalls, if enabled. */
+       check_cpu_stall(rsp, rdp);
+
+       /* Is the RCU core waiting for a quiescent state from this CPU? */
+       if (rcu_scheduler_fully_active &&
+           rdp->qs_pending && !rdp->passed_quiesce) {
+               rdp->n_rp_qs_pending++;
+       } else if (rdp->qs_pending && rdp->passed_quiesce) {
+               rdp->n_rp_report_qs++;
+               return 1;
+       }
+
+       /* Does this CPU have callbacks ready to invoke? */
+       if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+               rdp->n_rp_cb_ready++;
+               return 1;
+       }
+
+       /* Has RCU gone idle with this CPU needing another grace period? */
+       if (cpu_needs_another_gp(rsp, rdp)) {
+               rdp->n_rp_cpu_needs_gp++;
+               return 1;
+       }
+
+       /* Has another RCU grace period completed?  */
+       if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
+               rdp->n_rp_gp_completed++;
+               return 1;
+       }
+
+       /* Has a new RCU grace period started? */
+       if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
+               rdp->n_rp_gp_started++;
+               return 1;
+       }
+
+       /* nothing to do */
+       rdp->n_rp_need_nothing++;
+       return 0;
+}
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so.  This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
+static int rcu_pending(int cpu)
+{
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp)
+               if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+                       return 1;
+       return 0;
+}
+
+/*
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
+ */
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
+{
+       bool al = true;
+       bool hc = false;
+       struct rcu_data *rdp;
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp) {
+               rdp = per_cpu_ptr(rsp->rda, cpu);
+               if (!rdp->nxtlist)
+                       continue;
+               hc = true;
+               if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
+                       al = false;
+                       break;
+               }
+       }
+       if (all_lazy)
+               *all_lazy = al;
+       return hc;
+}
+
+/*
+ * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
+                              int cpu, unsigned long done)
+{
+       trace_rcu_barrier(rsp->name, s, cpu,
+                         atomic_read(&rsp->barrier_cpu_count), done);
+}
+
+/*
+ * RCU callback function for _rcu_barrier().  If we are last, wake
+ * up the task executing _rcu_barrier().
+ */
+static void rcu_barrier_callback(struct rcu_head *rhp)
+{
+       struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+       struct rcu_state *rsp = rdp->rsp;
+
+       if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+               _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
+               complete(&rsp->barrier_completion);
+       } else {
+               _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+       }
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *type)
+{
+       struct rcu_state *rsp = type;
+       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+
+       _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
+       atomic_inc(&rsp->barrier_cpu_count);
+       rsp->call(&rdp->barrier_head, rcu_barrier_callback);
+}
+
+/*
+ * Orchestrate the specified type of RCU barrier, waiting for all
+ * RCU callbacks of the specified type to complete.
+ */
+static void _rcu_barrier(struct rcu_state *rsp)
+{
+       int cpu;
+       struct rcu_data *rdp;
+       unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+       unsigned long snap_done;
+
+       _rcu_barrier_trace(rsp, "Begin", -1, snap);
+
+       /* Take mutex to serialize concurrent rcu_barrier() requests. */
+       mutex_lock(&rsp->barrier_mutex);
+
+       /*
+        * Ensure that all prior references, including to ->n_barrier_done,
+        * are ordered before the _rcu_barrier() machinery.
+        */
+       smp_mb();  /* See above block comment. */
+
+       /*
+        * Recheck ->n_barrier_done to see if others did our work for us.
+        * This means checking ->n_barrier_done for an even-to-odd-to-even
+        * transition.  The "if" expression below therefore rounds the old
+        * value up to the next even number and adds two before comparing.
+        */
+       snap_done = rsp->n_barrier_done;
+       _rcu_barrier_trace(rsp, "Check", -1, snap_done);
+
+       /*
+        * If the value in snap is odd, we needed to wait for the current
+        * rcu_barrier() to complete, then wait for the next one, in other
+        * words, we need the value of snap_done to be three larger than
+        * the value of snap.  On the other hand, if the value in snap is
+        * even, we only had to wait for the next rcu_barrier() to complete,
+        * in other words, we need the value of snap_done to be only two
+        * greater than the value of snap.  The "(snap + 3) & ~0x1" computes
+        * this for us (thank you, Linus!).
+        */
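+       /*
+        * For example (illustrative numbers): a snapshot of snap == 4
+        * (even, no barrier in flight) gives (4 + 3) & ~0x1 == 6, so
+        * observing ->n_barrier_done >= 6 means a full barrier started
+        * and completed after our snapshot.  A snapshot of snap == 5
+        * (odd, a barrier already in flight) gives 8: the in-flight
+        * barrier must end (6) and a complete new one must start (7)
+        * and end (8) before the early exit below is safe.
+        */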
+       if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
+               _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
+               smp_mb(); /* caller's subsequent code after above check. */
+               mutex_unlock(&rsp->barrier_mutex);
+               return;
+       }
+
+       /*
+        * Increment ->n_barrier_done to avoid duplicate work.  Use
+        * ACCESS_ONCE() to prevent the compiler from speculating
+        * the increment to precede the early-exit check.
+        */
+       ACCESS_ONCE(rsp->n_barrier_done)++;
+       WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+       _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
+       smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
+
+       /*
+        * Initialize the count to one rather than to zero in order to
+        * avoid a too-soon return to zero in case of a short grace period
+        * (or preemption of this task).  Exclude CPU-hotplug operations
+        * to ensure that no offline CPU has callbacks queued.
+        */
+       init_completion(&rsp->barrier_completion);
+       atomic_set(&rsp->barrier_cpu_count, 1);
+       get_online_cpus();
+
+       /*
+        * Force each CPU with callbacks to register a new callback.
+        * When that callback is invoked, we will know that all of the
+        * corresponding CPU's preceding callbacks have been invoked.
+        */
+       for_each_possible_cpu(cpu) {
+               if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
+                       continue;
+               rdp = per_cpu_ptr(rsp->rda, cpu);
+               if (rcu_is_nocb_cpu(cpu)) {
+                       _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                          rsp->n_barrier_done);
+                       atomic_inc(&rsp->barrier_cpu_count);
+                       __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
+                                  rsp, cpu, 0);
+               } else if (ACCESS_ONCE(rdp->qlen)) {
+                       _rcu_barrier_trace(rsp, "OnlineQ", cpu,
+                                          rsp->n_barrier_done);
+                       smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
+               } else {
+                       _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+                                          rsp->n_barrier_done);
+               }
+       }
+       put_online_cpus();
+
+       /*
+        * Now that we have an rcu_barrier_callback() callback on each
+        * CPU, and thus each counted, remove the initial count.
+        */
+       if (atomic_dec_and_test(&rsp->barrier_cpu_count))
+               complete(&rsp->barrier_completion);
+
+       /* Increment ->n_barrier_done to prevent duplicate work. */
+       smp_mb(); /* Keep increment after above mechanism. */
+       ACCESS_ONCE(rsp->n_barrier_done)++;
+       WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+       _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
+       smp_mb(); /* Keep increment before caller's subsequent code. */
+
+       /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
+       wait_for_completion(&rsp->barrier_completion);
+
+       /* Other rcu_barrier() invocations can now safely proceed. */
+       mutex_unlock(&rsp->barrier_mutex);
+}
+
+/**
+ * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
+ */
+void rcu_barrier_bh(void)
+{
+       _rcu_barrier(&rcu_bh_state);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+/**
+ * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ */
+void rcu_barrier_sched(void)
+{
+       _rcu_barrier(&rcu_sched_state);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
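+/*
+ * A minimal usage sketch for the two barrier functions above (hypothetical
+ * "foo" example, not taken from this file): a module that posts callbacks
+ * with call_rcu_sched() must wait for all of them before its code and data
+ * disappear:
+ *
+ *	static void foo_reclaim(struct rcu_head *rhp)
+ *	{
+ *		kfree(container_of(rhp, struct foo, rcu));
+ *	}
+ *
+ *	void foo_cleanup(void)
+ *	{
+ *		... stop posting new foo_reclaim() callbacks, then ...
+ *		rcu_barrier_sched();
+ *	}
+ */
+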
+/*
+ * Do boot-time initialization of a CPU's per-CPU RCU data.
+ */
+static void __init
+rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
+{
+       unsigned long flags;
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       /* Set up local state, ensuring consistent view of global state. */
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
+       init_callback_list(rdp);
+       rdp->qlen_lazy = 0;
+       ACCESS_ONCE(rdp->qlen) = 0;
+       rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
+       WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
+       WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+       rdp->cpu = cpu;
+       rdp->rsp = rsp;
+       rcu_boot_init_nocb_percpu_data(rdp);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Initialize a CPU's per-CPU RCU data.  Note that only one online or
+ * offline event can be happening at a given time.  Note also that we
+ * can accept some slop in the rsp->completed access due to the fact
+ * that this CPU cannot possibly have any RCU callbacks in flight yet.
+ */
+static void
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
+{
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       /* Exclude new grace periods. */
+       mutex_lock(&rsp->onoff_mutex);
+
+       /* Set up local state, ensuring consistent view of global state. */
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       rdp->beenonline = 1;     /* We have now been online. */
+       rdp->preemptible = preemptible;
+       rdp->qlen_last_fqs_check = 0;
+       rdp->n_force_qs_snap = rsp->n_force_qs;
+       rdp->blimit = blimit;
+       init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
+       rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_sysidle_init_percpu_data(rdp->dynticks);
+       atomic_set(&rdp->dynticks->dynticks,
+                  (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
+       raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
+
+       /* Add CPU to rcu_node bitmasks. */
+       rnp = rdp->mynode;
+       mask = rdp->grpmask;
+       do {
+               /* Exclude any attempts to start a new GP on small systems. */
+               raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+               rnp->qsmaskinit |= mask;
+               mask = rnp->grpmask;
+               if (rnp == rdp->mynode) {
+                       /*
+                        * If there is a grace period in progress, we will
+                        * set up to wait for it next time we run the
+                        * RCU core code.
+                        */
+                       rdp->gpnum = rnp->completed;
+                       rdp->completed = rnp->completed;
+                       rdp->passed_quiesce = 0;
+                       rdp->qs_pending = 0;
+                       trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
+               }
+               raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
+               rnp = rnp->parent;
+       } while (rnp != NULL && !(rnp->qsmaskinit & mask));
+       local_irq_restore(flags);
+
+       mutex_unlock(&rsp->onoff_mutex);
+}
+
+static void rcu_prepare_cpu(int cpu)
+{
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp)
+               rcu_init_percpu_data(cpu, rsp,
+                                    strcmp(rsp->name, "rcu_preempt") == 0);
+}
+
+/*
+ * Handle CPU online/offline notification events.
+ */
+static int rcu_cpu_notify(struct notifier_block *self,
+                                   unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;
+       struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+       struct rcu_node *rnp = rdp->mynode;
+       struct rcu_state *rsp;
+
+       trace_rcu_utilization(TPS("Start CPU hotplug"));
+       switch (action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               rcu_prepare_cpu(cpu);
+               rcu_prepare_kthreads(cpu);
+               break;
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               rcu_boost_kthread_setaffinity(rnp, -1);
+               break;
+       case CPU_DOWN_PREPARE:
+               rcu_boost_kthread_setaffinity(rnp, cpu);
+               break;
+       case CPU_DYING:
+       case CPU_DYING_FROZEN:
+               for_each_rcu_flavor(rsp)
+                       rcu_cleanup_dying_cpu(rsp);
+               break;
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+       case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
+               for_each_rcu_flavor(rsp)
+                       rcu_cleanup_dead_cpu(cpu, rsp);
+               break;
+       default:
+               break;
+       }
+       trace_rcu_utilization(TPS("End CPU hotplug"));
+       return NOTIFY_OK;
+}
+
+static int rcu_pm_notify(struct notifier_block *self,
+                        unsigned long action, void *hcpu)
+{
+       switch (action) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+                       rcu_expedited = 1;
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_SUSPEND:
+               rcu_expedited = 0;
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+/*
+ * Spawn the kthread that handles this RCU flavor's grace periods.
+ */
+static int __init rcu_spawn_gp_kthread(void)
+{
+       unsigned long flags;
+       struct rcu_node *rnp;
+       struct rcu_state *rsp;
+       struct task_struct *t;
+
+       for_each_rcu_flavor(rsp) {
+               t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
+               BUG_ON(IS_ERR(t));
+               rnp = rcu_get_root(rsp);
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               rsp->gp_kthread = t;
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               rcu_spawn_nocb_kthreads(rsp);
+       }
+       return 0;
+}
+early_initcall(rcu_spawn_gp_kthread);
+
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process.  Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system).  After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.  This function also enables RCU lockdep checking.
+ */
+void rcu_scheduler_starting(void)
+{
+       WARN_ON(num_online_cpus() != 1);
+       WARN_ON(nr_context_switches() > 0);
+       rcu_scheduler_active = 1;
+}
+
+/*
+ * Compute the per-level fanout, either using the exact fanout specified
+ * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
+ */
+#ifdef CONFIG_RCU_FANOUT_EXACT
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+       int i;
+
+       for (i = rcu_num_lvls - 1; i > 0; i--)
+               rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+       rsp->levelspread[0] = rcu_fanout_leaf;
+}
+#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+       int ccur;
+       int cprv;
+       int i;
+
+       cprv = nr_cpu_ids;
+       for (i = rcu_num_lvls - 1; i >= 0; i--) {
+               ccur = rsp->levelcnt[i];
+               rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
+               cprv = ccur;
+       }
+}
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
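+
+/*
+ * For example, with nr_cpu_ids = 96 and a two-level tree of one root plus
+ * six leaves (levelcnt = {1, 6}), the loop above computes
+ * levelspread[1] = (96 + 5) / 6 = 16 CPUs per leaf and levelspread[0] = 6
+ * leaves under the root, balancing the tree rather than packing each leaf
+ * to the compile-time fanout.
+ */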
+
+/*
+ * Helper function for rcu_init() that initializes one rcu_state structure.
+ */
+static void __init rcu_init_one(struct rcu_state *rsp,
+               struct rcu_data __percpu *rda)
+{
+       static char *buf[] = { "rcu_node_0",
+                              "rcu_node_1",
+                              "rcu_node_2",
+                              "rcu_node_3" };  /* Match MAX_RCU_LVLS */
+       static char *fqs[] = { "rcu_node_fqs_0",
+                              "rcu_node_fqs_1",
+                              "rcu_node_fqs_2",
+                              "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
+       int cpustride = 1;
+       int i;
+       int j;
+       struct rcu_node *rnp;
+
+       BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
+
+       /* Silence gcc 4.8 warning about array index out of range. */
+       if (rcu_num_lvls > RCU_NUM_LVLS)
+               panic("rcu_init_one: rcu_num_lvls overflow");
+
+       /* Initialize the level-tracking arrays. */
+
+       for (i = 0; i < rcu_num_lvls; i++)
+               rsp->levelcnt[i] = num_rcu_lvl[i];
+       for (i = 1; i < rcu_num_lvls; i++)
+               rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
+       rcu_init_levelspread(rsp);
+
+       /* Initialize the elements themselves, starting from the leaves. */
+
+       for (i = rcu_num_lvls - 1; i >= 0; i--) {
+               cpustride *= rsp->levelspread[i];
+               rnp = rsp->level[i];
+               for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
+                       raw_spin_lock_init(&rnp->lock);
+                       lockdep_set_class_and_name(&rnp->lock,
+                                                  &rcu_node_class[i], buf[i]);
+                       raw_spin_lock_init(&rnp->fqslock);
+                       lockdep_set_class_and_name(&rnp->fqslock,
+                                                  &rcu_fqs_class[i], fqs[i]);
+                       rnp->gpnum = rsp->gpnum;
+                       rnp->completed = rsp->completed;
+                       rnp->qsmask = 0;
+                       rnp->qsmaskinit = 0;
+                       rnp->grplo = j * cpustride;
+                       rnp->grphi = (j + 1) * cpustride - 1;
+                       if (rnp->grphi >= NR_CPUS)
+                               rnp->grphi = NR_CPUS - 1;
+                       if (i == 0) {
+                               rnp->grpnum = 0;
+                               rnp->grpmask = 0;
+                               rnp->parent = NULL;
+                       } else {
+                               rnp->grpnum = j % rsp->levelspread[i - 1];
+                               rnp->grpmask = 1UL << rnp->grpnum;
+                               rnp->parent = rsp->level[i - 1] +
+                                             j / rsp->levelspread[i - 1];
+                       }
+                       rnp->level = i;
+                       INIT_LIST_HEAD(&rnp->blkd_tasks);
+                       rcu_init_one_nocb(rnp);
+               }
+       }
+
+       rsp->rda = rda;
+       init_waitqueue_head(&rsp->gp_wq);
+       init_irq_work(&rsp->wakeup_work, rsp_wakeup);
+       rnp = rsp->level[rcu_num_lvls - 1];
+       for_each_possible_cpu(i) {
+               while (i > rnp->grphi)
+                       rnp++;
+               per_cpu_ptr(rsp->rda, i)->mynode = rnp;
+               rcu_boot_init_percpu_data(i, rsp);
+       }
+       list_add(&rsp->flavors, &rcu_struct_flavors);
+}
+
+/*
+ * Compute the rcu_node tree geometry from kernel parameters.  This cannot
+ * replace the definitions in tree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+       ulong d;
+       int i;
+       int j;
+       int n = nr_cpu_ids;
+       int rcu_capacity[MAX_RCU_LVLS + 1];
+
+       /*
+        * Initialize any unspecified boot parameters.
+        * The default values of jiffies_till_first_fqs and
+        * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
+        * value (a function of HZ), plus one for each RCU_JIFFIES_FQS_DIV
+        * CPUs that might be on the system.
+        */
+       d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
+       if (jiffies_till_first_fqs == ULONG_MAX)
+               jiffies_till_first_fqs = d;
+       if (jiffies_till_next_fqs == ULONG_MAX)
+               jiffies_till_next_fqs = d;
+
+       /* If the compile-time values are accurate, just leave. */
+       if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
+           nr_cpu_ids == NR_CPUS)
+               return;
+
+       /*
+        * Compute the number of CPUs that can be handled by an rcu_node tree
+        * with the given number of levels.  Setting rcu_capacity[0] makes
+        * some of the arithmetic easier.
+        */
+       rcu_capacity[0] = 1;
+       rcu_capacity[1] = rcu_fanout_leaf;
+       for (i = 2; i <= MAX_RCU_LVLS; i++)
+               rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+       /*
+        * The boot-time rcu_fanout_leaf parameter is only permitted
+        * to increase the leaf-level fanout, not decrease it.  Of course,
+        * the leaf-level fanout cannot exceed the number of bits in
+        * the rcu_node masks.  Finally, the tree must be able to accommodate
+        * the configured number of CPUs.  Complain and fall back to the
+        * compile-time values if these limits are exceeded.
+        */
+       if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+           rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+           n > rcu_capacity[MAX_RCU_LVLS]) {
+               WARN_ON(1);
+               return;
+       }
+
+       /* Calculate the number of rcu_nodes at each level of the tree. */
+       for (i = 1; i <= MAX_RCU_LVLS; i++)
+               if (n <= rcu_capacity[i]) {
+                       for (j = 0; j <= i; j++)
+                               num_rcu_lvl[j] =
+                                       DIV_ROUND_UP(n, rcu_capacity[i - j]);
+                       rcu_num_lvls = i;
+                       for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+                               num_rcu_lvl[j] = 0;
+                       break;
+               }
+
+       /* Calculate the total number of rcu_node structures. */
+       rcu_num_nodes = 0;
+       for (i = 0; i <= MAX_RCU_LVLS; i++)
+               rcu_num_nodes += num_rcu_lvl[i];
+       rcu_num_nodes -= n;
+}
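+
+/*
+ * A worked example of the above: with nr_cpu_ids = 100, rcu_fanout_leaf = 16,
+ * and CONFIG_RCU_FANOUT = 64, rcu_capacity[] becomes {1, 16, 1024, ...}, so
+ * two levels suffice (100 <= 1024).  The loop then sets num_rcu_lvl[] to
+ * {1, 7, 100, 0, ...}, giving rcu_num_lvls = 2 and
+ * rcu_num_nodes = (1 + 7 + 100) - 100 = 8: one root rcu_node plus seven
+ * leaves, each covering at most 16 CPUs.
+ */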
+
+void __init rcu_init(void)
+{
+       int cpu;
+
+       rcu_bootup_announce();
+       rcu_init_geometry();
+       rcu_init_one(&rcu_bh_state, &rcu_bh_data);
+       rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+       __rcu_init_preempt();
+       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+
+       /*
+        * We don't need protection against CPU-hotplug here because
+        * this is called early in boot, before either interrupts
+        * or the scheduler are operational.
+        */
+       cpu_notifier(rcu_cpu_notify, 0);
+       pm_notifier(rcu_pm_notify, 0);
+       for_each_online_cpu(cpu)
+               rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
+}
+
+#include "tree_plugin.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
new file mode 100644 (file)
index 0000000..52be957
--- /dev/null
@@ -0,0 +1,585 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Ingo Molnar <mingo@elte.hu>
+ *        Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+#include <linux/irq_work.h>
+
+/*
+ * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
+ * CONFIG_RCU_FANOUT_LEAF.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
+ */
+#define MAX_RCU_LVLS 4
+#define RCU_FANOUT_1         (CONFIG_RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
+#  define RCU_NUM_LVLS       1
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              (NR_CPUS)
+#  define NUM_RCU_LVL_2              0
+#  define NUM_RCU_LVL_3              0
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_2
+#  define RCU_NUM_LVLS       2
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_2              (NR_CPUS)
+#  define NUM_RCU_LVL_3              0
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_3
+#  define RCU_NUM_LVLS       3
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_3              (NR_CPUS)
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_4
+#  define RCU_NUM_LVLS       4
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_4              (NR_CPUS)
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
+
+#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
+#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
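+
+/*
+ * For example, NR_CPUS = 4096 with CONFIG_RCU_FANOUT = 64 and
+ * CONFIG_RCU_FANOUT_LEAF = 16 gives RCU_FANOUT_1/2/3 = 16/1024/65536,
+ * hence RCU_NUM_LVLS = 3, NUM_RCU_LVL_0..3 = 1/4/256/4096, and
+ * NUM_RCU_NODES = (1 + 4 + 256 + 4096) - 4096 = 261.
+ */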
+
+extern int rcu_num_lvls;
+extern int rcu_num_nodes;
+
+/*
+ * Dynticks per-CPU state.
+ */
+struct rcu_dynticks {
+       long long dynticks_nesting; /* Track irq/process nesting level. */
+                                   /* Process level is worth LLONG_MAX/2. */
+       int dynticks_nmi_nesting;   /* Track NMI nesting level. */
+       atomic_t dynticks;          /* Even value for idle, else odd. */
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+       long long dynticks_idle_nesting;
+                                   /* irq/process nesting level from idle. */
+       atomic_t dynticks_idle;     /* Even value for idle, else odd. */
+                                   /*  "Idle" excludes userspace execution. */
+       unsigned long dynticks_idle_jiffies;
+                                   /* End of last non-NMI non-idle period. */
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+#ifdef CONFIG_RCU_FAST_NO_HZ
+       bool all_lazy;              /* Are all CPU's CBs lazy? */
+       unsigned long nonlazy_posted;
+                                   /* # times non-lazy CBs posted to CPU. */
+       unsigned long nonlazy_posted_snap;
+                                   /* idle-period nonlazy_posted snapshot. */
+       unsigned long last_accelerate;
+                                   /* Last jiffy CBs were accelerated. */
+       unsigned long last_advance_all;
+                                   /* Last jiffy CBs were all advanced. */
+       int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
+#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+};
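+
+/*
+ * For example, a CPU entering dyntick-idle atomically increments its
+ * ->dynticks counter from an odd to an even value, and increments it back
+ * to an odd value on exit.  Roughly speaking, a remote CPU that samples an
+ * even value, or that sees the counter advance between two samples, may
+ * report a quiescent state on the sampled CPU's behalf.
+ */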
+
+/* RCU's kthread states for tracing. */
+#define RCU_KTHREAD_STOPPED  0
+#define RCU_KTHREAD_RUNNING  1
+#define RCU_KTHREAD_WAITING  2
+#define RCU_KTHREAD_OFFCPU   3
+#define RCU_KTHREAD_YIELDING 4
+#define RCU_KTHREAD_MAX      4
+
+/*
+ * Definition for node within the RCU grace-period-detection hierarchy.
+ */
+struct rcu_node {
+       raw_spinlock_t lock;    /* Root rcu_node's lock protects some */
+                               /*  rcu_state fields as well as following. */
+       unsigned long gpnum;    /* Current grace period for this node. */
+                               /*  This will either be equal to or one */
+                               /*  behind the root rcu_node's gpnum. */
+       unsigned long completed; /* Last GP completed for this node. */
+                               /*  This will either be equal to or one */
+                               /*  behind the root rcu_node's completed. */
+       unsigned long qsmask;   /* CPUs or groups that need to switch in */
+                               /*  order for current grace period to proceed.*/
+                               /*  In leaf rcu_node, each bit corresponds to */
+                               /*  an rcu_data structure, otherwise, each */
+                               /*  bit corresponds to a child rcu_node */
+                               /*  structure. */
+       unsigned long expmask;  /* Groups that have ->blkd_tasks */
+                               /*  elements that need to drain to allow the */
+                               /*  current expedited grace period to */
+                               /*  complete (only for TREE_PREEMPT_RCU). */
+       unsigned long qsmaskinit;
+                               /* Per-GP initial value for qsmask & expmask. */
+       unsigned long grpmask;  /* Mask to apply to parent qsmask. */
+                               /*  Only one bit will be set in this mask. */
+       int     grplo;          /* lowest-numbered CPU or group here. */
+       int     grphi;          /* highest-numbered CPU or group here. */
+       u8      grpnum;         /* CPU/group number for next level up. */
+       u8      level;          /* root is at level 0. */
+       struct rcu_node *parent;
+       struct list_head blkd_tasks;
+                               /* Tasks blocked in RCU read-side critical */
+                               /*  section.  Tasks are placed at the head */
+                               /*  of this list and age towards the tail. */
+       struct list_head *gp_tasks;
+                               /* Pointer to the first task blocking the */
+                               /*  current grace period, or NULL if there */
+                               /*  is no such task. */
+       struct list_head *exp_tasks;
+                               /* Pointer to the first task blocking the */
+                               /*  current expedited grace period, or NULL */
+                               /*  if there is no such task.  If there */
+                               /*  is no current expedited grace period, */
+                               /*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+       struct list_head *boost_tasks;
+                               /* Pointer to first task that needs to be */
+                               /*  priority boosted, or NULL if no priority */
+                               /*  boosting is needed for this rcu_node */
+                               /*  structure.  If there are no tasks */
+                               /*  queued on this rcu_node structure that */
+                               /*  are blocking the current grace period, */
+                               /*  there can be no such task. */
+       unsigned long boost_time;
+                               /* When to start boosting (jiffies). */
+       struct task_struct *boost_kthread_task;
+                               /* kthread that takes care of priority */
+                               /*  boosting for this rcu_node structure. */
+       unsigned int boost_kthread_status;
+                               /* State of boost_kthread_task for tracing. */
+       unsigned long n_tasks_boosted;
+                               /* Total number of tasks boosted. */
+       unsigned long n_exp_boosts;
+                               /* Number of tasks boosted for expedited GP. */
+       unsigned long n_normal_boosts;
+                               /* Number of tasks boosted for normal GP. */
+       unsigned long n_balk_blkd_tasks;
+                               /* Refused to boost: no blocked tasks. */
+       unsigned long n_balk_exp_gp_tasks;
+                               /* Refused to boost: nothing blocking GP. */
+       unsigned long n_balk_boost_tasks;
+                               /* Refused to boost: already boosting. */
+       unsigned long n_balk_notblocked;
+                               /* Refused to boost: RCU RS CS still running. */
+       unsigned long n_balk_notyet;
+                               /* Refused to boost: not yet time. */
+       unsigned long n_balk_nos;
+                               /* Refused to boost: not sure why, though. */
+                               /*  This can happen due to race conditions. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#ifdef CONFIG_RCU_NOCB_CPU
+       wait_queue_head_t nocb_gp_wq[2];
+                               /* Place for rcu_nocb_kthread() to wait for GP. */
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+       int need_future_gp[2];
+                               /* Counts of upcoming no-CB GP requests. */
+       raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
+} ____cacheline_internodealigned_in_smp;
+
+/*
+ * Do a full breadth-first scan of the rcu_node structures for the
+ * specified rcu_state structure.
+ */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure.  Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure.  Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+       for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+            (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
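+
+/*
+ * A minimal usage sketch (assuming the caller manages "flags" and does its
+ * real per-leaf work in place of the qsmask read):
+ *
+ *	rcu_for_each_leaf_node(rsp, rnp) {
+ *		raw_spin_lock_irqsave(&rnp->lock, flags);
+ *		mask = rnp->qsmask;
+ *		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ *	}
+ */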
+
+/* Index values for nxttail array in struct rcu_data. */
+#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL          3
+#define RCU_NEXT_SIZE          4
+
+/* Per-CPU data for read-copy update. */
+struct rcu_data {
+       /* 1) quiescent-state and grace-period handling : */
+       unsigned long   completed;      /* Track rsp->completed gp number */
+                                       /*  in order to detect GP end. */
+       unsigned long   gpnum;          /* Highest gp number that this CPU */
+                                       /*  is aware of having started. */
+       bool            passed_quiesce; /* User-mode/idle loop etc. */
+       bool            qs_pending;     /* Core waits for quiesc state. */
+       bool            beenonline;     /* CPU online at least once. */
+       bool            preemptible;    /* Preemptible RCU? */
+       struct rcu_node *mynode;        /* This CPU's leaf of hierarchy */
+       unsigned long grpmask;          /* Mask to apply to leaf qsmask. */
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+       unsigned long   ticks_this_gp;  /* The number of scheduling-clock */
+                                       /*  ticks this CPU has handled */
+                                       /*  during and after the last grace */
+                                       /* period it is aware of. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+       /* 2) batch handling */
+       /*
+        * If nxtlist is not NULL, it is partitioned as follows.
+        * Any of the partitions might be empty, in which case the
+        * pointer to that partition will be equal to the pointer for
+        * the following partition.  When the list is empty, all of
+        * the nxttail elements point to the ->nxtlist pointer itself,
+        * which in that case is NULL.
+        *
+        * [nxtlist, *nxttail[RCU_DONE_TAIL]):
+        *      Entries that batch # <= ->completed
+        *      The grace period for these entries has completed, and
+        *      the other grace-period-completed entries may be moved
+        *      here temporarily in rcu_process_callbacks().
+        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
+        *      Entries that batch # <= ->completed - 1: waiting for current GP
+        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
+        *      Entries known to have arrived before current GP ended
+        * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
+        *      Entries that might have arrived after current GP ended
+        *      Note that the value of *nxttail[RCU_NEXT_TAIL] will
+        *      always be NULL, as this is the end of the list.
+        */
+       struct rcu_head *nxtlist;
+       struct rcu_head **nxttail[RCU_NEXT_SIZE];
+       unsigned long   nxtcompleted[RCU_NEXT_SIZE];
+                                       /* grace periods for sublists. */
+       long            qlen_lazy;      /* # of lazy queued callbacks */
+       long            qlen;           /* # of queued callbacks, incl lazy */
+       long            qlen_last_fqs_check;
+                                       /* qlen at last check for QS forcing */
+       unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
+       unsigned long   n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
+       unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
+       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
+       unsigned long   n_force_qs_snap;
+                                       /* did other CPU force QS recently? */
+       long            blimit;         /* Upper limit on a processed batch */
+
+       /* 3) dynticks interface. */
+       struct rcu_dynticks *dynticks;  /* Shared per-CPU dynticks state. */
+       int dynticks_snap;              /* Per-GP tracking for dynticks. */
+
+       /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
+       unsigned long dynticks_fqs;     /* Kicked due to dynticks idle. */
+       unsigned long offline_fqs;      /* Kicked due to being offline. */
+
+       /* 5) __rcu_pending() statistics. */
+       unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
+       unsigned long n_rp_qs_pending;
+       unsigned long n_rp_report_qs;
+       unsigned long n_rp_cb_ready;
+       unsigned long n_rp_cpu_needs_gp;
+       unsigned long n_rp_gp_completed;
+       unsigned long n_rp_gp_started;
+       unsigned long n_rp_need_nothing;
+
+       /* 6) _rcu_barrier() and OOM callbacks. */
+       struct rcu_head barrier_head;
+#ifdef CONFIG_RCU_FAST_NO_HZ
+       struct rcu_head oom_head;
+#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+       /* 7) Callback offloading. */
+#ifdef CONFIG_RCU_NOCB_CPU
+       struct rcu_head *nocb_head;     /* CBs waiting for kthread. */
+       struct rcu_head **nocb_tail;
+       atomic_long_t nocb_q_count;     /* # CBs waiting for kthread */
+       atomic_long_t nocb_q_count_lazy; /*  (approximate). */
+       int nocb_p_count;               /* # CBs being invoked by kthread */
+       int nocb_p_count_lazy;          /*  (approximate). */
+       wait_queue_head_t nocb_wq;      /* For nocb kthreads to sleep on. */
+       struct task_struct *nocb_kthread;
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+       /* 8) RCU CPU stall data. */
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+       unsigned int softirq_snap;      /* Snapshot of softirq activity. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+       int cpu;
+       struct rcu_state *rsp;
+};
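+
+/*
+ * The empty-list state described for ->nxtlist above is set up along these
+ * lines (a sketch of the per-CPU callback-list initialization in tree.c):
+ *
+ *	rdp->nxtlist = NULL;
+ *	for (i = 0; i < RCU_NEXT_SIZE; i++)
+ *		rdp->nxttail[i] = &rdp->nxtlist;
+ *
+ * Enqueuing a callback then appends it at *nxttail[RCU_NEXT_TAIL] and
+ * advances that tail pointer to the new callback's ->next field.
+ */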
+
+/* Values for fqs_state field in struct rcu_state. */
+#define RCU_GP_IDLE            0       /* No grace period in progress. */
+#define RCU_GP_INIT            1       /* Grace period being initialized. */
+#define RCU_SAVE_DYNTICK       2       /* Need to scan dyntick state. */
+#define RCU_FORCE_QS           3       /* Need to force quiescent state. */
+#define RCU_SIGNAL_INIT                RCU_SAVE_DYNTICK
+
+#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
+                                       /* For jiffies_till_first_fqs and */
+                                       /*  jiffies_till_next_fqs. */
+
+#define RCU_JIFFIES_FQS_DIV    256     /* Very large systems need more */
+                                       /*  delay between bouts of */
+                                       /*  quiescent-state forcing. */
+
+#define RCU_STALL_RAT_DELAY    2       /* Allow other CPUs time to take */
+                                       /*  at least one scheduling clock */
+                                       /*  irq before ratting on them. */
+
+#define rcu_wait(cond)                                                 \
+do {                                                                   \
+       for (;;) {                                                      \
+               set_current_state(TASK_INTERRUPTIBLE);                  \
+               if (cond)                                               \
+                       break;                                          \
+               schedule();                                             \
+       }                                                               \
+       __set_current_state(TASK_RUNNING);                              \
+} while (0)
+
+/*
+ * RCU global state, including node hierarchy.  This hierarchy is
+ * represented in "heap" form in a dense array.  The root (first level)
+ * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
+ * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
+ * and the third level in ->node[m+1] and following (->node[m+1] referenced
+ * by ->level[2]).  The number of levels is determined by the number of
+ * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy"
+ * consisting of a single rcu_node.
+ */
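+/*
+ * Continuing the NR_CPUS = 4096 example above: ->node[0] is the root and is
+ * referenced by ->level[0]; ->node[1] through ->node[4] are the four
+ * second-level nodes, with ->level[1] pointing at ->node[1]; and ->node[5]
+ * through ->node[260] are the 256 leaves, with ->level[2] pointing at
+ * ->node[5].
+ */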
+struct rcu_state {
+       struct rcu_node node[NUM_RCU_NODES];    /* Hierarchy. */
+       struct rcu_node *level[RCU_NUM_LVLS];   /* Hierarchy levels. */
+       u32 levelcnt[MAX_RCU_LVLS + 1];         /* # nodes in each level. */
+       u8 levelspread[RCU_NUM_LVLS];           /* kids/node in each level. */
+       struct rcu_data __percpu *rda;          /* Pointer to per-CPU rcu_data. */
+       void (*call)(struct rcu_head *head,     /* call_rcu() flavor. */
+                    void (*func)(struct rcu_head *head));
+
+       /* The following fields are guarded by the root rcu_node's lock. */
+
+       u8      fqs_state ____cacheline_internodealigned_in_smp;
+                                               /* Force QS state. */
+       u8      boost;                          /* Subject to priority boost. */
+       unsigned long gpnum;                    /* Current gp number. */
+       unsigned long completed;                /* # of last completed gp. */
+       struct task_struct *gp_kthread;         /* Task for grace periods. */
+       wait_queue_head_t gp_wq;                /* Where GP task waits. */
+       int gp_flags;                           /* Commands for GP task. */
+
+       /* End of fields guarded by root rcu_node's lock. */
+
+       raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
+                                               /* Protect following fields. */
+       struct rcu_head *orphan_nxtlist;        /* Orphaned callbacks that */
+                                               /*  need a grace period. */
+       struct rcu_head **orphan_nxttail;       /* Tail of above. */
+       struct rcu_head *orphan_donelist;       /* Orphaned callbacks that */
+                                               /*  are ready to invoke. */
+       struct rcu_head **orphan_donetail;      /* Tail of above. */
+       long qlen_lazy;                         /* Number of lazy callbacks. */
+       long qlen;                              /* Total number of callbacks. */
+       /* End of fields guarded by orphan_lock. */
+
+       struct mutex onoff_mutex;               /* Coordinate hotplug & GPs. */
+
+       struct mutex barrier_mutex;             /* Guards barrier fields. */
+       atomic_t barrier_cpu_count;             /* # CPUs waiting on. */
+       struct completion barrier_completion;   /* Wake at barrier end. */
+       unsigned long n_barrier_done;           /* ++ at start and end of */
+                                               /*  _rcu_barrier(). */
+       /* End of fields guarded by barrier_mutex. */
+
+       atomic_long_t expedited_start;          /* Starting ticket. */
+       atomic_long_t expedited_done;           /* Done ticket. */
+       atomic_long_t expedited_wrap;           /* # near-wrap incidents. */
+       atomic_long_t expedited_tryfail;        /* # acquisition failures. */
+       atomic_long_t expedited_workdone1;      /* # done by others #1. */
+       atomic_long_t expedited_workdone2;      /* # done by others #2. */
+       atomic_long_t expedited_normal;         /* # fallbacks to normal. */
+       atomic_long_t expedited_stoppedcpus;    /* # successful stop_cpus. */
+       atomic_long_t expedited_done_tries;     /* # tries to update _done. */
+       atomic_long_t expedited_done_lost;      /* # times beaten to _done. */
+       atomic_long_t expedited_done_exit;      /* # times exited _done loop. */
+
+       unsigned long jiffies_force_qs;         /* Time at which to invoke */
+                                               /*  force_quiescent_state(). */
+       unsigned long n_force_qs;               /* Number of calls to */
+                                               /*  force_quiescent_state(). */
+       unsigned long n_force_qs_lh;            /* ~Number of calls leaving */
+                                               /*  due to lock unavailable. */
+       unsigned long n_force_qs_ngp;           /* Number of calls leaving */
+                                               /*  due to no GP active. */
+       unsigned long gp_start;                 /* Time at which GP started, */
+                                               /*  in jiffies. */
+       unsigned long jiffies_stall;            /* Time at which to check */
+                                               /*  for CPU stalls. */
+       unsigned long gp_max;                   /* Maximum GP duration in */
+                                               /*  jiffies. */
+       const char *name;                       /* Name of structure. */
+       char abbr;                              /* Abbreviated name. */
+       struct list_head flavors;               /* List of RCU flavors. */
+       struct irq_work wakeup_work;            /* Postponed wakeups */
+};
+
+/* Values for rcu_state structure's gp_flags field. */
+#define RCU_GP_FLAG_INIT 0x1   /* Need grace-period initialization. */
+#define RCU_GP_FLAG_FQS  0x2   /* Need grace-period quiescent-state forcing. */
+
+extern struct list_head rcu_struct_flavors;
+
+/* Sequence through rcu_state structures for each RCU flavor. */
+#define for_each_rcu_flavor(rsp) \
+       list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
+
+/* Return values for rcu_preempt_offline_tasks(). */
+
+#define RCU_OFL_TASKS_NORM_GP  0x1             /* Tasks blocking normal */
+                                               /*  GP were moved to root. */
+#define RCU_OFL_TASKS_EXP_GP   0x2             /* Tasks blocking expedited */
+                                               /*  GP were moved to root. */
+
+/*
+ * RCU implementation internal declarations:
+ */
+extern struct rcu_state rcu_sched_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
+
+extern struct rcu_state rcu_bh_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+#ifdef CONFIG_TREE_PREEMPT_RCU
+extern struct rcu_state rcu_preempt_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#ifdef CONFIG_RCU_BOOST
+DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
+DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
+DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
+DECLARE_PER_CPU(char, rcu_cpu_has_work);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+#ifndef RCU_TREE_NONCORE
+
+/* Forward declarations for rcutree_plugin.h */
+static void rcu_bootup_announce(void);
+long rcu_batches_completed(void);
+static void rcu_preempt_note_context_switch(int cpu);
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
+                                     unsigned long flags);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp);
+static int rcu_print_task_stall(struct rcu_node *rnp);
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_preempt_check_callbacks(int cpu);
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                              bool wake);
+#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
+static void __init __rcu_init_preempt(void);
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
+static void invoke_rcu_callbacks_kthread(void);
+static bool rcu_is_callbacks_kthread(void);
+#ifdef CONFIG_RCU_BOOST
+static void rcu_preempt_do_callbacks(void);
+static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+                                                struct rcu_node *rnp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+static void rcu_prepare_kthreads(int cpu);
+static void rcu_cleanup_after_idle(int cpu);
+static void rcu_prepare_for_idle(int cpu);
+static void rcu_idle_count_callbacks_posted(void);
+static void print_cpu_stall_info_begin(void);
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
+static void print_cpu_stall_info_end(void);
+static void zero_cpu_stall_ticks(struct rcu_data *rdp);
+static void increment_cpu_stall_ticks(void);
+static int rcu_nocb_needs_gp(struct rcu_state *rsp);
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
+static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
+static void rcu_init_one_nocb(struct rcu_node *rnp);
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                           bool lazy);
+static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                     struct rcu_data *rdp);
+static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
+static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
+static void rcu_kick_nohz_cpu(int cpu);
+static bool init_nocb_callback_list(struct rcu_data *rdp);
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+                                 unsigned long *maxj);
+static bool is_sysidle_rcu_state(struct rcu_state *rsp);
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+                                 unsigned long maxj);
+static void rcu_bind_gp_kthread(void);
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
+
+#endif /* #ifndef RCU_TREE_NONCORE */
+
+#ifdef CONFIG_RCU_TRACE
+#ifdef CONFIG_RCU_NOCB_CPU
+/* Sum up queue lengths for tracing. */
+static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
+{
+       *ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
+       *qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
+}
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
+{
+       *ql = 0;
+       *qll = 0;
+}
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
new file mode 100644 (file)
index 0000000..3822ac0
--- /dev/null
@@ -0,0 +1,2831 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions that provide either classic
+ * or preemptible semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright Red Hat, 2009
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Ingo Molnar <mingo@elte.hu>
+ *        Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include "../time/tick-internal.h"
+
+#define RCU_KTHREAD_PRIO 1
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else
+#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
+#endif
+
+#ifdef CONFIG_RCU_NOCB_CPU
+static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
+static bool have_rcu_nocb_mask;            /* Was rcu_nocb_mask allocated? */
+static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */
+static char __initdata nocb_buf[NR_CPUS * 5];
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+/*
+ * Check the RCU kernel configuration parameters and print informative
+ * messages about anything out of the ordinary.  If you like #ifdef, you
+ * will love this function.
+ */
+static void __init rcu_bootup_announce_oddness(void)
+{
+#ifdef CONFIG_RCU_TRACE
+       pr_info("\tRCU debugfs-based tracing is enabled.\n");
+#endif
+#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
+       pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+              CONFIG_RCU_FANOUT);
+#endif
+#ifdef CONFIG_RCU_FANOUT_EXACT
+       pr_info("\tHierarchical RCU autobalancing is disabled.\n");
+#endif
+#ifdef CONFIG_RCU_FAST_NO_HZ
+       pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+#endif
+#ifdef CONFIG_PROVE_RCU
+       pr_info("\tRCU lockdep checking is enabled.\n");
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
+       pr_info("\tRCU torture testing starts during boot.\n");
+#endif
+#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
+       pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n");
+#endif
+#if defined(CONFIG_RCU_CPU_STALL_INFO)
+       pr_info("\tAdditional per-CPU info printed with stalls.\n");
+#endif
+#if NUM_RCU_LVL_4 != 0
+       pr_info("\tFour-level hierarchy is enabled.\n");
+#endif
+       if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+               pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
+       if (nr_cpu_ids != NR_CPUS)
+               pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
+#ifdef CONFIG_RCU_NOCB_CPU
+#ifndef CONFIG_RCU_NOCB_CPU_NONE
+       if (!have_rcu_nocb_mask) {
+               zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
+               have_rcu_nocb_mask = true;
+       }
+#ifdef CONFIG_RCU_NOCB_CPU_ZERO
+       pr_info("\tOffload RCU callbacks from CPU 0\n");
+       cpumask_set_cpu(0, rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
+#ifdef CONFIG_RCU_NOCB_CPU_ALL
+       pr_info("\tOffload RCU callbacks from all CPUs\n");
+       cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
+#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
+       if (have_rcu_nocb_mask) {
+               if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
+                       pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+                       cpumask_and(rcu_nocb_mask, cpu_possible_mask,
+                                   rcu_nocb_mask);
+               }
+               cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
+               pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
+               if (rcu_nocb_poll)
+                       pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
+       }
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+}
+
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
+static struct rcu_state *rcu_state = &rcu_preempt_state;
+
+static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+
+/*
+ * Tell them what RCU they are running.
+ */
+static void __init rcu_bootup_announce(void)
+{
+       pr_info("Preemptible hierarchical RCU implementation.\n");
+       rcu_bootup_announce_oddness();
+}
+
+/*
+ * Return the number of RCU-preempt batches processed thus far
+ * for debug and statistics.
+ */
+long rcu_batches_completed_preempt(void)
+{
+       return rcu_preempt_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+       return rcu_batches_completed_preempt();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+       force_quiescent_state(&rcu_preempt_state);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
+ * Record a preemptible-RCU quiescent state for the specified CPU.  Note
+ * that this just means that the task currently running on the CPU is
+ * not in a quiescent state.  There might be any number of tasks blocked
+ * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
+ */
+static void rcu_preempt_qs(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
+       if (rdp->passed_quiesce == 0)
+               trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
+       rdp->passed_quiesce = 1;
+       current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+}
+
+/*
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from.  If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section.  Therefore, the current grace period
+ * cannot be permitted to complete until the blkd_tasks list entries
+ * predating the current grace period drain, in other words, until
+ * rnp->gp_tasks becomes NULL.
+ *
+ * Caller must disable preemption.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+       struct task_struct *t = current;
+       unsigned long flags;
+       struct rcu_data *rdp;
+       struct rcu_node *rnp;
+
+       if (t->rcu_read_lock_nesting > 0 &&
+           (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+
+               /* Possibly blocking in an RCU read-side critical section. */
+               rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
+               rnp = rdp->mynode;
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+               t->rcu_blocked_node = rnp;
+
+               /*
+                * If this CPU has already checked in, then this task
+                * will hold up the next grace period rather than the
+                * current grace period.  Queue the task accordingly.
+                * If the task is queued for the current grace period
+                * (i.e., this CPU has not yet passed through a quiescent
+                * state for the current grace period), then as long
+                * as that task remains queued, the current grace period
+                * cannot end.  Note that there is some uncertainty as
+                * to exactly when the current grace period started.
+                * We take a conservative approach, which can result
+                * in unnecessarily waiting on tasks that started very
+                * slightly after the current grace period began.  C'est
+                * la vie!!!
+                *
+                * But first, note that the current CPU must still be
+                * on line!
+                */
+               WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+               WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
+               if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
+                       list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
+                       rnp->gp_tasks = &t->rcu_node_entry;
+#ifdef CONFIG_RCU_BOOST
+                       if (rnp->boost_tasks != NULL)
+                               rnp->boost_tasks = rnp->gp_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+               } else {
+                       list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+                       if (rnp->qsmask & rdp->grpmask)
+                               rnp->gp_tasks = &t->rcu_node_entry;
+               }
+               trace_rcu_preempt_task(rdp->rsp->name,
+                                      t->pid,
+                                      (rnp->qsmask & rdp->grpmask)
+                                      ? rnp->gpnum
+                                      : rnp->gpnum + 1);
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       } else if (t->rcu_read_lock_nesting < 0 &&
+                  t->rcu_read_unlock_special) {
+
+               /*
+                * Complete exit from RCU read-side critical section on
+                * behalf of preempted instance of __rcu_read_unlock().
+                */
+               rcu_read_unlock_special(t);
+       }
+
+       /*
+        * Either we were not in an RCU read-side critical section to
+        * begin with, or we have now recorded that critical section
+        * globally.  Either way, we can now note a quiescent state
+        * for this CPU.  Again, if we were in an RCU read-side critical
+        * section, and if that critical section was blocking the current
+        * grace period, then the fact that the task has been enqueued
+        * means that we continue to block the current grace period.
+        */
+       local_irq_save(flags);
+       rcu_preempt_qs(cpu);
+       local_irq_restore(flags);
+}
+
+/*
+ * Check for preempted RCU readers blocking the current grace period
+ * for the specified rcu_node structure.  If the caller needs a reliable
+ * answer, it must hold the rcu_node's ->lock.
+ */
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
+{
+       return rnp->gp_tasks != NULL;
+}
+
+/*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period.  The caller must hold the specified rnp->lock with
+ * irqs disabled, and this lock is released upon return, but irqs remain
+ * disabled.
+ */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+       __releases(rnp->lock)
+{
+       unsigned long mask;
+       struct rcu_node *rnp_p;
+
+       if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;  /* Still need more quiescent states! */
+       }
+
+       rnp_p = rnp->parent;
+       if (rnp_p == NULL) {
+               /*
+                * Either there is only one rcu_node in the tree,
+                * or tasks were kicked up to root rcu_node due to
+                * CPUs going offline.
+                */
+               rcu_report_qs_rsp(&rcu_preempt_state, flags);
+               return;
+       }
+
+       /* Report up the rest of the hierarchy. */
+       mask = rnp->grpmask;
+       raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
+       raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
+       rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
+}
+
+/*
+ * Advance a ->blkd_tasks-list pointer to the next entry, returning
+ * NULL instead if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t,
+                                            struct rcu_node *rnp)
+{
+       struct list_head *np;
+
+       np = t->rcu_node_entry.next;
+       if (np == &rnp->blkd_tasks)
+               np = NULL;
+       return np;
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or the task having blocked during the RCU
+ * read-side critical section.
+ */
+void rcu_read_unlock_special(struct task_struct *t)
+{
+       int empty;
+       int empty_exp;
+       int empty_exp_now;
+       unsigned long flags;
+       struct list_head *np;
+#ifdef CONFIG_RCU_BOOST
+       struct rt_mutex *rbmp = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+       struct rcu_node *rnp;
+       int special;
+
+       /* NMI handlers cannot block and cannot safely manipulate state. */
+       if (in_nmi())
+               return;
+
+       local_irq_save(flags);
+
+       /*
+        * If RCU core is waiting for this CPU to exit critical section,
+        * let it know that we have done so.
+        */
+       special = t->rcu_read_unlock_special;
+       if (special & RCU_READ_UNLOCK_NEED_QS) {
+               rcu_preempt_qs(smp_processor_id());
+       }
+
+       /* Hardware IRQ handlers and softirqs cannot block. */
+       if (in_irq() || in_serving_softirq()) {
+               local_irq_restore(flags);
+               return;
+       }
+
+       /* Clean up if blocked during RCU read-side critical section. */
+       if (special & RCU_READ_UNLOCK_BLOCKED) {
+               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+
+               /*
+                * Remove this task from the list it blocked on.  The
+                * task can migrate while we acquire the lock, but at
+                * most one time.  So at most two passes through the loop.
+                */
+               for (;;) {
+                       rnp = t->rcu_blocked_node;
+                       raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
+                       if (rnp == t->rcu_blocked_node)
+                               break;
+                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+               }
+               empty = !rcu_preempt_blocked_readers_cgp(rnp);
+               empty_exp = !rcu_preempted_readers_exp(rnp);
+               smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
+               np = rcu_next_node_entry(t, rnp);
+               list_del_init(&t->rcu_node_entry);
+               t->rcu_blocked_node = NULL;
+               trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
+                                               rnp->gpnum, t->pid);
+               if (&t->rcu_node_entry == rnp->gp_tasks)
+                       rnp->gp_tasks = np;
+               if (&t->rcu_node_entry == rnp->exp_tasks)
+                       rnp->exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+               if (&t->rcu_node_entry == rnp->boost_tasks)
+                       rnp->boost_tasks = np;
+               /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
+               if (t->rcu_boost_mutex) {
+                       rbmp = t->rcu_boost_mutex;
+                       t->rcu_boost_mutex = NULL;
+               }
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+               /*
+                * If this was the last task on the current list, and if
+                * we aren't waiting on any CPUs, report the quiescent state.
+                * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
+                * so we must take a snapshot of the expedited state.
+                */
+               empty_exp_now = !rcu_preempted_readers_exp(rnp);
+               if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
+                       trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
+                                                        rnp->gpnum,
+                                                        0, rnp->qsmask,
+                                                        rnp->level,
+                                                        rnp->grplo,
+                                                        rnp->grphi,
+                                                        !!rnp->gp_tasks);
+                       rcu_report_unblock_qs_rnp(rnp, flags);
+               } else {
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               }
+
+#ifdef CONFIG_RCU_BOOST
+               /* Unboost if we were boosted. */
+               if (rbmp)
+                       rt_mutex_unlock(rbmp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+               /*
+                * If this was the last task on the expedited lists,
+                * then we need to report up the rcu_node hierarchy.
+                */
+               if (!empty_exp && empty_exp_now)
+                       rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
+       } else {
+               local_irq_restore(flags);
+       }
+}
+
+#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+       unsigned long flags;
+       struct task_struct *t;
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;
+       }
+       t = list_entry(rnp->gp_tasks,
+                      struct task_struct, rcu_node_entry);
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+               sched_show_task(t);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       rcu_print_detail_task_stall_rnp(rnp);
+       rcu_for_each_leaf_node(rsp, rnp)
+               rcu_print_detail_task_stall_rnp(rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+
+static void rcu_print_task_stall_begin(struct rcu_node *rnp)
+{
+       pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
+              rnp->level, rnp->grplo, rnp->grphi);
+}
+
+static void rcu_print_task_stall_end(void)
+{
+       pr_cont("\n");
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+static void rcu_print_task_stall_begin(struct rcu_node *rnp)
+{
+}
+
+static void rcu_print_task_stall_end(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, printing out the tid of each and returning the number found.
+ */
+static int rcu_print_task_stall(struct rcu_node *rnp)
+{
+       struct task_struct *t;
+       int ndetected = 0;
+
+       if (!rcu_preempt_blocked_readers_cgp(rnp))
+               return 0;
+       rcu_print_task_stall_begin(rnp);
+       t = list_entry(rnp->gp_tasks,
+                      struct task_struct, rcu_node_entry);
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               pr_cont(" P%d", t->pid);
+               ndetected++;
+       }
+       rcu_print_task_stall_end();
+       return ndetected;
+}
+
+/*
+ * Check that the list of blocked tasks for the newly completed grace
+ * period is in fact empty.  It is a serious bug to complete a grace
+ * period that still has RCU readers blocked!  This function must be
+ * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
+ * must be held by the caller.
+ *
+ * Also, if there are blocked tasks on the list, they automatically
+ * block the newly created grace period, so set up ->gp_tasks accordingly.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+       WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
+       if (!list_empty(&rnp->blkd_tasks))
+               rnp->gp_tasks = rnp->blkd_tasks.next;
+       WARN_ON_ONCE(rnp->qsmask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Handle tasklist migration for case in which all CPUs covered by the
+ * specified rcu_node have gone offline.  Move them up to the root
+ * rcu_node.  The reason for not just moving them to the immediate
+ * parent is to remove the need for rcu_read_unlock_special() to
+ * make more than two attempts to acquire the target rcu_node's lock.
+ * Returns non-zero if there was previously a task blocking the current
+ * grace period on the specified rcu_node structure.
+ *
+ * The caller must hold rnp->lock with irqs disabled.
+ */
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp)
+{
+       struct list_head *lp;
+       struct list_head *lp_root;
+       int retval = 0;
+       struct rcu_node *rnp_root = rcu_get_root(rsp);
+       struct task_struct *t;
+
+       if (rnp == rnp_root) {
+               WARN_ONCE(1, "Last CPU thought to be offlined?");
+               return 0;  /* Shouldn't happen: at least one CPU online. */
+       }
+
+       /* If we are on an internal node, complain bitterly. */
+       WARN_ON_ONCE(rnp != rdp->mynode);
+
+       /*
+        * Move tasks up to root rcu_node.  Don't try to get fancy for
+        * this corner-case operation -- just put this node's tasks
+        * at the head of the root node's list, and update the root node's
+        * ->gp_tasks and ->exp_tasks pointers to those of this node's,
+        * if non-NULL.  This might result in waiting for more tasks than
+        * absolutely necessary, but this is a good performance/complexity
+        * tradeoff.
+        */
+       if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
+               retval |= RCU_OFL_TASKS_NORM_GP;
+       if (rcu_preempted_readers_exp(rnp))
+               retval |= RCU_OFL_TASKS_EXP_GP;
+       lp = &rnp->blkd_tasks;
+       lp_root = &rnp_root->blkd_tasks;
+       while (!list_empty(lp)) {
+               t = list_entry(lp->next, typeof(*t), rcu_node_entry);
+               raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+               list_del(&t->rcu_node_entry);
+               t->rcu_blocked_node = rnp_root;
+               list_add(&t->rcu_node_entry, lp_root);
+               if (&t->rcu_node_entry == rnp->gp_tasks)
+                       rnp_root->gp_tasks = rnp->gp_tasks;
+               if (&t->rcu_node_entry == rnp->exp_tasks)
+                       rnp_root->exp_tasks = rnp->exp_tasks;
+#ifdef CONFIG_RCU_BOOST
+               if (&t->rcu_node_entry == rnp->boost_tasks)
+                       rnp_root->boost_tasks = rnp->boost_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+               raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
+       }
+
+       rnp->gp_tasks = NULL;
+       rnp->exp_tasks = NULL;
+#ifdef CONFIG_RCU_BOOST
+       rnp->boost_tasks = NULL;
+       /*
+        * The root might be being boosted while this leaf was not.  Make
+        * sure that we still boost the tasks blocking the current grace
+        * period in this case.
+        */
+       raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+       if (rnp_root->boost_tasks != NULL &&
+           rnp_root->boost_tasks != rnp_root->gp_tasks &&
+           rnp_root->boost_tasks != rnp_root->exp_tasks)
+               rnp_root->boost_tasks = rnp_root->gp_tasks;
+       raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+       return retval;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Check for a quiescent state from the current CPU.  When a task blocks,
+ * the task is recorded in the corresponding CPU's rcu_node structure,
+ * which is checked elsewhere.
+ *
+ * Caller must disable hard irqs.
+ */
+static void rcu_preempt_check_callbacks(int cpu)
+{
+       struct task_struct *t = current;
+
+       if (t->rcu_read_lock_nesting == 0) {
+               rcu_preempt_qs(cpu);
+               return;
+       }
+       if (t->rcu_read_lock_nesting > 0 &&
+           per_cpu(rcu_preempt_data, cpu).qs_pending)
+               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+}
+
+#ifdef CONFIG_RCU_BOOST
+
+static void rcu_preempt_do_callbacks(void)
+{
+       rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Queue a preemptible-RCU callback for invocation after a grace period.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_preempt_state, -1, 0);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
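
For reference, a minimal caller-side sketch of the usual call_rcu() pattern -- not part of this patch; struct foo, foo_list, foo_lock, and the helpers below are hypothetical:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head list;
	int data;
	struct rcu_head rcu;
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);	/* Serializes updaters only. */

/* Reader: no locks, just an RCU read-side critical section. */
static int foo_find(int key)
{
	struct foo *fp;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(fp, &foo_list, list)
		if (fp->data == key) {
			found = 1;
			break;
		}
	rcu_read_unlock();
	return found;
}

static void foo_reclaim(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct foo, rcu));
}

/* Updater: unlink now, defer the kfree() until pre-existing readers finish. */
static void foo_remove(struct foo *fp)
{
	spin_lock(&foo_lock);
	list_del_rcu(&fp->list);
	spin_unlock(&foo_lock);
	call_rcu(&fp->rcu, foo_reclaim);
}
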
+
+/*
+ * Queue an RCU callback for lazy invocation after a grace period.
+ * This will likely be later named something like "call_rcu_lazy()",
+ * but this change will require some way of tagging the lazy RCU
+ * callbacks in the list of pending callbacks.  Until then, this
+ * function may only be called from __kfree_rcu().
+ */
+void kfree_call_rcu(struct rcu_head *head,
+                   void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_preempt_state, -1, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
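
Callers are not expected to invoke kfree_call_rcu() directly; the kfree_rcu() macro (which expands through __kfree_rcu() to land here) is the intended interface. A hedged sketch, reusing the hypothetical struct foo, foo_list, and foo_lock from the sketch above:

/* Updater variant: let the grace-period machinery treat the callback as lazy. */
static void foo_remove_lazy(struct foo *fp)
{
	spin_lock(&foo_lock);
	list_del_rcu(&fp->list);
	spin_unlock(&foo_lock);
	kfree_rcu(fp, rcu);	/* Equivalent to call_rcu() + kfree() of fp. */
}
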
+
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  Note, however, that
+ * upon return from synchronize_rcu(), the caller might well be executing
+ * concurrently with new RCU read-side critical sections that began while
+ * synchronize_rcu() was waiting.  RCU read-side critical sections are
+ * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
+ */
+void synchronize_rcu(void)
+{
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu() in RCU read-side critical section");
+       if (!rcu_scheduler_active)
+               return;
+       if (rcu_expedited)
+               synchronize_rcu_expedited();
+       else
+               wait_rcu_gp(call_rcu);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
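
A minimal sketch of the blocking publish/retract pattern that the kernel-doc above describes -- the gp pointer, gp_lock, and struct foo are hypothetical:

static DEFINE_SPINLOCK(gp_lock);
static struct foo __rcu *gp;		/* RCU-protected global pointer. */

static void foo_replace(struct foo *new_fp)
{
	struct foo *old_fp;

	spin_lock(&gp_lock);
	old_fp = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
	rcu_assign_pointer(gp, new_fp);	/* Publish the new version. */
	spin_unlock(&gp_lock);

	synchronize_rcu();		/* Wait out all pre-existing readers. */
	kfree(old_fp);			/* No reader can still reference old_fp. */
}
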
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static unsigned long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+       return rnp->exp_tasks != NULL;
+}
+
+/*
+ * Return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period.  Works only for preemptible
+ * RCU -- other RCU implementations use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+       return !rcu_preempted_readers_exp(rnp) &&
+              ACCESS_ONCE(rnp->expmask) == 0;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period.  This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree.  (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Most callers will set the "wake" flag, but the task initiating the
+ * expedited grace period need not wake itself.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                              bool wake)
+{
+       unsigned long flags;
+       unsigned long mask;
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       for (;;) {
+               if (!sync_rcu_preempt_exp_done(rnp)) {
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                       break;
+               }
+               if (rnp->parent == NULL) {
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                       if (wake)
+                               wake_up(&sync_rcu_preempt_exp_wq);
+                       break;
+               }
+               mask = rnp->grpmask;
+               raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+               rnp = rnp->parent;
+               raw_spin_lock(&rnp->lock); /* irqs already disabled */
+               rnp->expmask &= ~mask;
+       }
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure.  If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
+ * CPU hotplug operations.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+       unsigned long flags;
+       int must_wait = 0;
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       if (list_empty(&rnp->blkd_tasks)) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       } else {
+               rnp->exp_tasks = rnp->blkd_tasks.next;
+               rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
+               must_wait = 1;
+       }
+       if (!must_wait)
+               rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
+}
+
+/**
+ * synchronize_rcu_expedited - Brute-force RCU grace period
+ *
+ * Wait for an RCU-preempt grace period, but expedite it.  The basic
+ * idea is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blkd_tasks lists and wait for this list to drain.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code.
+ * In fact, if you are using synchronize_rcu_expedited() in a loop,
+ * please restructure your code to batch your updates, and then use a
+ * single synchronize_rcu() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.
+ */
+void synchronize_rcu_expedited(void)
+{
+       unsigned long flags;
+       struct rcu_node *rnp;
+       struct rcu_state *rsp = &rcu_preempt_state;
+       unsigned long snap;
+       int trycount = 0;
+
+       smp_mb(); /* Caller's modifications seen first by other CPUs. */
+       snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+       smp_mb(); /* Above access cannot bleed into critical section. */
+
+       /*
+        * Block CPU-hotplug operations.  This means that any CPU-hotplug
+        * operation that finds an rcu_node structure with tasks in the
+        * process of being boosted will know that all tasks blocking
+        * this expedited grace period will already be in the process of
+        * being boosted.  This simplifies the process of moving tasks
+        * from leaf to root rcu_node structures.
+        */
+       get_online_cpus();
+
+       /*
+        * Acquire lock, falling back to synchronize_rcu() if too many
+        * lock-acquisition failures.  Of course, if someone does the
+        * expedited grace period for us, just leave.
+        */
+       while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+               if (ULONG_CMP_LT(snap,
+                   ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+                       put_online_cpus();
+                       goto mb_ret; /* Others did our work for us. */
+               }
+               if (trycount++ < 10) {
+                       udelay(trycount * num_online_cpus());
+               } else {
+                       put_online_cpus();
+                       wait_rcu_gp(call_rcu);
+                       return;
+               }
+       }
+       if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+               put_online_cpus();
+               goto unlock_mb_ret; /* Others did our work for us. */
+       }
+
+       /* Force all RCU readers onto ->blkd_tasks lists. */
+       synchronize_sched_expedited();
+
+       /* Initialize ->expmask for all non-leaf rcu_node structures. */
+       rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
+               raw_spin_lock_irqsave(&rnp->lock, flags);
+               rnp->expmask = rnp->qsmaskinit;
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+
+       /* Snapshot current state of ->blkd_tasks lists. */
+       rcu_for_each_leaf_node(rsp, rnp)
+               sync_rcu_preempt_exp_init(rsp, rnp);
+       if (NUM_RCU_NODES > 1)
+               sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+
+       put_online_cpus();
+
+       /* Wait for snapshotted ->blkd_tasks lists to drain. */
+       rnp = rcu_get_root(rsp);
+       wait_event(sync_rcu_preempt_exp_wq,
+                  sync_rcu_preempt_exp_done(rnp));
+
+       /* Clean up and exit. */
+       smp_mb(); /* ensure expedited GP seen before counter increment. */
+       ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+unlock_mb_ret:
+       mutex_unlock(&sync_rcu_preempt_exp_mutex);
+mb_ret:
+       smp_mb(); /* ensure subsequent action seen after grace period. */
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
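
To illustrate the batching advice in the kernel-doc above, a hedged sketch (the tbl[] array, tbl_lock, and struct foo are hypothetical): instead of one expedited grace period per retracted pointer, retract them all and wait once:

#define N_SLOTS 16			/* Hypothetical table size. */

static DEFINE_SPINLOCK(tbl_lock);
static struct foo __rcu *tbl[N_SLOTS];

static void tbl_clear(void)
{
	struct foo *old[N_SLOTS];
	int i;

	spin_lock(&tbl_lock);
	for (i = 0; i < N_SLOTS; i++) {
		old[i] = rcu_dereference_protected(tbl[i],
						   lockdep_is_held(&tbl_lock));
		rcu_assign_pointer(tbl[i], NULL);
	}
	spin_unlock(&tbl_lock);

	synchronize_rcu();		/* One grace period covers every retraction above. */

	for (i = 0; i < N_SLOTS; i++)
		kfree(old[i]);		/* kfree(NULL) is a no-op. */
}
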
+
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete.  For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
+ */
+void rcu_barrier(void)
+{
+       _rcu_barrier(&rcu_preempt_state);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
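
The classic use is module unload: a module that queued callbacks with call_rcu() must wait for them before its callback code can go away. A hedged sketch, with the module's teardown details elided:

static void __exit foo_module_exit(void)
{
	/* First stop posting new callbacks (list teardown elided)... */
	rcu_barrier();	/* ...then wait for every queued call_rcu() callback to run. */
	/* Only now is it safe for foo_reclaim()'s text to be unloaded. */
}
module_exit(foo_module_exit);
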
+
+/*
+ * Initialize preemptible RCU's state structures.
+ */
+static void __init __rcu_init_preempt(void)
+{
+       rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
+}
+
+/*
+ * Check for a task exiting while in a preemptible-RCU read-side
+ * critical section, clean up if so.  No need to issue warnings,
+ * as debug_check_no_locks_held() already does this if lockdep
+ * is enabled.
+ */
+void exit_rcu(void)
+{
+       struct task_struct *t = current;
+
+       if (likely(list_empty(&current->rcu_node_entry)))
+               return;
+       t->rcu_read_lock_nesting = 1;
+       barrier();
+       t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+       __rcu_read_unlock();
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+static struct rcu_state *rcu_state = &rcu_sched_state;
+
+/*
+ * Tell them what RCU they are running.
+ */
+static void __init rcu_bootup_announce(void)
+{
+       pr_info("Hierarchical RCU implementation.\n");
+       rcu_bootup_announce_oddness();
+}
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+       return rcu_batches_completed_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+       rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * CPUs being in quiescent states.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, there are never any preempted
+ * RCU readers.
+ */
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
+{
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Because preemptible RCU does not exist, no quieting of tasks. */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+{
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
+static int rcu_print_task_stall(struct rcu_node *rnp)
+{
+       return 0;
+}
+
+/*
+ * Because there is no preemptible RCU, there can be no readers blocked,
+ * so there is no need to check for blocked tasks.  Check only for
+ * bogus qsmask values.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+       WARN_ON_ONCE(rnp->qsmask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptible RCU does not exist, it never needs to migrate
+ * tasks that were blocked within RCU read-side critical sections, and
+ * such non-existent tasks cannot possibly have been blocking the current
+ * grace period.
+ */
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp)
+{
+       return 0;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Because preemptible RCU does not exist, it never has any callbacks
+ * to check.
+ */
+static void rcu_preempt_check_callbacks(int cpu)
+{
+}
+
+/*
+ * Queue an RCU callback for lazy invocation after a grace period.
+ * This will likely be later named something like "call_rcu_lazy()",
+ * but this change will require some way of tagging the lazy RCU
+ * callbacks in the list of pending callbacks.  Until then, this
+ * function may only be called from __kfree_rcu().
+ *
+ * Because there is no preemptible RCU, we use RCU-sched instead.
+ */
+void kfree_call_rcu(struct rcu_head *head,
+                   void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_sched_state, -1, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
+
+/*
+ * Wait for an rcu-preempt grace period, but make it happen quickly.
+ * But because preemptible RCU does not exist, map to rcu-sched.
+ */
+void synchronize_rcu_expedited(void)
+{
+       synchronize_sched_expedited();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptible RCU does not exist, there is never any need to
+ * report on tasks preempted in RCU read-side critical sections during
+ * expedited RCU grace periods.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                              bool wake)
+{
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Because preemptible RCU does not exist, rcu_barrier() is just
+ * another name for rcu_barrier_sched().
+ */
+void rcu_barrier(void)
+{
+       rcu_barrier_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/*
+ * Because preemptible RCU does not exist, it need not be initialized.
+ */
+static void __init __rcu_init_preempt(void)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, tasks cannot possibly exit
+ * while in preemptible RCU read-side critical sections.
+ */
+void exit_rcu(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#ifdef CONFIG_RCU_BOOST
+
+#include "../rtmutex_common.h"
+
+#ifdef CONFIG_RCU_TRACE
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+       if (list_empty(&rnp->blkd_tasks))
+               rnp->n_balk_blkd_tasks++;
+       else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
+               rnp->n_balk_exp_gp_tasks++;
+       else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
+               rnp->n_balk_boost_tasks++;
+       else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
+               rnp->n_balk_notblocked++;
+       else if (rnp->gp_tasks != NULL &&
+                ULONG_CMP_LT(jiffies, rnp->boost_time))
+               rnp->n_balk_notyet++;
+       else
+               rnp->n_balk_nos++;
+}
+
+#else /* #ifdef CONFIG_RCU_TRACE */
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+       /*
+        * If the thread is yielding, only wake it when this
+        * is invoked from idle
+        * is invoked from idle.
+       if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
+               wake_up_process(t);
+}
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->exp_tasks
+ * or ->boost_tasks, advancing the pointer to the next task in the
+ * ->blkd_tasks list.
+ *
+ * Note that irqs must be enabled: boosting the task can block.
+ * Returns 1 if there are more tasks needing to be boosted.
+ */
+static int rcu_boost(struct rcu_node *rnp)
+{
+       unsigned long flags;
+       struct rt_mutex mtx;
+       struct task_struct *t;
+       struct list_head *tb;
+
+       if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+               return 0;  /* Nothing left to boost. */
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+
+       /*
+        * Recheck under the lock: all tasks in need of boosting
+        * might exit their RCU read-side critical sections on their own.
+        */
+       if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return 0;
+       }
+
+       /*
+        * Preferentially boost tasks blocking expedited grace periods.
+        * This cannot starve the normal grace periods because a second
+        * expedited grace period must boost all blocked tasks, including
+        * those blocking the pre-existing normal grace period.
+        */
+       if (rnp->exp_tasks != NULL) {
+               tb = rnp->exp_tasks;
+               rnp->n_exp_boosts++;
+       } else {
+               tb = rnp->boost_tasks;
+               rnp->n_normal_boosts++;
+       }
+       rnp->n_tasks_boosted++;
+
+       /*
+        * We boost task t by manufacturing an rt_mutex that appears to
+        * be held by task t.  We leave a pointer to that rt_mutex where
+        * task t can find it, and task t will release the mutex when it
+        * exits its outermost RCU read-side critical section.  Then
+        * simply acquiring this artificial rt_mutex will boost task
+        * t's priority.  (Thanks to tglx for suggesting this approach!)
+        *
+        * Note that task t must acquire rnp->lock to remove itself from
+        * the ->blkd_tasks list, which it will do from exit() if from
+        * nowhere else.  We therefore are guaranteed that task t will
+        * stay around at least until we drop rnp->lock.  Note that
+        * rnp->lock also resolves races between our priority boosting
+        * and task t's exiting its outermost RCU read-side critical
+        * section.
+        */
+       t = container_of(tb, struct task_struct, rcu_node_entry);
+       rt_mutex_init_proxy_locked(&mtx, t);
+       t->rcu_boost_mutex = &mtx;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
+       rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
+
+       return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
+              ACCESS_ONCE(rnp->boost_tasks) != NULL;
+}
+
+/*
+ * Priority-boosting kthread.  One per leaf rcu_node and one for the
+ * root rcu_node.
+ */
+static int rcu_boost_kthread(void *arg)
+{
+       struct rcu_node *rnp = (struct rcu_node *)arg;
+       int spincnt = 0;
+       int more2boost;
+
+       trace_rcu_utilization(TPS("Start boost kthread@init"));
+       for (;;) {
+               rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
+               trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
+               rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
+               trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
+               rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
+               more2boost = rcu_boost(rnp);
+               if (more2boost)
+                       spincnt++;
+               else
+                       spincnt = 0;
+               if (spincnt > 10) {
+                       rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
+                       trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
+                       schedule_timeout_interruptible(2);
+                       trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
+                       spincnt = 0;
+               }
+       }
+       /* NOTREACHED */
+       trace_rcu_utilization(TPS("End boost kthread@notreached"));
+       return 0;
+}
+
+/*
+ * Check to see if it is time to start boosting RCU readers that are
+ * blocking the current grace period, and, if so, tell the per-rcu_node
+ * kthread to start boosting them.  If there is an expedited grace
+ * period in progress, it is always time to boost.
+ *
+ * The caller must hold rnp->lock, which this function releases.
+ * The ->boost_kthread_task is immortal, so we don't need to worry
+ * about it going away.
+ */
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+       struct task_struct *t;
+
+       if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+               rnp->n_balk_exp_gp_tasks++;
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;
+       }
+       if (rnp->exp_tasks != NULL ||
+           (rnp->gp_tasks != NULL &&
+            rnp->boost_tasks == NULL &&
+            rnp->qsmask == 0 &&
+            ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+               if (rnp->exp_tasks == NULL)
+                       rnp->boost_tasks = rnp->gp_tasks;
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               t = rnp->boost_kthread_task;
+               if (t)
+                       rcu_wake_cond(t, rnp->boost_kthread_status);
+       } else {
+               rcu_initiate_boost_trace(rnp);
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+}
+
+/*
+ * Wake up the per-CPU kthread to invoke RCU callbacks.
+ */
+static void invoke_rcu_callbacks_kthread(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __this_cpu_write(rcu_cpu_has_work, 1);
+       if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
+           current != __this_cpu_read(rcu_cpu_kthread_task)) {
+               rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
+                             __this_cpu_read(rcu_cpu_kthread_status));
+       }
+       local_irq_restore(flags);
+}
+
+/*
+ * Is the current CPU running the RCU-callbacks kthread?
+ * Caller must have preemption disabled.
+ */
+static bool rcu_is_callbacks_kthread(void)
+{
+       return __this_cpu_read(rcu_cpu_kthread_task) == current;
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+       rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+}
+
+/*
+ * Create an RCU-boost kthread for the specified node if one does not
+ * already exist.  We only create this kthread for preemptible RCU.
+ * Returns zero if all is well, a negated errno otherwise.
+ */
+static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+                                                struct rcu_node *rnp)
+{
+       int rnp_index = rnp - &rsp->node[0];
+       unsigned long flags;
+       struct sched_param sp;
+       struct task_struct *t;
+
+       if (&rcu_preempt_state != rsp)
+               return 0;
+
+       if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
+               return 0;
+
+       rsp->boost = 1;
+       if (rnp->boost_kthread_task != NULL)
+               return 0;
+       t = kthread_create(rcu_boost_kthread, (void *)rnp,
+                          "rcub/%d", rnp_index);
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       rnp->boost_kthread_task = t;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       sp.sched_priority = RCU_BOOST_PRIO;
+       sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+       wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
+       return 0;
+}
+
+static void rcu_kthread_do_work(void)
+{
+       rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
+       rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
+       rcu_preempt_do_callbacks();
+}
+
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
+       struct sched_param sp;
+
+       sp.sched_priority = RCU_KTHREAD_PRIO;
+       sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+       per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+       return __this_cpu_read(rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
+ * RCU softirq used in flavors and configurations of RCU that do not
+ * support RCU priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+       unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
+       char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
+       int spincnt;
+
+       for (spincnt = 0; spincnt < 10; spincnt++) {
+               trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+               local_bh_disable();
+               *statusp = RCU_KTHREAD_RUNNING;
+               this_cpu_inc(rcu_cpu_kthread_loops);
+               local_irq_disable();
+               work = *workp;
+               *workp = 0;
+               local_irq_enable();
+               if (work)
+                       rcu_kthread_do_work();
+               local_bh_enable();
+               if (*workp == 0) {
+                       trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+                       *statusp = RCU_KTHREAD_WAITING;
+                       return;
+               }
+       }
+       *statusp = RCU_KTHREAD_YIELDING;
+       trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+       schedule_timeout_interruptible(2);
+       trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+       *statusp = RCU_KTHREAD_WAITING;
+}
+
+/*
+ * Set the per-rcu_node kthread's affinity to cover all CPUs that are
+ * served by the rcu_node in question.  The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
+ *
+ * We don't include outgoingcpu in the affinity set; use -1 if there is
+ * no outgoing CPU.  If there are no CPUs left in the affinity set,
+ * this function allows the kthread to execute on any CPU.
+ */
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+{
+       struct task_struct *t = rnp->boost_kthread_task;
+       unsigned long mask = rnp->qsmaskinit;
+       cpumask_var_t cm;
+       int cpu;
+
+       if (!t)
+               return;
+       if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
+               return;
+       for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
+               if ((mask & 0x1) && cpu != outgoingcpu)
+                       cpumask_set_cpu(cpu, cm);
+       if (cpumask_weight(cm) == 0) {
+               cpumask_setall(cm);
+               for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
+                       cpumask_clear_cpu(cpu, cm);
+               WARN_ON_ONCE(cpumask_weight(cm) == 0);
+       }
+       set_cpus_allowed_ptr(t, cm);
+       free_cpumask_var(cm);
+}
+
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+       .store                  = &rcu_cpu_kthread_task,
+       .thread_should_run      = rcu_cpu_kthread_should_run,
+       .thread_fn              = rcu_cpu_kthread,
+       .thread_comm            = "rcuc/%u",
+       .setup                  = rcu_cpu_kthread_setup,
+       .park                   = rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn all kthreads -- called as soon as the scheduler is running.
+ */
+static int __init rcu_spawn_kthreads(void)
+{
+       struct rcu_node *rnp;
+       int cpu;
+
+       rcu_scheduler_fully_active = 1;
+       for_each_possible_cpu(cpu)
+               per_cpu(rcu_cpu_has_work, cpu) = 0;
+       BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
+       rnp = rcu_get_root(rcu_state);
+       (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
+       if (NUM_RCU_NODES > 1) {
+               rcu_for_each_leaf_node(rcu_state, rnp)
+                       (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
+       }
+       return 0;
+}
+early_initcall(rcu_spawn_kthreads);
+
+static void rcu_prepare_kthreads(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+       struct rcu_node *rnp = rdp->mynode;
+
+       /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
+       if (rcu_scheduler_fully_active)
+               (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+static void invoke_rcu_callbacks_kthread(void)
+{
+       WARN_ON_ONCE(1);
+}
+
+static bool rcu_is_callbacks_kthread(void)
+{
+       return false;
+}
+
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+}
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+{
+}
+
+static int __init rcu_scheduler_really_started(void)
+{
+       rcu_scheduler_fully_active = 1;
+       return 0;
+}
+early_initcall(rcu_scheduler_really_started);
+
+static void rcu_prepare_kthreads(int cpu)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
+ * any flavor of RCU.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+{
+       *delta_jiffies = ULONG_MAX;
+       return rcu_cpu_has_callbacks(cpu, NULL);
+}
+
+/*
+ * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
+ * after it.
+ */
+static void rcu_cleanup_after_idle(int cpu)
+{
+}
+
+/*
+ * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
+ * is nothing.
+ */
+static void rcu_prepare_for_idle(int cpu)
+{
+}
+
+/*
+ * Don't bother keeping a running count of the number of RCU callbacks
+ * posted because CONFIG_RCU_FAST_NO_HZ=n.
+ */
+static void rcu_idle_count_callbacks_posted(void)
+{
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+/*
+ * This code is invoked when a CPU goes idle, at which point we want
+ * to have the CPU do everything required for RCU so that it can enter
+ * the energy-efficient dyntick-idle mode.  This is handled by a
+ * state machine implemented by rcu_prepare_for_idle() below.
+ *
+ * The following two preprocessor symbols control this state machine:
+ *
+ * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
+ *     to sleep in dyntick-idle mode with RCU callbacks pending.  This
+ *     is sized to be roughly one RCU grace period.  Those energy-efficiency
+ *     benchmarkers who might otherwise be tempted to set this to a large
+ *     number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
+ *     system.  And if you are -that- concerned about energy efficiency,
+ *     just power the system down and be done with it!
+ * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
+ *     permitted to sleep in dyntick-idle mode with only lazy RCU
+ *     callbacks pending.  Setting this too high can OOM your system.
+ *
+ * The values below work well in practice.  If future workloads require
+ * adjustment, they can be converted into kernel config parameters, though
+ * making the state machine smarter might be a better option.
+ */
+#define RCU_IDLE_GP_DELAY 4            /* Roughly one grace period. */
+#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)        /* Roughly six seconds. */
+
+static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
+module_param(rcu_idle_gp_delay, int, 0644);
+static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
+module_param(rcu_idle_lazy_gp_delay, int, 0644);
+
+extern int tick_nohz_enabled;
+
+/*
+ * Try to advance callbacks for all flavors of RCU on the current CPU, but
+ * only if it has been a while since the last time we did so.  Afterwards,
+ * if there are any callbacks ready for immediate invocation, return true.
+ */
+static bool rcu_try_advance_all_cbs(void)
+{
+       bool cbs_ready = false;
+       struct rcu_data *rdp;
+       struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+       struct rcu_node *rnp;
+       struct rcu_state *rsp;
+
+       /* Exit early if we advanced recently. */
+       if (jiffies == rdtp->last_advance_all)
+               return 0;
+       rdtp->last_advance_all = jiffies;
+
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+               rnp = rdp->mynode;
+
+               /*
+                * Don't bother checking unless a grace period has
+                * completed since we last checked and there are
+                * callbacks not yet ready to invoke.
+                */
+               if (rdp->completed != rnp->completed &&
+                   rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+                       note_gp_changes(rsp, rdp);
+
+               if (cpu_has_callbacks_ready_to_invoke(rdp))
+                       cbs_ready = true;
+       }
+       return cbs_ready;
+}
+
+/*
+ * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
+ * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
+ * caller to set the timeout based on whether or not there are non-lazy
+ * callbacks.
+ *
+ * The caller must have disabled interrupts.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *dj)
+{
+       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+       /* Snapshot to detect later posting of non-lazy callback. */
+       rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+
+       /* If no callbacks, RCU doesn't need the CPU. */
+       if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
+               *dj = ULONG_MAX;
+               return 0;
+       }
+
+       /* Attempt to advance callbacks. */
+       if (rcu_try_advance_all_cbs()) {
+               /* Some ready to invoke, so initiate later invocation. */
+               invoke_rcu_core();
+               return 1;
+       }
+       rdtp->last_accelerate = jiffies;
+
+       /* Request timer delay depending on laziness, and round. */
+       if (!rdtp->all_lazy) {
+               *dj = round_up(rcu_idle_gp_delay + jiffies,
+                              rcu_idle_gp_delay) - jiffies;
+       } else {
+               *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
+       }
+       return 0;
+}
+
+/*
+ * Prepare a CPU for idle from an RCU perspective.  The first major task
+ * is to sense whether nohz mode has been enabled or disabled via sysfs.
+ * The second major task is to check to see if a non-lazy callback has
+ * arrived at a CPU that previously had only lazy callbacks.  The third
+ * major task is to accelerate (that is, assign grace-period numbers to)
+ * any recently arrived callbacks.
+ *
+ * The caller must have disabled interrupts.
+ */
+static void rcu_prepare_for_idle(int cpu)
+{
+       struct rcu_data *rdp;
+       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+       struct rcu_node *rnp;
+       struct rcu_state *rsp;
+       int tne;
+
+       /* Handle nohz enablement switches conservatively. */
+       tne = ACCESS_ONCE(tick_nohz_enabled);
+       if (tne != rdtp->tick_nohz_enabled_snap) {
+               if (rcu_cpu_has_callbacks(cpu, NULL))
+                       invoke_rcu_core(); /* force nohz to see update. */
+               rdtp->tick_nohz_enabled_snap = tne;
+               return;
+       }
+       if (!tne)
+               return;
+
+       /* If this is a no-CBs CPU, no callbacks, just return. */
+       if (rcu_is_nocb_cpu(cpu))
+               return;
+
+       /*
+        * If a non-lazy callback arrived at a CPU having only lazy
+        * callbacks, invoke RCU core for the side-effect of recalculating
+        * idle duration on re-entry to idle.
+        */
+       if (rdtp->all_lazy &&
+           rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
+               rdtp->all_lazy = false;
+               rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+               invoke_rcu_core();
+               return;
+       }
+
+       /*
+        * If we have not yet accelerated this jiffy, accelerate all
+        * callbacks on this CPU.
+        */
+       if (rdtp->last_accelerate == jiffies)
+               return;
+       rdtp->last_accelerate = jiffies;
+       for_each_rcu_flavor(rsp) {
+               rdp = per_cpu_ptr(rsp->rda, cpu);
+               if (!*rdp->nxttail[RCU_DONE_TAIL])
+                       continue;
+               rnp = rdp->mynode;
+               raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+               rcu_accelerate_cbs(rsp, rnp, rdp);
+               raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+       }
+}
+
+/*
+ * Clean up for exit from idle.  Attempt to advance callbacks based on
+ * any grace periods that elapsed while the CPU was idle, and if any
+ * callbacks are now ready to invoke, initiate invocation.
+ */
+static void rcu_cleanup_after_idle(int cpu)
+{
+
+       if (rcu_is_nocb_cpu(cpu))
+               return;
+       if (rcu_try_advance_all_cbs())
+               invoke_rcu_core();
+}
+
+/*
+ * Keep a running count of the number of non-lazy callbacks posted
+ * on this CPU.  This running counter (which is never decremented) allows
+ * rcu_prepare_for_idle() to detect when something out of the idle loop
+ * posts a callback, even if an equal number of callbacks are invoked.
+ * Of course, callbacks should only be posted from within a trace event
+ * designed to be called from idle or from within RCU_NONIDLE().
+ */
+static void rcu_idle_count_callbacks_posted(void)
+{
+       __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
+}
+
+/*
+ * Data for flushing lazy RCU callbacks at OOM time.
+ */
+static atomic_t oom_callback_count;
+static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
+
+/*
+ * RCU OOM callback -- decrement the outstanding count and deliver the
+ * wake-up if we are the last one.
+ */
+static void rcu_oom_callback(struct rcu_head *rhp)
+{
+       if (atomic_dec_and_test(&oom_callback_count))
+               wake_up(&oom_callback_wq);
+}
+
+/*
+ * Post an rcu_oom_notify callback on the current CPU if it has at
+ * least one lazy callback.  This will unnecessarily post callbacks
+ * to CPUs that already have a non-lazy callback at the end of their
+ * callback list, but this is an infrequent operation, so accept some
+ * extra overhead to keep things simple.
+ */
+static void rcu_oom_notify_cpu(void *unused)
+{
+       struct rcu_state *rsp;
+       struct rcu_data *rdp;
+
+       for_each_rcu_flavor(rsp) {
+               rdp = __this_cpu_ptr(rsp->rda);
+               if (rdp->qlen_lazy != 0) {
+                       atomic_inc(&oom_callback_count);
+                       rsp->call(&rdp->oom_head, rcu_oom_callback);
+               }
+       }
+}
+
+/*
+ * If low on memory, ensure that each CPU has a non-lazy callback.
+ * This will wake up CPUs that have only lazy callbacks, in turn
+ * ensuring that they free up the corresponding memory in a timely manner.
+ * Because an uncertain amount of memory will be freed in some uncertain
+ * timeframe, we do not claim to have freed anything.
+ */
+static int rcu_oom_notify(struct notifier_block *self,
+                         unsigned long notused, void *nfreed)
+{
+       int cpu;
+
+       /* Wait for callbacks from earlier instance to complete. */
+       wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
+
+       /*
+        * Prevent premature wakeup: ensure that all increments happen
+        * before there is a chance of the counter reaching zero.
+        */
+       atomic_set(&oom_callback_count, 1);
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
+               cond_resched();
+       }
+       put_online_cpus();
+
+       /* Unconditionally decrement: no need to wake ourselves up. */
+       atomic_dec(&oom_callback_count);
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block rcu_oom_nb = {
+       .notifier_call = rcu_oom_notify
+};
+
+static int __init rcu_register_oom_notifier(void)
+{
+       register_oom_notifier(&rcu_oom_nb);
+       return 0;
+}
+early_initcall(rcu_register_oom_notifier);
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+
+#ifdef CONFIG_RCU_FAST_NO_HZ
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+       unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
+
+       sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
+               rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
+               ulong2long(nlpd),
+               rdtp->all_lazy ? 'L' : '.',
+               rdtp->tick_nohz_enabled_snap ? '.' : 'D');
+}
+
+#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+       *cp = '\0';
+}
+
+#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+/* Initiate the stall-info list. */
+static void print_cpu_stall_info_begin(void)
+{
+       pr_cont("\n");
+}
+
+/*
+ * Print out diagnostic information for the specified stalled CPU.
+ *
+ * If the specified CPU is aware of the current RCU grace period
+ * (flavor specified by rsp), then print the number of scheduling
+ * clock interrupts the CPU has taken during the time that it has
+ * been aware.  Otherwise, print the number of RCU grace periods
+ * that this CPU is ignorant of, for example, "1" if the CPU was
+ * aware of the previous grace period.
+ *
+ * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
+ */
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
+{
+       char fast_no_hz[72];
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_dynticks *rdtp = rdp->dynticks;
+       char *ticks_title;
+       unsigned long ticks_value;
+
+       if (rsp->gpnum == rdp->gpnum) {
+               ticks_title = "ticks this GP";
+               ticks_value = rdp->ticks_this_gp;
+       } else {
+               ticks_title = "GPs behind";
+               ticks_value = rsp->gpnum - rdp->gpnum;
+       }
+       print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
+       pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
+              cpu, ticks_value, ticks_title,
+              atomic_read(&rdtp->dynticks) & 0xfff,
+              rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
+              rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
+              fast_no_hz);
+}
+
+/* Terminate the stall-info list. */
+static void print_cpu_stall_info_end(void)
+{
+       pr_err("\t");
+}
+
+/* Zero ->ticks_this_gp for all flavors of RCU. */
+static void zero_cpu_stall_ticks(struct rcu_data *rdp)
+{
+       rdp->ticks_this_gp = 0;
+       rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
+}
+
+/* Increment ->ticks_this_gp for all flavors of RCU. */
+static void increment_cpu_stall_ticks(void)
+{
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp)
+               __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+static void print_cpu_stall_info_begin(void)
+{
+       pr_cont(" {");
+}
+
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
+{
+       pr_cont(" %d", cpu);
+}
+
+static void print_cpu_stall_info_end(void)
+{
+       pr_cont("} ");
+}
+
+static void zero_cpu_stall_ticks(struct rcu_data *rdp)
+{
+}
+
+static void increment_cpu_stall_ticks(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+
+/*
+ * Offload callback processing from the set of CPUs specified at boot
+ * time by rcu_nocb_mask.  For each CPU in the set, there is a
+ * kthread created that pulls the callbacks from the corresponding CPU,
+ * waits for a grace period to elapse, and invokes the callbacks.
+ * The no-CBs CPUs do a wake_up() on their kthread when they insert
+ * a callback into any empty list, unless the rcu_nocb_poll boot parameter
+ * has been specified, in which case each kthread actively polls its
+ * CPU.  (Which isn't so great for energy efficiency, but which does
+ * reduce RCU's overhead on that CPU.)
+ *
+ * This is intended to be used in conjunction with Frederic Weisbecker's
+ * adaptive-idle work, which would seriously reduce OS jitter on CPUs
+ * running CPU-bound user-mode computations.
+ *
+ * Offloading of callback processing could also in theory be used as
+ * an energy-efficiency measure because CPUs with no RCU callbacks
+ * queued are more aggressive about entering dyntick-idle mode.
+ */
+
+
+/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
+static int __init rcu_nocb_setup(char *str)
+{
+       alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+       have_rcu_nocb_mask = true;
+       cpulist_parse(str, rcu_nocb_mask);
+       return 1;
+}
+__setup("rcu_nocbs=", rcu_nocb_setup);
+
+static int __init parse_rcu_nocb_poll(char *arg)
+{
+       rcu_nocb_poll = 1;
+       return 0;
+}
+early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+
+/*
+ * Do any no-CBs CPUs need another grace period?
+ *
+ * Interrupts must be disabled.  If the caller does not hold the root
+ * rcu_node structure's ->lock, the results are advisory only.
+ */
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
+{
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
+}
+
+/*
+ * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
+ * grace period.
+ */
+static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+       wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+}
+
+/*
+ * Set the root rcu_node structure's ->need_future_gp field
+ * based on the sum of those of all rcu_node structures.  This does
+ * double-count the root rcu_node structure's requests, but this
+ * is necessary to handle the possibility of a rcu_nocb_kthread()
+ * having awakened during the time that the rcu_node structures
+ * were being updated for the end of the previous grace period.
+ */
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+       rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+       init_waitqueue_head(&rnp->nocb_gp_wq[0]);
+       init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+}
+
+/* Is the specified CPU a no-CBs CPU? */
+bool rcu_is_nocb_cpu(int cpu)
+{
+       if (have_rcu_nocb_mask)
+               return cpumask_test_cpu(cpu, rcu_nocb_mask);
+       return false;
+}
+
+/*
+ * Enqueue the specified string of rcu_head structures onto the specified
+ * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
+ * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
+ * counts are supplied by rhcount and rhcount_lazy.
+ *
+ * If warranted, also wake up the kthread servicing this CPU's queues.
+ */
+static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
+                                   struct rcu_head *rhp,
+                                   struct rcu_head **rhtp,
+                                   int rhcount, int rhcount_lazy)
+{
+       int len;
+       struct rcu_head **old_rhpp;
+       struct task_struct *t;
+
+       /* Enqueue the callback on the nocb list and update counts. */
+       old_rhpp = xchg(&rdp->nocb_tail, rhtp);
+       ACCESS_ONCE(*old_rhpp) = rhp;
+       atomic_long_add(rhcount, &rdp->nocb_q_count);
+       atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
+
+       /* If we are not being polled and there is a kthread, awaken it ... */
+       t = ACCESS_ONCE(rdp->nocb_kthread);
+       if (rcu_nocb_poll || !t) {
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                   TPS("WakeNotPoll"));
+               return;
+       }
+       len = atomic_long_read(&rdp->nocb_q_count);
+       if (old_rhpp == &rdp->nocb_head) {
+               wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+               rdp->qlen_last_fqs_check = 0;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
+       } else if (len > rdp->qlen_last_fqs_check + qhimark) {
+               wake_up_process(t); /* ... or if many callbacks queued. */
+               rdp->qlen_last_fqs_check = LONG_MAX / 2;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
+       } else {
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
+       }
+       return;
+}
+
+/*
+ * This is a helper for __call_rcu(), which invokes this when the normal
+ * callback queue is inoperable.  If this is not a no-CBs CPU, this
+ * function returns failure back to __call_rcu(), which can complain
+ * appropriately.
+ *
+ * Otherwise, this function queues the callback where the corresponding
+ * "rcuo" kthread can find it.
+ */
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                           bool lazy)
+{
+
+       if (!rcu_is_nocb_cpu(rdp->cpu))
+               return 0;
+       __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+       if (__is_kfree_rcu_offset((unsigned long)rhp->func))
+               trace_rcu_kfree_callback(rdp->rsp->name, rhp,
+                                        (unsigned long)rhp->func,
+                                        -atomic_long_read(&rdp->nocb_q_count_lazy),
+                                        -atomic_long_read(&rdp->nocb_q_count));
+       else
+               trace_rcu_callback(rdp->rsp->name, rhp,
+                                  -atomic_long_read(&rdp->nocb_q_count_lazy),
+                                  -atomic_long_read(&rdp->nocb_q_count));
+       return 1;
+}
+
+/*
+ * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
+ * not a no-CBs CPU.
+ */
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                                    struct rcu_data *rdp)
+{
+       long ql = rsp->qlen;
+       long qll = rsp->qlen_lazy;
+
+       /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
+       if (!rcu_is_nocb_cpu(smp_processor_id()))
+               return 0;
+       rsp->qlen = 0;
+       rsp->qlen_lazy = 0;
+
+       /* First, enqueue the donelist, if any.  This preserves CB ordering. */
+       if (rsp->orphan_donelist != NULL) {
+               __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
+                                       rsp->orphan_donetail, ql, qll);
+               ql = qll = 0;
+               rsp->orphan_donelist = NULL;
+               rsp->orphan_donetail = &rsp->orphan_donelist;
+       }
+       if (rsp->orphan_nxtlist != NULL) {
+               __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
+                                       rsp->orphan_nxttail, ql, qll);
+               ql = qll = 0;
+               rsp->orphan_nxtlist = NULL;
+               rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+       }
+       return 1;
+}
+
+/*
+ * If necessary, kick off a new grace period, and either way wait
+ * for a subsequent grace period to complete.
+ */
+static void rcu_nocb_wait_gp(struct rcu_data *rdp)
+{
+       unsigned long c;
+       bool d;
+       unsigned long flags;
+       struct rcu_node *rnp = rdp->mynode;
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       c = rcu_start_future_gp(rnp, rdp);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       /*
+        * Wait for the grace period.  Do so interruptibly to avoid messing
+        * up the load average.
+        */
+       trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
+       for (;;) {
+               wait_event_interruptible(
+                       rnp->nocb_gp_wq[c & 0x1],
+                       (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
+               if (likely(d))
+                       break;
+               flush_signals(current);
+               trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
+       }
+       trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
+       smp_mb(); /* Ensure that CB invocation happens after GP end. */
+}
+
+/*
+ * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
+ * callbacks queued by the corresponding no-CBs CPU.
+ */
+static int rcu_nocb_kthread(void *arg)
+{
+       int c, cl;
+       bool firsttime = 1;
+       struct rcu_head *list;
+       struct rcu_head *next;
+       struct rcu_head **tail;
+       struct rcu_data *rdp = arg;
+
+       /* Each pass through this loop invokes one batch of callbacks. */
+       for (;;) {
+               /* If not polling, wait for next batch of callbacks. */
+               if (!rcu_nocb_poll) {
+                       trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                           TPS("Sleep"));
+                       wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+               } else if (firsttime) {
+                       firsttime = 0;
+                       trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                           TPS("Poll"));
+               }
+               list = ACCESS_ONCE(rdp->nocb_head);
+               if (!list) {
+                       if (!rcu_nocb_poll)
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WokeEmpty"));
+                       schedule_timeout_interruptible(1);
+                       flush_signals(current);
+                       continue;
+               }
+               firsttime = 1;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                   TPS("WokeNonEmpty"));
+
+               /*
+                * Extract queued callbacks, update counts, and wait
+                * for a grace period to elapse.
+                */
+               ACCESS_ONCE(rdp->nocb_head) = NULL;
+               tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
+               c = atomic_long_xchg(&rdp->nocb_q_count, 0);
+               cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
+               ACCESS_ONCE(rdp->nocb_p_count) += c;
+               ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
+               rcu_nocb_wait_gp(rdp);
+
+               /* Each pass through the following loop invokes a callback. */
+               trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
+               c = cl = 0;
+               while (list) {
+                       next = list->next;
+                       /* Wait for enqueuing to complete, if needed. */
+                       while (next == NULL && &list->next != tail) {
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WaitQueue"));
+                               schedule_timeout_interruptible(1);
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WokeQueue"));
+                               next = list->next;
+                       }
+                       debug_rcu_head_unqueue(list);
+                       local_bh_disable();
+                       if (__rcu_reclaim(rdp->rsp->name, list))
+                               cl++;
+                       c++;
+                       local_bh_enable();
+                       list = next;
+               }
+               trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
+               ACCESS_ONCE(rdp->nocb_p_count) -= c;
+               ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
+               rdp->n_nocbs_invoked += c;
+       }
+       return 0;
+}
+
+/* Initialize per-rcu_data variables for no-CBs CPUs. */
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+       rdp->nocb_tail = &rdp->nocb_head;
+       init_waitqueue_head(&rdp->nocb_wq);
+}
+
+/* Create a kthread for each RCU flavor for each no-CBs CPU. */
+static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+{
+       int cpu;
+       struct rcu_data *rdp;
+       struct task_struct *t;
+
+       if (rcu_nocb_mask == NULL)
+               return;
+       for_each_cpu(cpu, rcu_nocb_mask) {
+               rdp = per_cpu_ptr(rsp->rda, cpu);
+               t = kthread_run(rcu_nocb_kthread, rdp,
+                               "rcuo%c/%d", rsp->abbr, cpu);
+               BUG_ON(IS_ERR(t));
+               ACCESS_ONCE(rdp->nocb_kthread) = t;
+       }
+}
+
+/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
+static bool init_nocb_callback_list(struct rcu_data *rdp)
+{
+       if (rcu_nocb_mask == NULL ||
+           !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
+               return false;
+       rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+       return true;
+}
+
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
+{
+       return 0;
+}
+
+static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+}
+
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+}
+
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                           bool lazy)
+{
+       return 0;
+}
+
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                                    struct rcu_data *rdp)
+{
+       return 0;
+}
+
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+}
+
+static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+{
+}
+
+static bool init_nocb_callback_list(struct rcu_data *rdp)
+{
+       return false;
+}
+
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
+/*
+ * An adaptive-ticks CPU can potentially execute in kernel mode for an
+ * arbitrarily long period of time with the scheduling-clock tick turned
+ * off.  RCU will be paying attention to this CPU because it is in the
+ * kernel, but the CPU cannot be guaranteed to be executing the RCU state
+ * machine because the scheduling-clock tick has been disabled.  Therefore,
+ * if an adaptive-ticks CPU is failing to respond to the current grace
+ * period and has not been idle from an RCU perspective, kick it.
+ */
+static void rcu_kick_nohz_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ_FULL
+       if (tick_nohz_full_cpu(cpu))
+               smp_send_reschedule(cpu);
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
+}
+
+
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+
+/*
+ * Define RCU flavor that holds sysidle state.  This needs to be the
+ * most active flavor of RCU.
+ */
+#ifdef CONFIG_PREEMPT_RCU
+static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+static int full_sysidle_state;         /* Current system-idle state. */
+#define RCU_SYSIDLE_NOT                0       /* Some CPU is not idle. */
+#define RCU_SYSIDLE_SHORT      1       /* All CPUs idle for brief period. */
+#define RCU_SYSIDLE_LONG       2       /* All CPUs idle for long enough. */
+#define RCU_SYSIDLE_FULL       3       /* All CPUs idle, ready for sysidle. */
+#define RCU_SYSIDLE_FULL_NOTED 4       /* Actually entered sysidle state. */
+
+/*
+ * Invoked to note exit from irq or task transition to idle.  Note that
+ * usermode execution does -not- count as idle here!  After all, we want
+ * to detect full-system idle states, not RCU quiescent states and grace
+ * periods.  The caller must have disabled interrupts.
+ */
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+       unsigned long j;
+
+       /* Adjust nesting, check for fully idle. */
+       if (irq) {
+               rdtp->dynticks_idle_nesting--;
+               WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+               if (rdtp->dynticks_idle_nesting != 0)
+                       return;  /* Still not fully idle. */
+       } else {
+               if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
+                   DYNTICK_TASK_NEST_VALUE) {
+                       rdtp->dynticks_idle_nesting = 0;
+               } else {
+                       rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
+                       WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+                       return;  /* Still not fully idle. */
+               }
+       }
+
+       /* Record start of fully idle period. */
+       j = jiffies;
+       ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
+       smp_mb__before_atomic_inc();
+       atomic_inc(&rdtp->dynticks_idle);
+       smp_mb__after_atomic_inc();
+       WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
+}
+
+/*
+ * Unconditionally force exit from full system-idle state.  This is
+ * invoked when a normal CPU exits idle, but must be called separately
+ * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
+ * is that the timekeeping CPU is permitted to take scheduling-clock
+ * interrupts while the system is in system-idle state, and of course
+ * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
+ * interrupt from any other type of interrupt.
+ */
+void rcu_sysidle_force_exit(void)
+{
+       int oldstate = ACCESS_ONCE(full_sysidle_state);
+       int newoldstate;
+
+       /*
+        * Each pass through the following loop attempts to exit full
+        * system-idle state.  If contention proves to be a problem,
+        * a trylock-based contention tree could be used here.
+        */
+       while (oldstate > RCU_SYSIDLE_SHORT) {
+               newoldstate = cmpxchg(&full_sysidle_state,
+                                     oldstate, RCU_SYSIDLE_NOT);
+               if (oldstate == newoldstate &&
+                   oldstate == RCU_SYSIDLE_FULL_NOTED) {
+                       rcu_kick_nohz_cpu(tick_do_timer_cpu);
+                       return; /* We cleared it, done! */
+               }
+               oldstate = newoldstate;
+       }
+       smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
+}
+
+/*
+ * Invoked to note entry to irq or task transition from idle.  Note that
+ * usermode execution does -not- count as idle here!  The caller must
+ * have disabled interrupts.
+ */
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+       /* Adjust nesting, check for already non-idle. */
+       if (irq) {
+               rdtp->dynticks_idle_nesting++;
+               WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+               if (rdtp->dynticks_idle_nesting != 1)
+                       return; /* Already non-idle. */
+       } else {
+               /*
+                * Allow for irq misnesting.  Yes, it really is possible
+                * to enter an irq handler then never leave it, and maybe
+                * also vice versa.  Handle both possibilities.
+                */
+               if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
+                       rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
+                       WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+                       return; /* Already non-idle. */
+               } else {
+                       rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
+               }
+       }
+
+       /* Record end of idle period. */
+       smp_mb__before_atomic_inc();
+       atomic_inc(&rdtp->dynticks_idle);
+       smp_mb__after_atomic_inc();
+       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
+
+       /*
+        * If we are the timekeeping CPU, we are permitted to be non-idle
+        * during a system-idle state.  This must be the case, because
+        * the timekeeping CPU has to take scheduling-clock interrupts
+        * during the time that the system is transitioning to full
+        * system-idle state.  This means that the timekeeping CPU must
+        * invoke rcu_sysidle_force_exit() directly if it does anything
+        * more than take a scheduling-clock interrupt.
+        */
+       if (smp_processor_id() == tick_do_timer_cpu)
+               return;
+
+       /* Update system-idle state: We are clearly no longer fully idle! */
+       rcu_sysidle_force_exit();
+}
+
+/*
+ * Check to see if the current CPU is idle.  Note that usermode execution
+ * does not count as idle.  The caller must have disabled interrupts.
+ */
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+                                 unsigned long *maxj)
+{
+       int cur;
+       unsigned long j;
+       struct rcu_dynticks *rdtp = rdp->dynticks;
+
+       /*
+        * If some other CPU has already reported non-idle, if this is
+        * not the flavor of RCU that tracks sysidle state, or if this
+        * is an offline CPU or the timekeeping CPU, nothing to do.
+        */
+       if (!*isidle || rdp->rsp != rcu_sysidle_state ||
+           cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
+               return;
+       if (rcu_gp_in_progress(rdp->rsp))
+               WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+
+       /* Pick up current idle and NMI-nesting counter and check. */
+       cur = atomic_read(&rdtp->dynticks_idle);
+       if (cur & 0x1) {
+               *isidle = false; /* We are not idle! */
+               return;
+       }
+       smp_mb(); /* Read counters before timestamps. */
+
+       /* Pick up timestamps. */
+       j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+       /* If this CPU entered idle more recently, update maxj timestamp. */
+       if (ULONG_CMP_LT(*maxj, j))
+               *maxj = j;
+}
+
+/*
+ * Is this the flavor of RCU that is handling full-system idle?
+ */
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+       return rsp == rcu_sysidle_state;
+}
+
+/*
+ * Bind the grace-period kthread for the sysidle flavor of RCU to the
+ * timekeeping CPU.
+ */
+static void rcu_bind_gp_kthread(void)
+{
+       int cpu = ACCESS_ONCE(tick_do_timer_cpu);
+
+       if (cpu < 0 || cpu >= nr_cpu_ids)
+               return;
+       if (raw_smp_processor_id() != cpu)
+               set_cpus_allowed_ptr(current, cpumask_of(cpu));
+}
+
+/*
+ * Return a delay in jiffies based on the number of CPUs, rcu_node
+ * leaf fanout, and jiffies tick rate.  The idea is to allow larger
+ * systems more time to transition to full-idle state in order to
+ * avoid the cache thrashing that would otherwise occur on the state variable.
+ * Really small systems (less than a couple of tens of CPUs) should
+ * instead use a single global atomically incremented counter, and later
+ * versions of this will automatically reconfigure themselves accordingly.
+ */
+static unsigned long rcu_sysidle_delay(void)
+{
+       if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+               return 0;
+       return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
+}
+
+/*
+ * Advance the full-system-idle state.  This is invoked when all of
+ * the non-timekeeping CPUs are idle.
+ */
+static void rcu_sysidle(unsigned long j)
+{
+       /* Check the current state. */
+       switch (ACCESS_ONCE(full_sysidle_state)) {
+       case RCU_SYSIDLE_NOT:
+
+               /* First time all are idle, so note a short idle period. */
+               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+               break;
+
+       case RCU_SYSIDLE_SHORT:
+
+               /*
+                * Idle for a bit, time to advance to next state?
+                * cmpxchg failure means race with non-idle, let them win.
+                */
+               if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+                       (void)cmpxchg(&full_sysidle_state,
+                                     RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
+               break;
+
+       case RCU_SYSIDLE_LONG:
+
+               /*
+                * Do an additional check pass before advancing to full.
+                * cmpxchg failure means race with non-idle, let them win.
+                */
+               if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+                       (void)cmpxchg(&full_sysidle_state,
+                                     RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
+               break;
+
+       default:
+               break;
+       }
+}
+
+/*
+ * Found a non-idle non-timekeeping CPU, so kick the system-idle state
+ * back to the beginning.
+ */
+static void rcu_sysidle_cancel(void)
+{
+       smp_mb();
+       ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+}
+
+/*
+ * Update the sysidle state based on the results of a force-quiescent-state
+ * scan of the CPUs' dyntick-idle state.
+ */
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+                              unsigned long maxj, bool gpkt)
+{
+       if (rsp != rcu_sysidle_state)
+               return;  /* Wrong flavor, ignore. */
+       if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+               return;  /* Running state machine from timekeeping CPU. */
+       if (isidle)
+               rcu_sysidle(maxj);    /* More idle! */
+       else
+               rcu_sysidle_cancel(); /* Idle is over. */
+}
+
+/*
+ * Wrapper for rcu_sysidle_report() when called from the grace-period
+ * kthread's context.
+ */
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+                                 unsigned long maxj)
+{
+       rcu_sysidle_report(rsp, isidle, maxj, true);
+}
+
+/* Callback and function for forcing an RCU grace period. */
+struct rcu_sysidle_head {
+       struct rcu_head rh;
+       int inuse;
+};
+
+static void rcu_sysidle_cb(struct rcu_head *rhp)
+{
+       struct rcu_sysidle_head *rshp;
+
+       /*
+        * The following memory barrier is needed to replace the
+        * memory barriers that would normally be in the memory
+        * allocator.
+        */
+       smp_mb();  /* grace period precedes setting inuse. */
+
+       rshp = container_of(rhp, struct rcu_sysidle_head, rh);
+       ACCESS_ONCE(rshp->inuse) = 0;
+}
+
+/*
+ * Check to see if the system is fully idle, other than the timekeeping CPU.
+ * The caller must have disabled interrupts.
+ */
+bool rcu_sys_is_idle(void)
+{
+       static struct rcu_sysidle_head rsh;
+       int rss = ACCESS_ONCE(full_sysidle_state);
+
+       if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
+               return false;
+
+       /* Handle small-system case by doing a full scan of CPUs. */
+       if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
+               int oldrss = rss - 1;
+
+               /*
+                * One pass to advance to each state up to _FULL.
+                * Give up if any pass fails to advance the state.
+                */
+               while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
+                       int cpu;
+                       bool isidle = true;
+                       unsigned long maxj = jiffies - ULONG_MAX / 4;
+                       struct rcu_data *rdp;
+
+                       /* Scan all the CPUs looking for nonidle CPUs. */
+                       for_each_possible_cpu(cpu) {
+                               rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
+                               rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
+                               if (!isidle)
+                                       break;
+                       }
+                       rcu_sysidle_report(rcu_sysidle_state,
+                                          isidle, maxj, false);
+                       oldrss = rss;
+                       rss = ACCESS_ONCE(full_sysidle_state);
+               }
+       }
+
+       /* If this is the first observation of an idle period, record it. */
+       if (rss == RCU_SYSIDLE_FULL) {
+               rss = cmpxchg(&full_sysidle_state,
+                             RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
+               return rss == RCU_SYSIDLE_FULL;
+       }
+
+       smp_mb(); /* ensure rss load happens before later caller actions. */
+
+       /* If already fully idle, tell the caller (in case of races). */
+       if (rss == RCU_SYSIDLE_FULL_NOTED)
+               return true;
+
+       /*
+        * If we aren't there yet, and a grace period is not in flight,
+        * initiate a grace period.  Either way, tell the caller that
+        * we are not there yet.  We use an xchg() rather than an assignment
+        * to make up for the memory barriers that would otherwise be
+        * provided by the memory allocator.
+        */
+       if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
+           !rcu_gp_in_progress(rcu_sysidle_state) &&
+           !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
+               call_rcu(&rsh.rh, rcu_sysidle_cb);
+       return false;
+}
+
+/*
+ * Initialize dynticks sysidle state for CPUs coming online.
+ */
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+       rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
+}
+
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+                                 unsigned long *maxj)
+{
+}
+
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+       return false;
+}
+
+static void rcu_bind_gp_kthread(void)
+{
+}
+
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+                                 unsigned long maxj)
+{
+}
+
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
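The no-CBs enqueue path above (__call_rcu_nocb_enqueue()) is lock-free: each
producer xchg()es the shared ->nocb_tail pointer to claim the final ->next
slot, then stores the new callback through the old tail, so concurrent
call_rcu() invocations never serialize on a lock.  The stand-alone C sketch
below illustrates only that tail-xchg pattern; the names (struct node,
struct cbq, cbq_enqueue) are illustrative and are not part of this patch.

    /* Illustrative sketch of the tail-xchg enqueue pattern; not kernel code. */
    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    struct node {
            struct node *next;
            int id;
    };

    struct cbq {
            struct node *head;              /* drained by a single consumer */
            _Atomic(struct node **) tail;   /* last ->next slot in the list */
    };

    static void cbq_init(struct cbq *q)
    {
            q->head = NULL;
            atomic_init(&q->tail, &q->head);
    }

    static void cbq_enqueue(struct cbq *q, struct node *n)
    {
            struct node **old_tail;

            n->next = NULL;
            /* Claim the tail slot first, as the kernel code does with xchg(). */
            old_tail = atomic_exchange(&q->tail, &n->next);
            /* Then publish the new element through the previous tail pointer. */
            *old_tail = n;
    }

    int main(void)
    {
            struct node a = { .id = 1 }, b = { .id = 2 };
            struct cbq q;
            struct node *p;

            cbq_init(&q);
            cbq_enqueue(&q, &a);
            cbq_enqueue(&q, &b);
            for (p = q.head; p; p = p->next)        /* prints 1 then 2 */
                    printf("%d\n", p->id);
            return 0;
    }
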
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
new file mode 100644 (file)
index 0000000..3596797
--- /dev/null
@@ -0,0 +1,500 @@
+/*
+ * Read-Copy Update tracing for hierarchical implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Papers:  http://www.rdrop.com/users/paulmck/RCU
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *             Documentation/RCU
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define RCU_TREE_NONCORE
+#include "tree.h"
+
+static int r_open(struct inode *inode, struct file *file,
+                                       const struct seq_operations *op)
+{
+       int ret = seq_open(file, op);
+       if (!ret) {
+               struct seq_file *m = (struct seq_file *)file->private_data;
+               m->private = inode->i_private;
+       }
+       return ret;
+}
+
+static void *r_start(struct seq_file *m, loff_t *pos)
+{
+       struct rcu_state *rsp = (struct rcu_state *)m->private;
+       *pos = cpumask_next(*pos - 1, cpu_possible_mask);
+       if ((*pos) < nr_cpu_ids)
+               return per_cpu_ptr(rsp->rda, *pos);
+       return NULL;
+}
+
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       (*pos)++;
+       return r_start(m, pos);
+}
+
+static void r_stop(struct seq_file *m, void *v)
+{
+}
+
+static int show_rcubarrier(struct seq_file *m, void *v)
+{
+       struct rcu_state *rsp = (struct rcu_state *)m->private;
+       seq_printf(m, "bcc: %d nbd: %lu\n",
+                  atomic_read(&rsp->barrier_cpu_count),
+                  rsp->n_barrier_done);
+       return 0;
+}
+
+static int rcubarrier_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcubarrier, inode->i_private);
+}
+
+static const struct file_operations rcubarrier_fops = {
+       .owner = THIS_MODULE,
+       .open = rcubarrier_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = single_release,
+};
+
+#ifdef CONFIG_RCU_BOOST
+
+static char convert_kthread_status(unsigned int kthread_status)
+{
+       if (kthread_status > RCU_KTHREAD_MAX)
+               return '?';
+       return "SRWOY"[kthread_status];
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
+{
+       long ql, qll;
+
+       if (!rdp->beenonline)
+               return;
+       seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
+                  rdp->cpu,
+                  cpu_is_offline(rdp->cpu) ? '!' : ' ',
+                  ulong2long(rdp->completed), ulong2long(rdp->gpnum),
+                  rdp->passed_quiesce, rdp->qs_pending);
+       seq_printf(m, " dt=%d/%llx/%d df=%lu",
+                  atomic_read(&rdp->dynticks->dynticks),
+                  rdp->dynticks->dynticks_nesting,
+                  rdp->dynticks->dynticks_nmi_nesting,
+                  rdp->dynticks_fqs);
+       seq_printf(m, " of=%lu", rdp->offline_fqs);
+       rcu_nocb_q_lengths(rdp, &ql, &qll);
+       qll += rdp->qlen_lazy;
+       ql += rdp->qlen;
+       seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
+                  qll, ql,
+                  ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
+                       rdp->nxttail[RCU_NEXT_TAIL]],
+                  ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
+                       rdp->nxttail[RCU_NEXT_READY_TAIL]],
+                  ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
+                       rdp->nxttail[RCU_WAIT_TAIL]],
+                  ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
+#ifdef CONFIG_RCU_BOOST
+       seq_printf(m, " kt=%d/%c ktl=%x",
+                  per_cpu(rcu_cpu_has_work, rdp->cpu),
+                  convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
+                                         rdp->cpu)),
+                  per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+       seq_printf(m, " b=%ld", rdp->blimit);
+       seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
+                  rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
+                  rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
+}
+
+static int show_rcudata(struct seq_file *m, void *v)
+{
+       print_one_rcu_data(m, (struct rcu_data *)v);
+       return 0;
+}
+
+static const struct seq_operations rcudata_op = {
+       .start = r_start,
+       .next  = r_next,
+       .stop  = r_stop,
+       .show  = show_rcudata,
+};
+
+static int rcudata_open(struct inode *inode, struct file *file)
+{
+       return r_open(inode, file, &rcudata_op);
+}
+
+static const struct file_operations rcudata_fops = {
+       .owner = THIS_MODULE,
+       .open = rcudata_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = seq_release,
+};
+
+static int show_rcuexp(struct seq_file *m, void *v)
+{
+       struct rcu_state *rsp = (struct rcu_state *)m->private;
+
+       seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
+                  atomic_long_read(&rsp->expedited_start),
+                  atomic_long_read(&rsp->expedited_done),
+                  atomic_long_read(&rsp->expedited_wrap),
+                  atomic_long_read(&rsp->expedited_tryfail),
+                  atomic_long_read(&rsp->expedited_workdone1),
+                  atomic_long_read(&rsp->expedited_workdone2),
+                  atomic_long_read(&rsp->expedited_normal),
+                  atomic_long_read(&rsp->expedited_stoppedcpus),
+                  atomic_long_read(&rsp->expedited_done_tries),
+                  atomic_long_read(&rsp->expedited_done_lost),
+                  atomic_long_read(&rsp->expedited_done_exit));
+       return 0;
+}
+
+static int rcuexp_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcuexp, inode->i_private);
+}
+
+static const struct file_operations rcuexp_fops = {
+       .owner = THIS_MODULE,
+       .open = rcuexp_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = single_release,
+};
+
+#ifdef CONFIG_RCU_BOOST
+
+static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
+{
+       seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
+                  rnp->grplo, rnp->grphi,
+                  "T."[list_empty(&rnp->blkd_tasks)],
+                  "N."[!rnp->gp_tasks],
+                  "E."[!rnp->exp_tasks],
+                  "B."[!rnp->boost_tasks],
+                  convert_kthread_status(rnp->boost_kthread_status),
+                  rnp->n_tasks_boosted, rnp->n_exp_boosts,
+                  rnp->n_normal_boosts);
+       seq_printf(m, "j=%04x bt=%04x\n",
+                  (int)(jiffies & 0xffff),
+                  (int)(rnp->boost_time & 0xffff));
+       seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
+                  rnp->n_balk_blkd_tasks,
+                  rnp->n_balk_exp_gp_tasks,
+                  rnp->n_balk_boost_tasks,
+                  rnp->n_balk_notblocked,
+                  rnp->n_balk_notyet,
+                  rnp->n_balk_nos);
+}
+
+static int show_rcu_node_boost(struct seq_file *m, void *unused)
+{
+       struct rcu_node *rnp;
+
+       rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
+               print_one_rcu_node_boost(m, rnp);
+       return 0;
+}
+
+static int rcu_node_boost_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcu_node_boost, NULL);
+}
+
+static const struct file_operations rcu_node_boost_fops = {
+       .owner = THIS_MODULE,
+       .open = rcu_node_boost_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = single_release,
+};
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
+{
+       unsigned long gpnum;
+       int level = 0;
+       struct rcu_node *rnp;
+
+       gpnum = rsp->gpnum;
+       seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
+                  ulong2long(rsp->completed), ulong2long(gpnum),
+                  rsp->fqs_state,
+                  (long)(rsp->jiffies_force_qs - jiffies),
+                  (int)(jiffies & 0xffff));
+       seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
+                  rsp->n_force_qs, rsp->n_force_qs_ngp,
+                  rsp->n_force_qs - rsp->n_force_qs_ngp,
+                  rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
+       for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
+               if (rnp->level != level) {
+                       seq_puts(m, "\n");
+                       level = rnp->level;
+               }
+               seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d    ",
+                          rnp->qsmask, rnp->qsmaskinit,
+                          ".G"[rnp->gp_tasks != NULL],
+                          ".E"[rnp->exp_tasks != NULL],
+                          ".T"[!list_empty(&rnp->blkd_tasks)],
+                          rnp->grplo, rnp->grphi, rnp->grpnum);
+       }
+       seq_puts(m, "\n");
+}
+
+static int show_rcuhier(struct seq_file *m, void *v)
+{
+       struct rcu_state *rsp = (struct rcu_state *)m->private;
+       print_one_rcu_state(m, rsp);
+       return 0;
+}
+
+static int rcuhier_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcuhier, inode->i_private);
+}
+
+static const struct file_operations rcuhier_fops = {
+       .owner = THIS_MODULE,
+       .open = rcuhier_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = single_release,
+};
+
+static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
+{
+       unsigned long flags;
+       unsigned long completed;
+       unsigned long gpnum;
+       unsigned long gpage;
+       unsigned long gpmax;
+       struct rcu_node *rnp = &rsp->node[0];
+
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       completed = ACCESS_ONCE(rsp->completed);
+       gpnum = ACCESS_ONCE(rsp->gpnum);
+       if (completed == gpnum)
+               gpage = 0;
+       else
+               gpage = jiffies - rsp->gp_start;
+       gpmax = rsp->gp_max;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       seq_printf(m, "completed=%ld  gpnum=%ld  age=%ld  max=%ld\n",
+                  ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
+}
+
+static int show_rcugp(struct seq_file *m, void *v)
+{
+       struct rcu_state *rsp = (struct rcu_state *)m->private;
+       show_one_rcugp(m, rsp);
+       return 0;
+}
+
+static int rcugp_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcugp, inode->i_private);
+}
+
+static const struct file_operations rcugp_fops = {
+       .owner = THIS_MODULE,
+       .open = rcugp_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = single_release,
+};
+
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+       if (!rdp->beenonline)
+               return;
+       seq_printf(m, "%3d%cnp=%ld ",
+                  rdp->cpu,
+                  cpu_is_offline(rdp->cpu) ? '!' : ' ',
+                  rdp->n_rcu_pending);
+       seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
+                  rdp->n_rp_qs_pending,
+                  rdp->n_rp_report_qs,
+                  rdp->n_rp_cb_ready,
+                  rdp->n_rp_cpu_needs_gp);
+       seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
+                  rdp->n_rp_gp_completed,
+                  rdp->n_rp_gp_started,
+                  rdp->n_rp_need_nothing);
+}
+
+static int show_rcu_pending(struct seq_file *m, void *v)
+{
+       print_one_rcu_pending(m, (struct rcu_data *)v);
+       return 0;
+}
+
+static const struct seq_operations rcu_pending_op = {
+       .start = r_start,
+       .next  = r_next,
+       .stop  = r_stop,
+       .show  = show_rcu_pending,
+};
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+       return r_open(inode, file, &rcu_pending_op);
+}
+
+static const struct file_operations rcu_pending_fops = {
+       .owner = THIS_MODULE,
+       .open = rcu_pending_open,
+       .read = seq_read,
+       .llseek = no_llseek,
+       .release = seq_release,
+};
+
+static int show_rcutorture(struct seq_file *m, void *unused)
+{
+       seq_printf(m, "rcutorture test sequence: %lu %s\n",
+                  rcutorture_testseq >> 1,
+                  (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
+       seq_printf(m, "rcutorture update version number: %lu\n",
+                  rcutorture_vernum);
+       return 0;
+}
+
+static int rcutorture_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcutorture, NULL);
+}
+
+static const struct file_operations rcutorture_fops = {
+       .owner = THIS_MODULE,
+       .open = rcutorture_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+
+static int __init rcutree_trace_init(void)
+{
+       struct rcu_state *rsp;
+       struct dentry *retval;
+       struct dentry *rspdir;
+
+       rcudir = debugfs_create_dir("rcu", NULL);
+       if (!rcudir)
+               goto free_out;
+
+       for_each_rcu_flavor(rsp) {
+               rspdir = debugfs_create_dir(rsp->name, rcudir);
+               if (!rspdir)
+                       goto free_out;
+
+               retval = debugfs_create_file("rcudata", 0444,
+                               rspdir, rsp, &rcudata_fops);
+               if (!retval)
+                       goto free_out;
+
+               retval = debugfs_create_file("rcuexp", 0444,
+                               rspdir, rsp, &rcuexp_fops);
+               if (!retval)
+                       goto free_out;
+
+               retval = debugfs_create_file("rcu_pending", 0444,
+                               rspdir, rsp, &rcu_pending_fops);
+               if (!retval)
+                       goto free_out;
+
+               retval = debugfs_create_file("rcubarrier", 0444,
+                               rspdir, rsp, &rcubarrier_fops);
+               if (!retval)
+                       goto free_out;
+
+#ifdef CONFIG_RCU_BOOST
+               if (rsp == &rcu_preempt_state) {
+                       retval = debugfs_create_file("rcuboost", 0444,
+                               rspdir, NULL, &rcu_node_boost_fops);
+                       if (!retval)
+                               goto free_out;
+               }
+#endif
+
+               retval = debugfs_create_file("rcugp", 0444,
+                               rspdir, rsp, &rcugp_fops);
+               if (!retval)
+                       goto free_out;
+
+               retval = debugfs_create_file("rcuhier", 0444,
+                               rspdir, rsp, &rcuhier_fops);
+               if (!retval)
+                       goto free_out;
+       }
+
+       retval = debugfs_create_file("rcutorture", 0444, rcudir,
+                                               NULL, &rcutorture_fops);
+       if (!retval)
+               goto free_out;
+       return 0;
+free_out:
+       debugfs_remove_recursive(rcudir);
+       return 1;
+}
+
+static void __exit rcutree_trace_cleanup(void)
+{
+       debugfs_remove_recursive(rcudir);
+}
+
+
+module_init(rcutree_trace_init);
+module_exit(rcutree_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
+MODULE_LICENSE("GPL");
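rcutree_trace_init() above creates one directory per RCU flavor under the
debugfs "rcu/" directory, each holding rcudata, rcuexp, rcu_pending,
rcubarrier, rcugp and rcuhier (plus rcuboost for the preemptible flavor when
CONFIG_RCU_BOOST=y).  As a rough user-space sketch, assuming a kernel built
with this tracing enabled, debugfs mounted at /sys/kernel/debug, and a flavor
named rcu_sched, the grace-period summary emitted by show_one_rcugp() could
be read like this:

    /* Illustrative user-space reader; path and flavor name are assumptions. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            const char *path = "/sys/kernel/debug/rcu/rcu_sched/rcugp";
            char line[256];
            FILE *f = fopen(path, "r");

            if (!f) {
                    perror(path);
                    return EXIT_FAILURE;
            }
            /* show_one_rcugp() prints "completed=  gpnum=  age=  max=". */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return EXIT_SUCCESS;
    }
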
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
new file mode 100644 (file)
index 0000000..6cb3dff
--- /dev/null
@@ -0,0 +1,347 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2001
+ *
+ * Authors: Dipankar Sarma <dipankar@in.ibm.com>
+ *         Manfred Spraul <manfred@colorfullife.com>
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *             http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/export.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rcu.h>
+
+#include "rcu.h"
+
+MODULE_ALIAS("rcupdate");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcupdate."
+
+module_param(rcu_expedited, int, 0);
+
+#ifdef CONFIG_PREEMPT_RCU
+
+/*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting; shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+       current->rcu_read_lock_nesting++;
+       barrier();  /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+       struct task_struct *t = current;
+
+       if (t->rcu_read_lock_nesting != 1) {
+               --t->rcu_read_lock_nesting;
+       } else {
+               barrier();  /* critical section before exit code. */
+               t->rcu_read_lock_nesting = INT_MIN;
+#ifdef CONFIG_PROVE_RCU_DELAY
+               udelay(10); /* Make preemption more probable. */
+#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
+               barrier();  /* assign before ->rcu_read_unlock_special load */
+               if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+                       rcu_read_unlock_special(t);
+               barrier();  /* ->rcu_read_unlock_special load before assign */
+               t->rcu_read_lock_nesting = 0;
+       }
+#ifdef CONFIG_PROVE_LOCKING
+       {
+               int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+               WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+       }
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+
+static struct lock_class_key rcu_bh_lock_key;
+struct lockdep_map rcu_bh_lock_map =
+       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
+EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
+
+static struct lock_class_key rcu_sched_lock_key;
+struct lockdep_map rcu_sched_lock_map =
+       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
+EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
+
+int notrace debug_lockdep_rcu_enabled(void)
+{
+       return rcu_scheduler_active && debug_locks &&
+              current->lockdep_recursion == 0;
+}
+EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
+
+/**
+ * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
+ *
+ * Check for bottom half being disabled, which covers both the
+ * CONFIG_PROVE_RCU and !CONFIG_PROVE_RCU cases.  Note that if someone uses
+ * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
+ * will show the situation.  This is useful for debug checks in functions
+ * that require that they be called within an RCU read-side critical
+ * section.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_bh_held(void)
+{
+       if (!debug_lockdep_rcu_enabled())
+               return 1;
+       if (!rcu_is_watching())
+               return 0;
+       if (!rcu_lockdep_current_cpu_online())
+               return 0;
+       return in_softirq() || irqs_disabled();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+struct rcu_synchronize {
+       struct rcu_head head;
+       struct completion completion;
+};
+
+/*
+ * Awaken the corresponding synchronize_rcu() instance now that a
+ * grace period has elapsed.
+ */
+static void wakeme_after_rcu(struct rcu_head *head)
+{
+       struct rcu_synchronize *rcu;
+
+       rcu = container_of(head, struct rcu_synchronize, head);
+       complete(&rcu->completion);
+}
+
+void wait_rcu_gp(call_rcu_func_t crf)
+{
+       struct rcu_synchronize rcu;
+
+       init_rcu_head_on_stack(&rcu.head);
+       init_completion(&rcu.completion);
+       /* Will wake me after RCU finished. */
+       crf(&rcu.head, wakeme_after_rcu);
+       /* Wait for it. */
+       wait_for_completion(&rcu.completion);
+       destroy_rcu_head_on_stack(&rcu.head);
+}
+EXPORT_SYMBOL_GPL(wait_rcu_gp);
+
+#ifdef CONFIG_PROVE_RCU
+/*
+ * wrapper function to avoid #include problems.
+ */
+int rcu_my_thread_group_empty(void)
+{
+       return thread_group_empty(current);
+}
+EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+static inline void debug_init_rcu_head(struct rcu_head *head)
+{
+       debug_object_init(head, &rcuhead_debug_descr);
+}
+
+static inline void debug_rcu_head_free(struct rcu_head *head)
+{
+       debug_object_free(head, &rcuhead_debug_descr);
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ * Activation is performed internally by call_rcu().
+ */
+static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
+{
+       struct rcu_head *head = addr;
+
+       switch (state) {
+
+       case ODEBUG_STATE_NOTAVAILABLE:
+               /*
+                * This is not really a fixup. We just make sure that it is
+                * tracked in the object tracker.
+                */
+               debug_object_init(head, &rcuhead_debug_descr);
+               debug_object_activate(head, &rcuhead_debug_descr);
+               return 0;
+       default:
+               return 1;
+       }
+}
+
+/**
+ * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
+ * @head: pointer to rcu_head structure to be initialized
+ *
+ * This function informs debugobjects of a new rcu_head structure that
+ * has been allocated as an auto variable on the stack.  This function
+ * is not required for rcu_head structures that are statically defined or
+ * that are dynamically allocated on the heap.  This function has no
+ * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
+ */
+void init_rcu_head_on_stack(struct rcu_head *head)
+{
+       debug_object_init_on_stack(head, &rcuhead_debug_descr);
+}
+EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
+
+/**
+ * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
+ * @head: pointer to rcu_head structure about to go out of scope
+ *
+ * This function informs debugobjects that an on-stack rcu_head structure
+ * is about to go out of scope.  As with init_rcu_head_on_stack(), this
+ * function is not required for rcu_head structures that are statically
+ * defined or that are dynamically allocated on the heap.  Also as with
+ * init_rcu_head_on_stack(), this function has no effect for
+ * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
+ */
+void destroy_rcu_head_on_stack(struct rcu_head *head)
+{
+       debug_object_free(head, &rcuhead_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
+
+struct debug_obj_descr rcuhead_debug_descr = {
+       .name = "rcu_head",
+       .fixup_activate = rcuhead_fixup_activate,
+};
+EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
+#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
+void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
+                              unsigned long secs,
+                              unsigned long c_old, unsigned long c)
+{
+       trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
+}
+EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+       do { } while (0)
+#endif
+
+#ifdef CONFIG_RCU_STALL_COMMON
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA         (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA         0
+#endif
+
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
+module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
+
+int rcu_jiffies_till_stall_check(void)
+{
+       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+       /*
+        * Limit check must be consistent with the Kconfig limits
+        * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+        */
+       if (till_stall_check < 3) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+               till_stall_check = 3;
+       } else if (till_stall_check > 300) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+               till_stall_check = 300;
+       }
+       return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+       rcu_cpu_stall_suppress = 1;
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+       .notifier_call = rcu_panic,
+};
+
+static int __init check_cpu_stall_init(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+       return 0;
+}
+early_initcall(check_cpu_stall_init);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
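A minimal sketch of the completion-based pattern that wait_rcu_gp() packages above: post a grace-period callback and sleep on a completion until it runs. The struct my_sync, my_wakeup() and my_wait_for_gp() names are illustrative only, not part of this patch.

#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>

struct my_sync {
	struct rcu_head head;
	struct completion done;
};

/* Grace-period callback: wake whoever is blocked in my_wait_for_gp(). */
static void my_wakeup(struct rcu_head *head)
{
	struct my_sync *s = container_of(head, struct my_sync, head);

	complete(&s->done);
}

/* Block until an RCU grace period has elapsed, using an on-stack rcu_head. */
static void my_wait_for_gp(void)
{
	struct my_sync s;

	init_rcu_head_on_stack(&s.head);
	init_completion(&s.done);
	call_rcu(&s.head, my_wakeup);		/* queue the wakeup callback */
	wait_for_completion(&s.done);		/* sleep until it runs */
	destroy_rcu_head_on_stack(&s.head);
}

Because wait_rcu_gp() takes the callback-posting primitive as a call_rcu_func_t argument, the same body can serve call_rcu(), call_rcu_bh() or call_rcu_sched() callers.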
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
deleted file mode 100644 (file)
index b02a339..0000000
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2001
- *
- * Authors: Dipankar Sarma <dipankar@in.ibm.com>
- *         Manfred Spraul <manfred@colorfullife.com>
- *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
- * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
- * Papers:
- * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
- * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *             http://lse.sourceforge.net/locking/rcupdate.html
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/export.h>
-#include <linux/hardirq.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/rcu.h>
-
-#include "rcu.h"
-
-module_param(rcu_expedited, int, 0);
-
-#ifdef CONFIG_PREEMPT_RCU
-
-/*
- * Preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-       current->rcu_read_lock_nesting++;
-       barrier();  /* critical section after entry code. */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
- * Preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-       struct task_struct *t = current;
-
-       if (t->rcu_read_lock_nesting != 1) {
-               --t->rcu_read_lock_nesting;
-       } else {
-               barrier();  /* critical section before exit code. */
-               t->rcu_read_lock_nesting = INT_MIN;
-#ifdef CONFIG_PROVE_RCU_DELAY
-               udelay(10); /* Make preemption more probable. */
-#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
-               barrier();  /* assign before ->rcu_read_unlock_special load */
-               if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-                       rcu_read_unlock_special(t);
-               barrier();  /* ->rcu_read_unlock_special load before assign */
-               t->rcu_read_lock_nesting = 0;
-       }
-#ifdef CONFIG_PROVE_LOCKING
-       {
-               int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
-               WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
-       }
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static struct lock_class_key rcu_lock_key;
-struct lockdep_map rcu_lock_map =
-       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
-EXPORT_SYMBOL_GPL(rcu_lock_map);
-
-static struct lock_class_key rcu_bh_lock_key;
-struct lockdep_map rcu_bh_lock_map =
-       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
-EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
-
-static struct lock_class_key rcu_sched_lock_key;
-struct lockdep_map rcu_sched_lock_map =
-       STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
-EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
-
-int notrace debug_lockdep_rcu_enabled(void)
-{
-       return rcu_scheduler_active && debug_locks &&
-              current->lockdep_recursion == 0;
-}
-EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
-
-/**
- * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
- *
- * Check for bottom half being disabled, which covers both the
- * CONFIG_PROVE_RCU and not cases.  Note that if someone uses
- * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
- * will show the situation.  This is useful for debug checks in functions
- * that require that they be called within an RCU read-side critical
- * section.
- *
- * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
- *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
- * offline from an RCU perspective, so check for those as well.
- */
-int rcu_read_lock_bh_held(void)
-{
-       if (!debug_lockdep_rcu_enabled())
-               return 1;
-       if (rcu_is_cpu_idle())
-               return 0;
-       if (!rcu_lockdep_current_cpu_online())
-               return 0;
-       return in_softirq() || irqs_disabled();
-}
-EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
-
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-struct rcu_synchronize {
-       struct rcu_head head;
-       struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_rcu() instance now that a
- * grace period has elapsed.
- */
-static void wakeme_after_rcu(struct rcu_head  *head)
-{
-       struct rcu_synchronize *rcu;
-
-       rcu = container_of(head, struct rcu_synchronize, head);
-       complete(&rcu->completion);
-}
-
-void wait_rcu_gp(call_rcu_func_t crf)
-{
-       struct rcu_synchronize rcu;
-
-       init_rcu_head_on_stack(&rcu.head);
-       init_completion(&rcu.completion);
-       /* Will wake me after RCU finished. */
-       crf(&rcu.head, wakeme_after_rcu);
-       /* Wait for it. */
-       wait_for_completion(&rcu.completion);
-       destroy_rcu_head_on_stack(&rcu.head);
-}
-EXPORT_SYMBOL_GPL(wait_rcu_gp);
-
-#ifdef CONFIG_PROVE_RCU
-/*
- * wrapper function to avoid #include problems.
- */
-int rcu_my_thread_group_empty(void)
-{
-       return thread_group_empty(current);
-}
-EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
-#endif /* #ifdef CONFIG_PROVE_RCU */
-
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-static inline void debug_init_rcu_head(struct rcu_head *head)
-{
-       debug_object_init(head, &rcuhead_debug_descr);
-}
-
-static inline void debug_rcu_head_free(struct rcu_head *head)
-{
-       debug_object_free(head, &rcuhead_debug_descr);
-}
-
-/*
- * fixup_activate is called when:
- * - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
- * Activation is performed internally by call_rcu().
- */
-static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
-{
-       struct rcu_head *head = addr;
-
-       switch (state) {
-
-       case ODEBUG_STATE_NOTAVAILABLE:
-               /*
-                * This is not really a fixup. We just make sure that it is
-                * tracked in the object tracker.
-                */
-               debug_object_init(head, &rcuhead_debug_descr);
-               debug_object_activate(head, &rcuhead_debug_descr);
-               return 0;
-       default:
-               return 1;
-       }
-}
-
-/**
- * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
- * @head: pointer to rcu_head structure to be initialized
- *
- * This function informs debugobjects of a new rcu_head structure that
- * has been allocated as an auto variable on the stack.  This function
- * is not required for rcu_head structures that are statically defined or
- * that are dynamically allocated on the heap.  This function has no
- * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
- */
-void init_rcu_head_on_stack(struct rcu_head *head)
-{
-       debug_object_init_on_stack(head, &rcuhead_debug_descr);
-}
-EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
-
-/**
- * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
- * @head: pointer to rcu_head structure to be initialized
- *
- * This function informs debugobjects that an on-stack rcu_head structure
- * is about to go out of scope.  As with init_rcu_head_on_stack(), this
- * function is not required for rcu_head structures that are statically
- * defined or that are dynamically allocated on the heap.  Also as with
- * init_rcu_head_on_stack(), this function has no effect for
- * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
- */
-void destroy_rcu_head_on_stack(struct rcu_head *head)
-{
-       debug_object_free(head, &rcuhead_debug_descr);
-}
-EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
-
-struct debug_obj_descr rcuhead_debug_descr = {
-       .name = "rcu_head",
-       .fixup_activate = rcuhead_fixup_activate,
-};
-EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
-#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
-                              unsigned long secs,
-                              unsigned long c_old, unsigned long c)
-{
-       trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
-}
-EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-       do { } while (0)
-#endif
-
-#ifdef CONFIG_RCU_STALL_COMMON
-
-#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA         (5 * HZ)
-#else
-#define RCU_STALL_DELAY_DELTA         0
-#endif
-
-int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
-int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
-
-module_param(rcu_cpu_stall_suppress, int, 0644);
-module_param(rcu_cpu_stall_timeout, int, 0644);
-
-int rcu_jiffies_till_stall_check(void)
-{
-       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
-
-       /*
-        * Limit check must be consistent with the Kconfig limits
-        * for CONFIG_RCU_CPU_STALL_TIMEOUT.
-        */
-       if (till_stall_check < 3) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
-               till_stall_check = 3;
-       } else if (till_stall_check > 300) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
-               till_stall_check = 300;
-       }
-       return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
-}
-
-static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
-{
-       rcu_cpu_stall_suppress = 1;
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block rcu_panic_block = {
-       .notifier_call = rcu_panic,
-};
-
-static int __init check_cpu_stall_init(void)
-{
-       atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
-       return 0;
-}
-early_initcall(check_cpu_stall_init);
-
-#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
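rcu_read_lock_bh_held() above exists so that lockdep-aware code can assert it is running inside an RCU-bh read-side critical section. A hedged sketch of such a check, assuming CONFIG_PROVE_RCU; struct my_table, my_lookup_bh() and my_read() are hypothetical names invented for illustration.

#include <linux/rcupdate.h>

struct my_entry {
	int key;
};

struct my_table {
	struct my_entry __rcu *slot;
};

/* Lookup that documents its locking requirement via rcu_read_lock_bh_held(). */
static struct my_entry *my_lookup_bh(struct my_table *t)
{
	rcu_lockdep_assert(rcu_read_lock_bh_held(),
			   "my_lookup_bh() called outside rcu_read_lock_bh()");
	return rcu_dereference_bh(t->slot);
}

/* Reader side: BH-disabled RCU critical section around the lookup. */
static int my_read(struct my_table *t)
{
	struct my_entry *e;
	int key = -1;

	rcu_read_lock_bh();
	e = my_lookup_bh(t);
	if (e)
		key = e->key;
	rcu_read_unlock_bh();
	return key;
}

Under CONFIG_PROVE_RCU the assertion splats if the caller forgot rcu_read_lock_bh(); in other builds it compiles away, matching the debug-only intent described in the comment above.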
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
deleted file mode 100644 (file)
index 9ed6075..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2008
- *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *             Documentation/RCU
- */
-#include <linux/completion.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/rcupdate.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/cpu.h>
-#include <linux/prefetch.h>
-
-#ifdef CONFIG_RCU_TRACE
-#include <trace/events/rcu.h>
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
-#include "rcu.h"
-
-/* Forward declarations for rcutiny_plugin.h. */
-struct rcu_ctrlblk;
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
-static void rcu_process_callbacks(struct softirq_action *unused);
-static void __call_rcu(struct rcu_head *head,
-                      void (*func)(struct rcu_head *rcu),
-                      struct rcu_ctrlblk *rcp);
-
-static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-
-#include "rcutiny_plugin.h"
-
-/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
-static void rcu_idle_enter_common(long long newval)
-{
-       if (newval) {
-               RCU_TRACE(trace_rcu_dyntick("--=",
-                                           rcu_dynticks_nesting, newval));
-               rcu_dynticks_nesting = newval;
-               return;
-       }
-       RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval));
-       if (!is_idle_task(current)) {
-               struct task_struct *idle = idle_task(smp_processor_id());
-
-               RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
-                                           rcu_dynticks_nesting, newval));
-               ftrace_dump(DUMP_ALL);
-               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-                         current->pid, current->comm,
-                         idle->pid, idle->comm); /* must be idle task! */
-       }
-       rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
-       barrier();
-       rcu_dynticks_nesting = newval;
-}
-
-/*
- * Enter idle, which is an extended quiescent state if we have fully
- * entered that mode (i.e., if the new value of dynticks_nesting is zero).
- */
-void rcu_idle_enter(void)
-{
-       unsigned long flags;
-       long long newval;
-
-       local_irq_save(flags);
-       WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
-       if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
-           DYNTICK_TASK_NEST_VALUE)
-               newval = 0;
-       else
-               newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
-       rcu_idle_enter_common(newval);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
-
-/*
- * Exit an interrupt handler towards idle.
- */
-void rcu_irq_exit(void)
-{
-       unsigned long flags;
-       long long newval;
-
-       local_irq_save(flags);
-       newval = rcu_dynticks_nesting - 1;
-       WARN_ON_ONCE(newval < 0);
-       rcu_idle_enter_common(newval);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_irq_exit);
-
-/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
-static void rcu_idle_exit_common(long long oldval)
-{
-       if (oldval) {
-               RCU_TRACE(trace_rcu_dyntick("++=",
-                                           oldval, rcu_dynticks_nesting));
-               return;
-       }
-       RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
-       if (!is_idle_task(current)) {
-               struct task_struct *idle = idle_task(smp_processor_id());
-
-               RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
-                         oldval, rcu_dynticks_nesting));
-               ftrace_dump(DUMP_ALL);
-               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-                         current->pid, current->comm,
-                         idle->pid, idle->comm); /* must be idle task! */
-       }
-}
-
-/*
- * Exit idle, so that we are no longer in an extended quiescent state.
- */
-void rcu_idle_exit(void)
-{
-       unsigned long flags;
-       long long oldval;
-
-       local_irq_save(flags);
-       oldval = rcu_dynticks_nesting;
-       WARN_ON_ONCE(rcu_dynticks_nesting < 0);
-       if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
-               rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-       else
-               rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_idle_exit_common(oldval);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
-
-/*
- * Enter an interrupt handler, moving away from idle.
- */
-void rcu_irq_enter(void)
-{
-       unsigned long flags;
-       long long oldval;
-
-       local_irq_save(flags);
-       oldval = rcu_dynticks_nesting;
-       rcu_dynticks_nesting++;
-       WARN_ON_ONCE(rcu_dynticks_nesting == 0);
-       rcu_idle_exit_common(oldval);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_irq_enter);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-/*
- * Test whether RCU thinks that the current CPU is idle.
- */
-int rcu_is_cpu_idle(void)
-{
-       return !rcu_dynticks_nesting;
-}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
-
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/*
- * Test whether the current CPU was interrupted from idle.  Nested
- * interrupts don't count, we must be running at the first interrupt
- * level.
- */
-static int rcu_is_cpu_rrupt_from_idle(void)
-{
-       return rcu_dynticks_nesting <= 1;
-}
-
-/*
- * Helper function for rcu_sched_qs() and rcu_bh_qs().
- * Also irqs are disabled to avoid confusion due to interrupt handlers
- * invoking call_rcu().
- */
-static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
-{
-       RCU_TRACE(reset_cpu_stall_ticks(rcp));
-       if (rcp->rcucblist != NULL &&
-           rcp->donetail != rcp->curtail) {
-               rcp->donetail = rcp->curtail;
-               return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
- * are at it, given that any rcu quiescent state is also an rcu_bh
- * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
- */
-void rcu_sched_qs(int cpu)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
-           rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
-       local_irq_restore(flags);
-}
-
-/*
- * Record an rcu_bh quiescent state.
- */
-void rcu_bh_qs(int cpu)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       if (rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
-       local_irq_restore(flags);
-}
-
-/*
- * Check to see if the scheduling-clock interrupt came from an extended
- * quiescent state, and, if so, tell RCU about it.  This function must
- * be called from hardirq context.  It is normally called from the
- * scheduling-clock interrupt.
- */
-void rcu_check_callbacks(int cpu, int user)
-{
-       RCU_TRACE(check_cpu_stalls());
-       if (user || rcu_is_cpu_rrupt_from_idle())
-               rcu_sched_qs(cpu);
-       else if (!in_softirq())
-               rcu_bh_qs(cpu);
-}
-
-/*
- * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
- * whose grace period has elapsed.
- */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
-{
-       const char *rn = NULL;
-       struct rcu_head *next, *list;
-       unsigned long flags;
-       RCU_TRACE(int cb_count = 0);
-
-       /* If no RCU callbacks ready to invoke, just return. */
-       if (&rcp->rcucblist == rcp->donetail) {
-               RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
-               RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
-                                             ACCESS_ONCE(rcp->rcucblist),
-                                             need_resched(),
-                                             is_idle_task(current),
-                                             false));
-               return;
-       }
-
-       /* Move the ready-to-invoke callbacks to a local list. */
-       local_irq_save(flags);
-       RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
-       list = rcp->rcucblist;
-       rcp->rcucblist = *rcp->donetail;
-       *rcp->donetail = NULL;
-       if (rcp->curtail == rcp->donetail)
-               rcp->curtail = &rcp->rcucblist;
-       rcp->donetail = &rcp->rcucblist;
-       local_irq_restore(flags);
-
-       /* Invoke the callbacks on the local list. */
-       RCU_TRACE(rn = rcp->name);
-       while (list) {
-               next = list->next;
-               prefetch(next);
-               debug_rcu_head_unqueue(list);
-               local_bh_disable();
-               __rcu_reclaim(rn, list);
-               local_bh_enable();
-               list = next;
-               RCU_TRACE(cb_count++);
-       }
-       RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
-       RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
-                                     is_idle_task(current),
-                                     false));
-}
-
-static void rcu_process_callbacks(struct softirq_action *unused)
-{
-       __rcu_process_callbacks(&rcu_sched_ctrlblk);
-       __rcu_process_callbacks(&rcu_bh_ctrlblk);
-}
-
-/*
- * Wait for a grace period to elapse.  But it is illegal to invoke
- * synchronize_sched() from within an RCU read-side critical section.
- * Therefore, any legal call to synchronize_sched() is a quiescent
- * state, and so on a UP system, synchronize_sched() need do nothing.
- * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
- * benefits of doing might_sleep() to reduce latency.)
- *
- * Cool, huh?  (Due to Josh Triplett.)
- *
- * But we want to make this a static inline later.  The cond_resched()
- * currently makes this problematic.
- */
-void synchronize_sched(void)
-{
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_sched() in RCU read-side critical section");
-       cond_resched();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
-/*
- * Helper function for call_rcu() and call_rcu_bh().
- */
-static void __call_rcu(struct rcu_head *head,
-                      void (*func)(struct rcu_head *rcu),
-                      struct rcu_ctrlblk *rcp)
-{
-       unsigned long flags;
-
-       debug_rcu_head_queue(head);
-       head->func = func;
-       head->next = NULL;
-
-       local_irq_save(flags);
-       *rcp->curtail = head;
-       rcp->curtail = &head->next;
-       RCU_TRACE(rcp->qlen++);
-       local_irq_restore(flags);
-}
-
-/*
- * Post an RCU callback to be invoked after the end of an RCU-sched grace
- * period.  But since we have but one CPU, that would be after any
- * quiescent state.
- */
-void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_sched_ctrlblk);
-}
-EXPORT_SYMBOL_GPL(call_rcu_sched);
-
-/*
- * Post an RCU bottom-half callback to be invoked after any subsequent
- * quiescent state.
- */
-void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_bh_ctrlblk);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-
-void rcu_init(void)
-{
-       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
-}
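__call_rcu() above enqueues in O(1) by keeping rcp->curtail as a pointer to the last callback's ->next field (the same tail pointer-to-pointer idiom used for ->donetail). A self-contained user-space sketch of that list discipline; struct cb, struct cblist and the helpers are illustrative names only.

#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	const char *name;
};

struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the last element's ->next (or at head) */
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;	/* empty list: tail points at the head pointer */
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail = c;		/* link the new element at the current tail */
	l->tail = &c->next;	/* advance the tail to the new ->next field */
}

int main(void)
{
	struct cblist l;
	struct cb a = { .name = "first" };
	struct cb b = { .name = "second" };
	struct cb *p;

	cblist_init(&l);
	cblist_enqueue(&l, &a);
	cblist_enqueue(&l, &b);
	for (p = l.head; p; p = p->next)
		printf("%s\n", p->name);
	return 0;
}

Splitting one list into "done" and "pending" halves then needs only the donetail/curtail pointer juggling seen in rcu_qsctr_help() and __rcu_process_callbacks() above, never a list walk.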
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
deleted file mode 100644 (file)
index 280d06c..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
- * Internal non-public definitions that provide either classic
- * or preemptible semantics.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2010 Linaro
- *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#include <linux/kthread.h>
-#include <linux/module.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
-       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
-       struct rcu_head **curtail;      /* ->next pointer of last CB. */
-       RCU_TRACE(long qlen);           /* Number of pending CBs. */
-       RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
-       RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
-       RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
-       RCU_TRACE(const char *name);    /* Name of RCU type. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-       .donetail       = &rcu_sched_ctrlblk.rcucblist,
-       .curtail        = &rcu_sched_ctrlblk.rcucblist,
-       RCU_TRACE(.name = "rcu_sched")
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-       .donetail       = &rcu_bh_ctrlblk.rcucblist,
-       .curtail        = &rcu_bh_ctrlblk.rcucblist,
-       RCU_TRACE(.name = "rcu_bh")
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#include <linux/kernel_stat.h>
-
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-
-/*
- * During boot, we forgive RCU lockdep issues.  After this function is
- * invoked, we start taking RCU lockdep issues seriously.
- */
-void __init rcu_scheduler_starting(void)
-{
-       WARN_ON(nr_context_switches() > 0);
-       rcu_scheduler_active = 1;
-}
-
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       rcp->qlen -= n;
-       local_irq_restore(flags);
-}
-
-/*
- * Dump statistics for TINY_RCU, such as they are.
- */
-static int show_tiny_stats(struct seq_file *m, void *unused)
-{
-       seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
-       seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
-       return 0;
-}
-
-static int show_tiny_stats_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_tiny_stats, NULL);
-}
-
-static const struct file_operations show_tiny_stats_fops = {
-       .owner = THIS_MODULE,
-       .open = show_tiny_stats_open,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutiny_trace_init(void)
-{
-       struct dentry *retval;
-
-       rcudir = debugfs_create_dir("rcu", NULL);
-       if (!rcudir)
-               goto free_out;
-       retval = debugfs_create_file("rcudata", 0444, rcudir,
-                                    NULL, &show_tiny_stats_fops);
-       if (!retval)
-               goto free_out;
-       return 0;
-free_out:
-       debugfs_remove_recursive(rcudir);
-       return 1;
-}
-
-static void __exit rcutiny_trace_cleanup(void)
-{
-       debugfs_remove_recursive(rcudir);
-}
-
-module_init(rcutiny_trace_init);
-module_exit(rcutiny_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
-MODULE_LICENSE("GPL");
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-       unsigned long j;
-       unsigned long js;
-
-       if (rcu_cpu_stall_suppress)
-               return;
-       rcp->ticks_this_gp++;
-       j = jiffies;
-       js = rcp->jiffies_stall;
-       if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
-               pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
-                      rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
-                      jiffies - rcp->gp_start, rcp->qlen);
-               dump_stack();
-       }
-       if (*rcp->curtail && ULONG_CMP_GE(j, js))
-               rcp->jiffies_stall = jiffies +
-                       3 * rcu_jiffies_till_stall_check() + 3;
-       else if (ULONG_CMP_GE(j, js))
-               rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
-}
-
-static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
-{
-       rcp->ticks_this_gp = 0;
-       rcp->gp_start = jiffies;
-       rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
-}
-
-static void check_cpu_stalls(void)
-{
-       RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
-       RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
-}
-
-#endif /* #ifdef CONFIG_RCU_TRACE */
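check_cpu_stall() above fires once jiffies reaches rcp->jiffies_stall, which is armed from rcu_jiffies_till_stall_check(); that helper clamps the rcu_cpu_stall_timeout module parameter to the 3..300 second range allowed for CONFIG_RCU_CPU_STALL_TIMEOUT before converting to jiffies. A user-space sketch of just that clamping arithmetic, with HZ and stall_check_jiffies() as illustrative stand-ins and the CONFIG_PROVE_RCU slack ignored:

#include <stdio.h>

#define HZ 1000		/* assumed tick rate, for illustration only */

/* Clamp a stall timeout (seconds) to 3..300 and convert to jiffies. */
static long stall_check_jiffies(int timeout_s)
{
	if (timeout_s < 3)
		timeout_s = 3;
	else if (timeout_s > 300)
		timeout_s = 300;
	return (long)timeout_s * HZ;
}

int main(void)
{
	printf("%ld\n", stall_check_jiffies(1));	/* clamped up to 3 s     */
	printf("%ld\n", stall_check_jiffies(21));	/* in range: 21 s        */
	printf("%ld\n", stall_check_jiffies(900));	/* clamped down to 300 s */
	return 0;
}

Once a stall has been reported, check_cpu_stall() re-arms jiffies_stall roughly three stall-check intervals later (3 * rcu_jiffies_till_stall_check() + 3) so the same stall is not re-reported on every tick.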
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
deleted file mode 100644 (file)
index be63101..0000000
+++ /dev/null
@@ -1,2139 +0,0 @@
-/*
- * Read-Copy Update module-based torture test facility
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2005, 2006
- *
- * Authors: Paul E. McKenney <paulmck@us.ibm.com>
- *       Josh Triplett <josh@freedesktop.org>
- *
- * See also:  Documentation/RCU/torture.txt
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/err.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/completion.h>
-#include <linux/moduleparam.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/freezer.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
-#include <linux/srcu.h>
-#include <linux/slab.h>
-#include <linux/trace_clock.h>
-#include <asm/byteorder.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
-
-static int fqs_duration;
-module_param(fqs_duration, int, 0444);
-MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable");
-static int fqs_holdoff;
-module_param(fqs_holdoff, int, 0444);
-MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
-static int fqs_stutter = 3;
-module_param(fqs_stutter, int, 0444);
-MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
-static bool gp_exp;
-module_param(gp_exp, bool, 0444);
-MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives");
-static bool gp_normal;
-module_param(gp_normal, bool, 0444);
-MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives");
-static int irqreader = 1;
-module_param(irqreader, int, 0444);
-MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
-static int n_barrier_cbs;
-module_param(n_barrier_cbs, int, 0444);
-MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
-static int nfakewriters = 4;
-module_param(nfakewriters, int, 0444);
-MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
-static int nreaders = -1;
-module_param(nreaders, int, 0444);
-MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
-static int object_debug;
-module_param(object_debug, int, 0444);
-MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing");
-static int onoff_holdoff;
-module_param(onoff_holdoff, int, 0444);
-MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)");
-static int onoff_interval;
-module_param(onoff_interval, int, 0444);
-MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
-static int shuffle_interval = 3;
-module_param(shuffle_interval, int, 0444);
-MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
-static int shutdown_secs;
-module_param(shutdown_secs, int, 0444);
-MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable.");
-static int stall_cpu;
-module_param(stall_cpu, int, 0444);
-MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable.");
-static int stall_cpu_holdoff = 10;
-module_param(stall_cpu_holdoff, int, 0444);
-MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s).");
-static int stat_interval = 60;
-module_param(stat_interval, int, 0644);
-MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
-static int stutter = 5;
-module_param(stutter, int, 0444);
-MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
-static int test_boost = 1;
-module_param(test_boost, int, 0444);
-MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
-static int test_boost_duration = 4;
-module_param(test_boost_duration, int, 0444);
-MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
-static int test_boost_interval = 7;
-module_param(test_boost_interval, int, 0444);
-MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
-static bool test_no_idle_hz = true;
-module_param(test_no_idle_hz, bool, 0444);
-MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
-static char *torture_type = "rcu";
-module_param(torture_type, charp, 0444);
-MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
-static bool verbose;
-module_param(verbose, bool, 0444);
-MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
-
-#define TORTURE_FLAG "-torture:"
-#define PRINTK_STRING(s) \
-       do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
-#define VERBOSE_PRINTK_STRING(s) \
-       do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
-#define VERBOSE_PRINTK_ERRSTRING(s) \
-       do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
-
-static char printk_buf[4096];
-
-static int nrealreaders;
-static struct task_struct *writer_task;
-static struct task_struct **fakewriter_tasks;
-static struct task_struct **reader_tasks;
-static struct task_struct *stats_task;
-static struct task_struct *shuffler_task;
-static struct task_struct *stutter_task;
-static struct task_struct *fqs_task;
-static struct task_struct *boost_tasks[NR_CPUS];
-static struct task_struct *shutdown_task;
-#ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct *onoff_task;
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-static struct task_struct *stall_task;
-static struct task_struct **barrier_cbs_tasks;
-static struct task_struct *barrier_task;
-
-#define RCU_TORTURE_PIPE_LEN 10
-
-struct rcu_torture {
-       struct rcu_head rtort_rcu;
-       int rtort_pipe_count;
-       struct list_head rtort_free;
-       int rtort_mbtest;
-};
-
-static LIST_HEAD(rcu_torture_freelist);
-static struct rcu_torture __rcu *rcu_torture_current;
-static unsigned long rcu_torture_current_version;
-static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
-static DEFINE_SPINLOCK(rcu_torture_lock);
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
-       { 0 };
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) =
-       { 0 };
-static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
-static atomic_t n_rcu_torture_alloc;
-static atomic_t n_rcu_torture_alloc_fail;
-static atomic_t n_rcu_torture_free;
-static atomic_t n_rcu_torture_mberror;
-static atomic_t n_rcu_torture_error;
-static long n_rcu_torture_barrier_error;
-static long n_rcu_torture_boost_ktrerror;
-static long n_rcu_torture_boost_rterror;
-static long n_rcu_torture_boost_failure;
-static long n_rcu_torture_boosts;
-static long n_rcu_torture_timers;
-static long n_offline_attempts;
-static long n_offline_successes;
-static unsigned long sum_offline;
-static int min_offline = -1;
-static int max_offline;
-static long n_online_attempts;
-static long n_online_successes;
-static unsigned long sum_online;
-static int min_online = -1;
-static int max_online;
-static long n_barrier_attempts;
-static long n_barrier_successes;
-static struct list_head rcu_torture_removed;
-static cpumask_var_t shuffle_tmp_mask;
-
-static int stutter_pause_test;
-
-#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
-#define RCUTORTURE_RUNNABLE_INIT 1
-#else
-#define RCUTORTURE_RUNNABLE_INIT 0
-#endif
-int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
-module_param(rcutorture_runnable, int, 0444);
-MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
-
-#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
-#define rcu_can_boost() 1
-#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
-#define rcu_can_boost() 0
-#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
-
-#ifdef CONFIG_RCU_TRACE
-static u64 notrace rcu_trace_clock_local(void)
-{
-       u64 ts = trace_clock_local();
-       unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
-       return ts;
-}
-#else /* #ifdef CONFIG_RCU_TRACE */
-static u64 notrace rcu_trace_clock_local(void)
-{
-       return 0ULL;
-}
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
-static unsigned long shutdown_time;    /* jiffies to system shutdown. */
-static unsigned long boost_starttime;  /* jiffies of next boost test start. */
-DEFINE_MUTEX(boost_mutex);             /* protect setting boost_starttime */
-                                       /*  and boost task create/destroy. */
-static atomic_t barrier_cbs_count;     /* Barrier callbacks registered. */
-static bool barrier_phase;             /* Test phase. */
-static atomic_t barrier_cbs_invoked;   /* Barrier callbacks invoked. */
-static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
-static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
-
-/* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
-
-#define FULLSTOP_DONTSTOP 0    /* Normal operation. */
-#define FULLSTOP_SHUTDOWN 1    /* System shutdown with rcutorture running. */
-#define FULLSTOP_RMMOD    2    /* Normal rmmod of rcutorture. */
-static int fullstop = FULLSTOP_RMMOD;
-/*
- * Protect fullstop transitions and spawning of kthreads.
- */
-static DEFINE_MUTEX(fullstop_mutex);
-
-/* Forward reference. */
-static void rcu_torture_cleanup(void);
-
-/*
- * Detect and respond to a system shutdown.
- */
-static int
-rcutorture_shutdown_notify(struct notifier_block *unused1,
-                          unsigned long unused2, void *unused3)
-{
-       mutex_lock(&fullstop_mutex);
-       if (fullstop == FULLSTOP_DONTSTOP)
-               fullstop = FULLSTOP_SHUTDOWN;
-       else
-               pr_warn(/* but going down anyway, so... */
-                      "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
-       mutex_unlock(&fullstop_mutex);
-       return NOTIFY_DONE;
-}
-
-/*
- * Absorb kthreads into a kernel function that won't return, so that
- * they won't ever access module text or data again.
- */
-static void rcutorture_shutdown_absorb(const char *title)
-{
-       if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
-               pr_notice(
-                      "rcutorture thread %s parking due to system shutdown\n",
-                      title);
-               schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
-       }
-}
-
-/*
- * Allocate an element from the rcu_tortures pool.
- */
-static struct rcu_torture *
-rcu_torture_alloc(void)
-{
-       struct list_head *p;
-
-       spin_lock_bh(&rcu_torture_lock);
-       if (list_empty(&rcu_torture_freelist)) {
-               atomic_inc(&n_rcu_torture_alloc_fail);
-               spin_unlock_bh(&rcu_torture_lock);
-               return NULL;
-       }
-       atomic_inc(&n_rcu_torture_alloc);
-       p = rcu_torture_freelist.next;
-       list_del_init(p);
-       spin_unlock_bh(&rcu_torture_lock);
-       return container_of(p, struct rcu_torture, rtort_free);
-}
-
-/*
- * Free an element to the rcu_tortures pool.
- */
-static void
-rcu_torture_free(struct rcu_torture *p)
-{
-       atomic_inc(&n_rcu_torture_free);
-       spin_lock_bh(&rcu_torture_lock);
-       list_add_tail(&p->rtort_free, &rcu_torture_freelist);
-       spin_unlock_bh(&rcu_torture_lock);
-}
-
-struct rcu_random_state {
-       unsigned long rrs_state;
-       long rrs_count;
-};
-
-#define RCU_RANDOM_MULT 39916801  /* prime */
-#define RCU_RANDOM_ADD 479001701 /* prime */
-#define RCU_RANDOM_REFRESH 10000
-
-#define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 }
-
-/*
- * Crude but fast random-number generator.  Uses a linear congruential
- * generator, with occasional help from local_clock().
- */
-static unsigned long
-rcu_random(struct rcu_random_state *rrsp)
-{
-       if (--rrsp->rrs_count < 0) {
-               rrsp->rrs_state += (unsigned long)local_clock();
-               rrsp->rrs_count = RCU_RANDOM_REFRESH;
-       }
-       rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
-       return swahw32(rrsp->rrs_state);
-}
-
-static void
-rcu_stutter_wait(const char *title)
-{
-       while (stutter_pause_test || !rcutorture_runnable) {
-               if (rcutorture_runnable)
-                       schedule_timeout_interruptible(1);
-               else
-                       schedule_timeout_interruptible(round_jiffies_relative(HZ));
-               rcutorture_shutdown_absorb(title);
-       }
-}
-
-/*
- * Operations vector for selecting different types of tests.
- */
-
-struct rcu_torture_ops {
-       void (*init)(void);
-       int (*readlock)(void);
-       void (*read_delay)(struct rcu_random_state *rrsp);
-       void (*readunlock)(int idx);
-       int (*completed)(void);
-       void (*deferred_free)(struct rcu_torture *p);
-       void (*sync)(void);
-       void (*exp_sync)(void);
-       void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
-       void (*cb_barrier)(void);
-       void (*fqs)(void);
-       int (*stats)(char *page);
-       int irq_capable;
-       int can_boost;
-       const char *name;
-};
-
-static struct rcu_torture_ops *cur_ops;
-
-/*
- * Definitions for rcu torture testing.
- */
-
-static int rcu_torture_read_lock(void) __acquires(RCU)
-{
-       rcu_read_lock();
-       return 0;
-}
-
-static void rcu_read_delay(struct rcu_random_state *rrsp)
-{
-       const unsigned long shortdelay_us = 200;
-       const unsigned long longdelay_ms = 50;
-
-       /* We want a short delay sometimes to make a reader delay the grace
-        * period, and we want a long delay occasionally to trigger
-        * force_quiescent_state. */
-
-       if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms)))
-               mdelay(longdelay_ms);
-       if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
-               udelay(shortdelay_us);
-#ifdef CONFIG_PREEMPT
-       if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000)))
-               preempt_schedule();  /* No QS if preempt_disable() in effect */
-#endif
-}
-
-static void rcu_torture_read_unlock(int idx) __releases(RCU)
-{
-       rcu_read_unlock();
-}
-
-static int rcu_torture_completed(void)
-{
-       return rcu_batches_completed();
-}
-
-static void
-rcu_torture_cb(struct rcu_head *p)
-{
-       int i;
-       struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
-
-       if (fullstop != FULLSTOP_DONTSTOP) {
-               /* Test is ending, just drop callbacks on the floor. */
-               /* The next initialization will pick up the pieces. */
-               return;
-       }
-       i = rp->rtort_pipe_count;
-       if (i > RCU_TORTURE_PIPE_LEN)
-               i = RCU_TORTURE_PIPE_LEN;
-       atomic_inc(&rcu_torture_wcount[i]);
-       if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
-               rp->rtort_mbtest = 0;
-               rcu_torture_free(rp);
-       } else {
-               cur_ops->deferred_free(rp);
-       }
-}
-
-static int rcu_no_completed(void)
-{
-       return 0;
-}
-
-static void rcu_torture_deferred_free(struct rcu_torture *p)
-{
-       call_rcu(&p->rtort_rcu, rcu_torture_cb);
-}
-
-static void rcu_sync_torture_init(void)
-{
-       INIT_LIST_HEAD(&rcu_torture_removed);
-}
-
-static struct rcu_torture_ops rcu_ops = {
-       .init           = rcu_sync_torture_init,
-       .readlock       = rcu_torture_read_lock,
-       .read_delay     = rcu_read_delay,
-       .readunlock     = rcu_torture_read_unlock,
-       .completed      = rcu_torture_completed,
-       .deferred_free  = rcu_torture_deferred_free,
-       .sync           = synchronize_rcu,
-       .exp_sync       = synchronize_rcu_expedited,
-       .call           = call_rcu,
-       .cb_barrier     = rcu_barrier,
-       .fqs            = rcu_force_quiescent_state,
-       .stats          = NULL,
-       .irq_capable    = 1,
-       .can_boost      = rcu_can_boost(),
-       .name           = "rcu"
-};
-
-/*
- * Definitions for rcu_bh torture testing.
- */
-
-static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH)
-{
-       rcu_read_lock_bh();
-       return 0;
-}
-
-static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH)
-{
-       rcu_read_unlock_bh();
-}
-
-static int rcu_bh_torture_completed(void)
-{
-       return rcu_batches_completed_bh();
-}
-
-static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
-{
-       call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
-}
-
-static struct rcu_torture_ops rcu_bh_ops = {
-       .init           = rcu_sync_torture_init,
-       .readlock       = rcu_bh_torture_read_lock,
-       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
-       .readunlock     = rcu_bh_torture_read_unlock,
-       .completed      = rcu_bh_torture_completed,
-       .deferred_free  = rcu_bh_torture_deferred_free,
-       .sync           = synchronize_rcu_bh,
-       .exp_sync       = synchronize_rcu_bh_expedited,
-       .call           = call_rcu_bh,
-       .cb_barrier     = rcu_barrier_bh,
-       .fqs            = rcu_bh_force_quiescent_state,
-       .stats          = NULL,
-       .irq_capable    = 1,
-       .name           = "rcu_bh"
-};
-
-/*
- * Definitions for srcu torture testing.
- */
-
-DEFINE_STATIC_SRCU(srcu_ctl);
-
-static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
-{
-       return srcu_read_lock(&srcu_ctl);
-}
-
-static void srcu_read_delay(struct rcu_random_state *rrsp)
-{
-       long delay;
-       const long uspertick = 1000000 / HZ;
-       const long longdelay = 10;
-
-       /* We want there to be long-running readers, but not all the time. */
-
-       delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
-       if (!delay)
-               schedule_timeout_interruptible(longdelay);
-       else
-               rcu_read_delay(rrsp);
-}
-
-static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
-{
-       srcu_read_unlock(&srcu_ctl, idx);
-}
-
-static int srcu_torture_completed(void)
-{
-       return srcu_batches_completed(&srcu_ctl);
-}
-
-static void srcu_torture_deferred_free(struct rcu_torture *rp)
-{
-       call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
-}
-
-static void srcu_torture_synchronize(void)
-{
-       synchronize_srcu(&srcu_ctl);
-}
-
-static void srcu_torture_call(struct rcu_head *head,
-                             void (*func)(struct rcu_head *head))
-{
-       call_srcu(&srcu_ctl, head, func);
-}
-
-static void srcu_torture_barrier(void)
-{
-       srcu_barrier(&srcu_ctl);
-}
-
-static int srcu_torture_stats(char *page)
-{
-       int cnt = 0;
-       int cpu;
-       int idx = srcu_ctl.completed & 0x1;
-
-       cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
-                      torture_type, TORTURE_FLAG, idx);
-       for_each_possible_cpu(cpu) {
-               cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
-                              per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
-                              per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
-       }
-       cnt += sprintf(&page[cnt], "\n");
-       return cnt;
-}
-
-static void srcu_torture_synchronize_expedited(void)
-{
-       synchronize_srcu_expedited(&srcu_ctl);
-}
-
-static struct rcu_torture_ops srcu_ops = {
-       .init           = rcu_sync_torture_init,
-       .readlock       = srcu_torture_read_lock,
-       .read_delay     = srcu_read_delay,
-       .readunlock     = srcu_torture_read_unlock,
-       .completed      = srcu_torture_completed,
-       .deferred_free  = srcu_torture_deferred_free,
-       .sync           = srcu_torture_synchronize,
-       .exp_sync       = srcu_torture_synchronize_expedited,
-       .call           = srcu_torture_call,
-       .cb_barrier     = srcu_torture_barrier,
-       .stats          = srcu_torture_stats,
-       .name           = "srcu"
-};
-
-/*
- * Definitions for sched torture testing.
- */
-
-static int sched_torture_read_lock(void)
-{
-       preempt_disable();
-       return 0;
-}
-
-static void sched_torture_read_unlock(int idx)
-{
-       preempt_enable();
-}
-
-static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
-{
-       call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
-}
-
-static struct rcu_torture_ops sched_ops = {
-       .init           = rcu_sync_torture_init,
-       .readlock       = sched_torture_read_lock,
-       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
-       .readunlock     = sched_torture_read_unlock,
-       .completed      = rcu_no_completed,
-       .deferred_free  = rcu_sched_torture_deferred_free,
-       .sync           = synchronize_sched,
-       .exp_sync       = synchronize_sched_expedited,
-       .call           = call_rcu_sched,
-       .cb_barrier     = rcu_barrier_sched,
-       .fqs            = rcu_sched_force_quiescent_state,
-       .stats          = NULL,
-       .irq_capable    = 1,
-       .name           = "sched"
-};
-
-/*
- * RCU torture priority-boost testing.  Runs one real-time thread per
- * CPU for moderate bursts, repeatedly registering RCU callbacks and
- * spinning waiting for them to be invoked.  If a given callback takes
- * too long to be invoked, we assume that priority inversion has occurred.
- */
-
-struct rcu_boost_inflight {
-       struct rcu_head rcu;
-       int inflight;
-};
-
-static void rcu_torture_boost_cb(struct rcu_head *head)
-{
-       struct rcu_boost_inflight *rbip =
-               container_of(head, struct rcu_boost_inflight, rcu);
-
-       smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
-       rbip->inflight = 0;
-}
-
-static int rcu_torture_boost(void *arg)
-{
-       unsigned long call_rcu_time;
-       unsigned long endtime;
-       unsigned long oldstarttime;
-       struct rcu_boost_inflight rbi = { .inflight = 0 };
-       struct sched_param sp;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_boost started");
-
-       /* Set real-time priority. */
-       sp.sched_priority = 1;
-       if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
-               VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
-               n_rcu_torture_boost_rterror++;
-       }
-
-       init_rcu_head_on_stack(&rbi.rcu);
-       /* Each pass through the following loop does one boost-test cycle. */
-       do {
-               /* Wait for the next test interval. */
-               oldstarttime = boost_starttime;
-               while (ULONG_CMP_LT(jiffies, oldstarttime)) {
-                       schedule_timeout_interruptible(oldstarttime - jiffies);
-                       rcu_stutter_wait("rcu_torture_boost");
-                       if (kthread_should_stop() ||
-                           fullstop != FULLSTOP_DONTSTOP)
-                               goto checkwait;
-               }
-
-               /* Do one boost-test interval. */
-               endtime = oldstarttime + test_boost_duration * HZ;
-               call_rcu_time = jiffies;
-               while (ULONG_CMP_LT(jiffies, endtime)) {
-                       /* If we don't have a callback in flight, post one. */
-                       if (!rbi.inflight) {
-                               smp_mb(); /* RCU core before ->inflight = 1. */
-                               rbi.inflight = 1;
-                               call_rcu(&rbi.rcu, rcu_torture_boost_cb);
-                               if (jiffies - call_rcu_time >
-                                        test_boost_duration * HZ - HZ / 2) {
-                                       VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
-                                       n_rcu_torture_boost_failure++;
-                               }
-                               call_rcu_time = jiffies;
-                       }
-                       cond_resched();
-                       rcu_stutter_wait("rcu_torture_boost");
-                       if (kthread_should_stop() ||
-                           fullstop != FULLSTOP_DONTSTOP)
-                               goto checkwait;
-               }
-
-               /*
-                * Set the start time of the next test interval.
-                * Yes, this is vulnerable to long delays, but such
-                * delays simply cause a false negative for the next
-                * interval.  Besides, we are running at RT priority,
-                * so delays should be relatively rare.
-                */
-               while (oldstarttime == boost_starttime &&
-                      !kthread_should_stop()) {
-                       if (mutex_trylock(&boost_mutex)) {
-                               boost_starttime = jiffies +
-                                                 test_boost_interval * HZ;
-                               n_rcu_torture_boosts++;
-                               mutex_unlock(&boost_mutex);
-                               break;
-                       }
-                       schedule_timeout_uninterruptible(1);
-               }
-
-               /* Go do the stutter. */
-checkwait:     rcu_stutter_wait("rcu_torture_boost");
-       } while (!kthread_should_stop() && fullstop  == FULLSTOP_DONTSTOP);
-
-       /* Clean up and exit. */
-       VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_boost");
-       while (!kthread_should_stop() || rbi.inflight)
-               schedule_timeout_uninterruptible(1);
-       smp_mb(); /* order accesses to ->inflight before stack-frame death. */
-       destroy_rcu_head_on_stack(&rbi.rcu);
-       return 0;
-}
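
rcu_torture_boost() above detects possible priority inversion with a single "callback in flight" flag: it posts a callback, spins until the callback clears the flag, and counts a boost failure if the flag is still set when the time budget runs out. The sketch below models that handshake in userspace with C11 atomics and a pthread standing in for the RCU callback machinery; the names and timings are made up, and the fake callback is deliberately slower than the deadline so the failure path is exercised.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_int inflight;         /* models rbi.inflight */

    /* Models the posted callback eventually running and clearing the flag;
     * deliberately slower than the deadline below so the failure path fires. */
    static void *fake_callback_engine(void *arg)
    {
        (void)arg;
        sleep(1);
        atomic_store(&inflight, 0);
        return NULL;
    }

    int main(void)
    {
        pthread_t cb;
        int deadline_ms = 500;          /* models test_boost_duration * HZ - HZ / 2 */
        int waited_ms = 0;
        int failures = 0;

        atomic_store(&inflight, 1);             /* "post" the callback */
        pthread_create(&cb, NULL, fake_callback_engine, NULL);

        while (atomic_load(&inflight)) {        /* spin, as the boost kthread does */
            usleep(10 * 1000);
            waited_ms += 10;
            if (waited_ms > deadline_ms) {      /* took too long: assume inversion */
                failures++;
                break;
            }
        }
        pthread_join(cb, NULL);
        printf("boost-style failures detected: %d\n", failures);
        return 0;
    }
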
-
-/*
- * RCU torture force-quiescent-state kthread.  Repeatedly induces
- * bursts of calls to force_quiescent_state(), increasing the probability
- * of occurrence of some important types of race conditions.
- */
-static int
-rcu_torture_fqs(void *arg)
-{
-       unsigned long fqs_resume_time;
-       int fqs_burst_remaining;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
-       do {
-               fqs_resume_time = jiffies + fqs_stutter * HZ;
-               while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
-                      !kthread_should_stop()) {
-                       schedule_timeout_interruptible(1);
-               }
-               fqs_burst_remaining = fqs_duration;
-               while (fqs_burst_remaining > 0 &&
-                      !kthread_should_stop()) {
-                       cur_ops->fqs();
-                       udelay(fqs_holdoff);
-                       fqs_burst_remaining -= fqs_holdoff;
-               }
-               rcu_stutter_wait("rcu_torture_fqs");
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-       VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_fqs");
-       while (!kthread_should_stop())
-               schedule_timeout_uninterruptible(1);
-       return 0;
-}
-
-/*
- * RCU torture writer kthread.  Repeatedly substitutes a new structure
- * for that pointed to by rcu_torture_current, freeing the old structure
- * after a series of grace periods (the "pipeline").
- */
-static int
-rcu_torture_writer(void *arg)
-{
-       bool exp;
-       int i;
-       struct rcu_torture *rp;
-       struct rcu_torture *rp1;
-       struct rcu_torture *old_rp;
-       static DEFINE_RCU_RANDOM(rand);
-
-       VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
-       set_user_nice(current, 19);
-
-       do {
-               schedule_timeout_uninterruptible(1);
-               rp = rcu_torture_alloc();
-               if (rp == NULL)
-                       continue;
-               rp->rtort_pipe_count = 0;
-               udelay(rcu_random(&rand) & 0x3ff);
-               old_rp = rcu_dereference_check(rcu_torture_current,
-                                              current == writer_task);
-               rp->rtort_mbtest = 1;
-               rcu_assign_pointer(rcu_torture_current, rp);
-               smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
-               if (old_rp) {
-                       i = old_rp->rtort_pipe_count;
-                       if (i > RCU_TORTURE_PIPE_LEN)
-                               i = RCU_TORTURE_PIPE_LEN;
-                       atomic_inc(&rcu_torture_wcount[i]);
-                       old_rp->rtort_pipe_count++;
-                       if (gp_normal == gp_exp)
-                               exp = !!(rcu_random(&rand) & 0x80);
-                       else
-                               exp = gp_exp;
-                       if (!exp) {
-                               cur_ops->deferred_free(old_rp);
-                       } else {
-                               cur_ops->exp_sync();
-                               list_add(&old_rp->rtort_free,
-                                        &rcu_torture_removed);
-                               list_for_each_entry_safe(rp, rp1,
-                                                        &rcu_torture_removed,
-                                                        rtort_free) {
-                                       i = rp->rtort_pipe_count;
-                                       if (i > RCU_TORTURE_PIPE_LEN)
-                                               i = RCU_TORTURE_PIPE_LEN;
-                                       atomic_inc(&rcu_torture_wcount[i]);
-                                       if (++rp->rtort_pipe_count >=
-                                           RCU_TORTURE_PIPE_LEN) {
-                                               rp->rtort_mbtest = 0;
-                                               list_del(&rp->rtort_free);
-                                               rcu_torture_free(rp);
-                                       }
-                                }
-                       }
-               }
-               rcutorture_record_progress(++rcu_torture_current_version);
-               rcu_stutter_wait("rcu_torture_writer");
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-       VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_writer");
-       while (!kthread_should_stop())
-               schedule_timeout_uninterruptible(1);
-       return 0;
-}
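
The "pipeline" in the comment above is an aging counter: each writer pass bumps rtort_pipe_count on every removed-but-not-yet-freed structure, and a structure goes back on the freelist only after it has aged RCU_TORTURE_PIPE_LEN stages, i.e. has survived that many grace periods. A small model of just that bookkeeping, with invented names (struct item, age_removed_items) and no RCU involved:

    #include <stdio.h>

    #define PIPE_LEN 10                 /* models RCU_TORTURE_PIPE_LEN */

    struct item {
        int pipe_count;                 /* models rtort_pipe_count */
        int freed;
    };

    /* One writer pass: age every removed-but-not-yet-freed item by one stage. */
    static void age_removed_items(struct item *items, int n, long *wcount)
    {
        for (int i = 0; i < n; i++) {
            int stage;

            if (items[i].freed)
                continue;
            stage = items[i].pipe_count;
            if (stage > PIPE_LEN)
                stage = PIPE_LEN;
            wcount[stage]++;                    /* models rcu_torture_wcount[] */
            if (++items[i].pipe_count >= PIPE_LEN)
                items[i].freed = 1;             /* models rcu_torture_free() */
        }
    }

    int main(void)
    {
        struct item items[3] = { { 0, 0 }, { 4, 0 }, { 9, 0 } };
        long wcount[PIPE_LEN + 1] = { 0 };

        for (int pass = 0; pass < PIPE_LEN; pass++)
            age_removed_items(items, 3, wcount);

        for (int i = 0; i < 3; i++)
            printf("item %d: pipe_count=%d freed=%d\n",
                   i, items[i].pipe_count, items[i].freed);
        return 0;
    }
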
-
-/*
- * RCU torture fake writer kthread.  Repeatedly calls sync, with a random
- * delay between calls.
- */
-static int
-rcu_torture_fakewriter(void *arg)
-{
-       DEFINE_RCU_RANDOM(rand);
-
-       VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
-       set_user_nice(current, 19);
-
-       do {
-               schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
-               udelay(rcu_random(&rand) & 0x3ff);
-               if (cur_ops->cb_barrier != NULL &&
-                   rcu_random(&rand) % (nfakewriters * 8) == 0) {
-                       cur_ops->cb_barrier();
-               } else if (gp_normal == gp_exp) {
-                       if (rcu_random(&rand) & 0x80)
-                               cur_ops->sync();
-                       else
-                               cur_ops->exp_sync();
-               } else if (gp_normal) {
-                       cur_ops->sync();
-               } else {
-                       cur_ops->exp_sync();
-               }
-               rcu_stutter_wait("rcu_torture_fakewriter");
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-
-       VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_fakewriter");
-       while (!kthread_should_stop())
-               schedule_timeout_uninterruptible(1);
-       return 0;
-}
-
-void rcutorture_trace_dump(void)
-{
-       static atomic_t beenhere = ATOMIC_INIT(0);
-
-       if (atomic_read(&beenhere))
-               return;
-       if (atomic_xchg(&beenhere, 1) != 0)
-               return;
-       ftrace_dump(DUMP_ALL);
-}
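
rcutorture_trace_dump() above is a do-at-most-once helper: the plain read filters the common case cheaply, and the atomic exchange guarantees that only one of several racing callers performs the expensive ftrace dump. A userspace sketch of the same idiom using C11 atomics (dump_once() is an invented name):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int beenhere;                 /* 0 until one caller wins the race */

    static void dump_once(void)
    {
        if (atomic_load(&beenhere))             /* fast path: already dumped */
            return;
        if (atomic_exchange(&beenhere, 1))      /* exactly one caller sees 0 here */
            return;
        puts("expensive one-time dump goes here");
    }

    int main(void)
    {
        dump_once();    /* performs the dump */
        dump_once();    /* returns silently */
        return 0;
    }
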
-
-/*
- * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
- * incrementing the corresponding element of the pipeline array.  The
- * counter in the element should never be greater than 1; otherwise, the
- * RCU implementation is broken.
- */
-static void rcu_torture_timer(unsigned long unused)
-{
-       int idx;
-       int completed;
-       int completed_end;
-       static DEFINE_RCU_RANDOM(rand);
-       static DEFINE_SPINLOCK(rand_lock);
-       struct rcu_torture *p;
-       int pipe_count;
-       unsigned long long ts;
-
-       idx = cur_ops->readlock();
-       completed = cur_ops->completed();
-       ts = rcu_trace_clock_local();
-       p = rcu_dereference_check(rcu_torture_current,
-                                 rcu_read_lock_bh_held() ||
-                                 rcu_read_lock_sched_held() ||
-                                 srcu_read_lock_held(&srcu_ctl));
-       if (p == NULL) {
-               /* Leave because rcu_torture_writer is not yet underway */
-               cur_ops->readunlock(idx);
-               return;
-       }
-       if (p->rtort_mbtest == 0)
-               atomic_inc(&n_rcu_torture_mberror);
-       spin_lock(&rand_lock);
-       cur_ops->read_delay(&rand);
-       n_rcu_torture_timers++;
-       spin_unlock(&rand_lock);
-       preempt_disable();
-       pipe_count = p->rtort_pipe_count;
-       if (pipe_count > RCU_TORTURE_PIPE_LEN) {
-               /* Should not happen, but... */
-               pipe_count = RCU_TORTURE_PIPE_LEN;
-       }
-       completed_end = cur_ops->completed();
-       if (pipe_count > 1) {
-               do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
-                                         completed, completed_end);
-               rcutorture_trace_dump();
-       }
-       __this_cpu_inc(rcu_torture_count[pipe_count]);
-       completed = completed_end - completed;
-       if (completed > RCU_TORTURE_PIPE_LEN) {
-               /* Should not happen, but... */
-               completed = RCU_TORTURE_PIPE_LEN;
-       }
-       __this_cpu_inc(rcu_torture_batch[completed]);
-       preempt_enable();
-       cur_ops->readunlock(idx);
-}
-
-/*
- * RCU torture reader kthread.  Repeatedly dereferences rcu_torture_current,
- * incrementing the corresponding element of the pipeline array.  The
- * counter in the element should never be greater than 1; otherwise, the
- * RCU implementation is broken.
- */
-static int
-rcu_torture_reader(void *arg)
-{
-       int completed;
-       int completed_end;
-       int idx;
-       DEFINE_RCU_RANDOM(rand);
-       struct rcu_torture *p;
-       int pipe_count;
-       struct timer_list t;
-       unsigned long long ts;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
-       set_user_nice(current, 19);
-       if (irqreader && cur_ops->irq_capable)
-               setup_timer_on_stack(&t, rcu_torture_timer, 0);
-
-       do {
-               if (irqreader && cur_ops->irq_capable) {
-                       if (!timer_pending(&t))
-                               mod_timer(&t, jiffies + 1);
-               }
-               idx = cur_ops->readlock();
-               completed = cur_ops->completed();
-               ts = rcu_trace_clock_local();
-               p = rcu_dereference_check(rcu_torture_current,
-                                         rcu_read_lock_bh_held() ||
-                                         rcu_read_lock_sched_held() ||
-                                         srcu_read_lock_held(&srcu_ctl));
-               if (p == NULL) {
-                       /* Wait for rcu_torture_writer to get underway */
-                       cur_ops->readunlock(idx);
-                       schedule_timeout_interruptible(HZ);
-                       continue;
-               }
-               if (p->rtort_mbtest == 0)
-                       atomic_inc(&n_rcu_torture_mberror);
-               cur_ops->read_delay(&rand);
-               preempt_disable();
-               pipe_count = p->rtort_pipe_count;
-               if (pipe_count > RCU_TORTURE_PIPE_LEN) {
-                       /* Should not happen, but... */
-                       pipe_count = RCU_TORTURE_PIPE_LEN;
-               }
-               completed_end = cur_ops->completed();
-               if (pipe_count > 1) {
-                       do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
-                                                 ts, completed, completed_end);
-                       rcutorture_trace_dump();
-               }
-               __this_cpu_inc(rcu_torture_count[pipe_count]);
-               completed = completed_end - completed;
-               if (completed > RCU_TORTURE_PIPE_LEN) {
-                       /* Should not happen, but... */
-                       completed = RCU_TORTURE_PIPE_LEN;
-               }
-               __this_cpu_inc(rcu_torture_batch[completed]);
-               preempt_enable();
-               cur_ops->readunlock(idx);
-               schedule();
-               rcu_stutter_wait("rcu_torture_reader");
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-       VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_reader");
-       if (irqreader && cur_ops->irq_capable)
-               del_timer_sync(&t);
-       while (!kthread_should_stop())
-               schedule_timeout_uninterruptible(1);
-       return 0;
-}
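
Both the timer-based reader and the kthread reader above check the same invariant: while a reader is inside its read-side critical section, the structure it fetched can have gone through at most one writer pass, so any rtort_pipe_count above 1 means a grace period ended while a reader was still active. Below is a stripped-down model of that per-reader bookkeeping (histogram bucket plus the "should not happen" clamp); the RCU primitives are omitted and the names are illustrative.

    #include <stdio.h>

    #define PIPE_LEN 10

    static long reader_pipe[PIPE_LEN + 1];  /* models this CPU's rcu_torture_count[] */
    static long errors;

    /* One reader observation: record which pipeline stage the item was seen in. */
    static void record_observation(int pipe_count)
    {
        if (pipe_count > PIPE_LEN)
            pipe_count = PIPE_LEN;          /* "should not happen, but..." */
        if (pipe_count > 1)
            errors++;                       /* a live reader saw an over-aged item */
        reader_pipe[pipe_count]++;
    }

    int main(void)
    {
        record_observation(0);  /* item still current */
        record_observation(1);  /* item removed during this read-side section: fine */
        record_observation(3);  /* would indicate a too-short grace period */
        printf("errors=%ld pipe[0]=%ld pipe[1]=%ld\n",
               errors, reader_pipe[0], reader_pipe[1]);
        return 0;
    }
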
-
-/*
- * Create an RCU-torture statistics message in the specified buffer.
- */
-static int
-rcu_torture_printk(char *page)
-{
-       int cnt = 0;
-       int cpu;
-       int i;
-       long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
-       long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
-
-       for_each_possible_cpu(cpu) {
-               for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
-                       pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i];
-                       batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i];
-               }
-       }
-       for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) {
-               if (pipesummary[i] != 0)
-                       break;
-       }
-       cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
-       cnt += sprintf(&page[cnt],
-                      "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
-                      rcu_torture_current,
-                      rcu_torture_current_version,
-                      list_empty(&rcu_torture_freelist),
-                      atomic_read(&n_rcu_torture_alloc),
-                      atomic_read(&n_rcu_torture_alloc_fail),
-                      atomic_read(&n_rcu_torture_free));
-       cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
-                      atomic_read(&n_rcu_torture_mberror),
-                      n_rcu_torture_boost_ktrerror,
-                      n_rcu_torture_boost_rterror);
-       cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
-                      n_rcu_torture_boost_failure,
-                      n_rcu_torture_boosts,
-                      n_rcu_torture_timers);
-       cnt += sprintf(&page[cnt],
-                      "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
-                      n_online_successes, n_online_attempts,
-                      n_offline_successes, n_offline_attempts,
-                      min_online, max_online,
-                      min_offline, max_offline,
-                      sum_online, sum_offline, HZ);
-       cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
-                      n_barrier_successes,
-                      n_barrier_attempts,
-                      n_rcu_torture_barrier_error);
-       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
-       if (atomic_read(&n_rcu_torture_mberror) != 0 ||
-           n_rcu_torture_barrier_error != 0 ||
-           n_rcu_torture_boost_ktrerror != 0 ||
-           n_rcu_torture_boost_rterror != 0 ||
-           n_rcu_torture_boost_failure != 0 ||
-           i > 1) {
-               cnt += sprintf(&page[cnt], "!!! ");
-               atomic_inc(&n_rcu_torture_error);
-               WARN_ON_ONCE(1);
-       }
-       cnt += sprintf(&page[cnt], "Reader Pipe: ");
-       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-               cnt += sprintf(&page[cnt], " %ld", pipesummary[i]);
-       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
-       cnt += sprintf(&page[cnt], "Reader Batch: ");
-       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-               cnt += sprintf(&page[cnt], " %ld", batchsummary[i]);
-       cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
-       cnt += sprintf(&page[cnt], "Free-Block Circulation: ");
-       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
-               cnt += sprintf(&page[cnt], " %d",
-                              atomic_read(&rcu_torture_wcount[i]));
-       }
-       cnt += sprintf(&page[cnt], "\n");
-       if (cur_ops->stats)
-               cnt += cur_ops->stats(&page[cnt]);
-       return cnt;
-}
-
-/*
- * Print torture statistics.  Caller must ensure that there is only
- * one call to this function at a given time!!!  This is normally
- * accomplished by relying on the module system to only have one copy
- * of the module loaded, and then by giving the rcu_torture_stats
- * kthread full control (or the init/cleanup functions when rcu_torture_stats
- * thread is not running).
- */
-static void
-rcu_torture_stats_print(void)
-{
-       int cnt;
-
-       cnt = rcu_torture_printk(printk_buf);
-       pr_alert("%s", printk_buf);
-}
-
-/*
- * Periodically prints torture statistics, if periodic statistics printing
- * was specified via the stat_interval module parameter.
- *
- * No need to worry about fullstop here, since this one doesn't reference
- * volatile state or register callbacks.
- */
-static int
-rcu_torture_stats(void *arg)
-{
-       VERBOSE_PRINTK_STRING("rcu_torture_stats task started");
-       do {
-               schedule_timeout_interruptible(stat_interval * HZ);
-               rcu_torture_stats_print();
-               rcutorture_shutdown_absorb("rcu_torture_stats");
-       } while (!kthread_should_stop());
-       VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
-       return 0;
-}
-
-static int rcu_idle_cpu;       /* Force all torture tasks off this CPU */
-
-/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case
- * is when @rcu_idle_cpu = -1, in which case we allow the tasks to run on all CPUs.
- */
-static void rcu_torture_shuffle_tasks(void)
-{
-       int i;
-
-       cpumask_setall(shuffle_tmp_mask);
-       get_online_cpus();
-
-       /* No point in shuffling if there is only one online CPU (ex: UP) */
-       if (num_online_cpus() == 1) {
-               put_online_cpus();
-               return;
-       }
-
-       if (rcu_idle_cpu != -1)
-               cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask);
-
-       set_cpus_allowed_ptr(current, shuffle_tmp_mask);
-
-       if (reader_tasks) {
-               for (i = 0; i < nrealreaders; i++)
-                       if (reader_tasks[i])
-                               set_cpus_allowed_ptr(reader_tasks[i],
-                                                    shuffle_tmp_mask);
-       }
-       if (fakewriter_tasks) {
-               for (i = 0; i < nfakewriters; i++)
-                       if (fakewriter_tasks[i])
-                               set_cpus_allowed_ptr(fakewriter_tasks[i],
-                                                    shuffle_tmp_mask);
-       }
-       if (writer_task)
-               set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
-       if (stats_task)
-               set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
-       if (stutter_task)
-               set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
-       if (fqs_task)
-               set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
-       if (shutdown_task)
-               set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
-#ifdef CONFIG_HOTPLUG_CPU
-       if (onoff_task)
-               set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-       if (stall_task)
-               set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
-       if (barrier_cbs_tasks)
-               for (i = 0; i < n_barrier_cbs; i++)
-                       if (barrier_cbs_tasks[i])
-                               set_cpus_allowed_ptr(barrier_cbs_tasks[i],
-                                                    shuffle_tmp_mask);
-       if (barrier_task)
-               set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
-
-       if (rcu_idle_cpu == -1)
-               rcu_idle_cpu = num_online_cpus() - 1;
-       else
-               rcu_idle_cpu--;
-
-       put_online_cpus();
-}
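
rcu_torture_shuffle_tasks() above builds one allowed-CPU mask per interval: all online CPUs minus the current rcu_idle_cpu, which is then rotated downward so that each CPU in turn gets a chance to go idle. A tiny model of the mask construction and rotation using a plain bitmask instead of the cpumask API (shuffle_step() is an invented name):

    #include <stdio.h>

    /* Build "all CPUs except idle_cpu" and rotate idle_cpu, as the shuffler does. */
    static unsigned long shuffle_step(int ncpus, int *idle_cpu)
    {
        unsigned long mask = (1UL << ncpus) - 1;        /* cpumask_setall() */

        if (*idle_cpu != -1)
            mask &= ~(1UL << *idle_cpu);                /* cpumask_clear_cpu() */

        if (*idle_cpu == -1)
            *idle_cpu = ncpus - 1;
        else
            (*idle_cpu)--;
        return mask;
    }

    int main(void)
    {
        int idle_cpu = -1;

        for (int step = 0; step < 6; step++) {
            unsigned long mask = shuffle_step(4, &idle_cpu);

            printf("step %d: allowed mask %#lx, next idle cpu %d\n",
                   step, mask, idle_cpu);
        }
        return 0;
    }
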
-
-/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
- * system to become idle in turn and cut off its timer ticks. This is meant
- * to test RCU's support for such tickless idle CPUs.
- */
-static int
-rcu_torture_shuffle(void *arg)
-{
-       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started");
-       do {
-               schedule_timeout_interruptible(shuffle_interval * HZ);
-               rcu_torture_shuffle_tasks();
-               rcutorture_shutdown_absorb("rcu_torture_shuffle");
-       } while (!kthread_should_stop());
-       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
-       return 0;
-}
-
-/* Cause the rcutorture test to "stutter", starting and stopping all
- * threads periodically.
- */
-static int
-rcu_torture_stutter(void *arg)
-{
-       VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
-       do {
-               schedule_timeout_interruptible(stutter * HZ);
-               stutter_pause_test = 1;
-               if (!kthread_should_stop())
-                       schedule_timeout_interruptible(stutter * HZ);
-               stutter_pause_test = 0;
-               rcutorture_shutdown_absorb("rcu_torture_stutter");
-       } while (!kthread_should_stop());
-       VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
-       return 0;
-}
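
The stutter kthread above toggles a single flag, stutter_pause_test, on a fixed period, and every other torture thread polls that flag in rcu_stutter_wait() and parks while it is set. A hedged userspace sketch of the same start/stop protocol with pthreads and C11 atomics (pause_test, stutter_wait() and worker() are invented names):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_int pause_test;   /* models stutter_pause_test */
    static atomic_int stop;

    /* Models rcu_stutter_wait(): poll while the test is paused. */
    static void stutter_wait(void)
    {
        while (atomic_load(&pause_test) && !atomic_load(&stop))
            usleep(1000);
    }

    static void *worker(void *arg)
    {
        long iterations = 0;

        (void)arg;
        while (!atomic_load(&stop)) {
            iterations++;               /* "do one unit of torture work" */
            stutter_wait();
        }
        printf("worker did %ld iterations\n", iterations);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        for (int i = 0; i < 3; i++) {       /* models the rcu_torture_stutter() loop */
            usleep(100 * 1000);
            atomic_store(&pause_test, 1);   /* stop everybody */
            usleep(100 * 1000);
            atomic_store(&pause_test, 0);   /* let them run again */
        }
        atomic_store(&stop, 1);
        pthread_join(t, NULL);
        return 0;
    }
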
-
-static inline void
-rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
-{
-       pr_alert("%s" TORTURE_FLAG
-                "--- %s: nreaders=%d nfakewriters=%d "
-                "stat_interval=%d verbose=%d test_no_idle_hz=%d "
-                "shuffle_interval=%d stutter=%d irqreader=%d "
-                "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
-                "test_boost=%d/%d test_boost_interval=%d "
-                "test_boost_duration=%d shutdown_secs=%d "
-                "stall_cpu=%d stall_cpu_holdoff=%d "
-                "n_barrier_cbs=%d "
-                "onoff_interval=%d onoff_holdoff=%d\n",
-                torture_type, tag, nrealreaders, nfakewriters,
-                stat_interval, verbose, test_no_idle_hz, shuffle_interval,
-                stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
-                test_boost, cur_ops->can_boost,
-                test_boost_interval, test_boost_duration, shutdown_secs,
-                stall_cpu, stall_cpu_holdoff,
-                n_barrier_cbs,
-                onoff_interval, onoff_holdoff);
-}
-
-static struct notifier_block rcutorture_shutdown_nb = {
-       .notifier_call = rcutorture_shutdown_notify,
-};
-
-static void rcutorture_booster_cleanup(int cpu)
-{
-       struct task_struct *t;
-
-       if (boost_tasks[cpu] == NULL)
-               return;
-       mutex_lock(&boost_mutex);
-       VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
-       t = boost_tasks[cpu];
-       boost_tasks[cpu] = NULL;
-       mutex_unlock(&boost_mutex);
-
-       /* This must be outside of the mutex, otherwise deadlock! */
-       kthread_stop(t);
-       boost_tasks[cpu] = NULL;
-}
-
-static int rcutorture_booster_init(int cpu)
-{
-       int retval;
-
-       if (boost_tasks[cpu] != NULL)
-               return 0;  /* Already created, nothing more to do. */
-
-       /* Don't allow time recalculation while creating a new task. */
-       mutex_lock(&boost_mutex);
-       VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
-       boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
-                                                 cpu_to_node(cpu),
-                                                 "rcu_torture_boost");
-       if (IS_ERR(boost_tasks[cpu])) {
-               retval = PTR_ERR(boost_tasks[cpu]);
-               VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
-               n_rcu_torture_boost_ktrerror++;
-               boost_tasks[cpu] = NULL;
-               mutex_unlock(&boost_mutex);
-               return retval;
-       }
-       kthread_bind(boost_tasks[cpu], cpu);
-       wake_up_process(boost_tasks[cpu]);
-       mutex_unlock(&boost_mutex);
-       return 0;
-}
-
-/*
- * Cause the rcutorture test to shut down the system after the test has
- * run for the time specified by the shutdown_secs module parameter.
- */
-static int
-rcu_torture_shutdown(void *arg)
-{
-       long delta;
-       unsigned long jiffies_snap;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
-       jiffies_snap = ACCESS_ONCE(jiffies);
-       while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
-              !kthread_should_stop()) {
-               delta = shutdown_time - jiffies_snap;
-               if (verbose)
-                       pr_alert("%s" TORTURE_FLAG
-                                "rcu_torture_shutdown task: %lu jiffies remaining\n",
-                                torture_type, delta);
-               schedule_timeout_interruptible(delta);
-               jiffies_snap = ACCESS_ONCE(jiffies);
-       }
-       if (kthread_should_stop()) {
-               VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
-               return 0;
-       }
-
-       /* OK, shut down the system. */
-
-       VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
-       shutdown_task = NULL;   /* Avoid self-kill deadlock. */
-       rcu_torture_cleanup();  /* Get the success/failure message. */
-       kernel_power_off();     /* Shut down the system. */
-       return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Execute random CPU-hotplug operations at the interval specified
- * by the onoff_interval.
- */
-static int
-rcu_torture_onoff(void *arg)
-{
-       int cpu;
-       unsigned long delta;
-       int maxcpu = -1;
-       DEFINE_RCU_RANDOM(rand);
-       int ret;
-       unsigned long starttime;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
-       for_each_online_cpu(cpu)
-               maxcpu = cpu;
-       WARN_ON(maxcpu < 0);
-       if (onoff_holdoff > 0) {
-               VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff");
-               schedule_timeout_interruptible(onoff_holdoff * HZ);
-               VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff");
-       }
-       while (!kthread_should_stop()) {
-               cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
-               if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
-                       if (verbose)
-                               pr_alert("%s" TORTURE_FLAG
-                                        "rcu_torture_onoff task: offlining %d\n",
-                                        torture_type, cpu);
-                       starttime = jiffies;
-                       n_offline_attempts++;
-                       ret = cpu_down(cpu);
-                       if (ret) {
-                               if (verbose)
-                                       pr_alert("%s" TORTURE_FLAG
-                                                "rcu_torture_onoff task: offline %d failed: errno %d\n",
-                                                torture_type, cpu, ret);
-                       } else {
-                               if (verbose)
-                                       pr_alert("%s" TORTURE_FLAG
-                                                "rcu_torture_onoff task: offlined %d\n",
-                                                torture_type, cpu);
-                               n_offline_successes++;
-                               delta = jiffies - starttime;
-                               sum_offline += delta;
-                               if (min_offline < 0) {
-                                       min_offline = delta;
-                                       max_offline = delta;
-                               }
-                               if (min_offline > delta)
-                                       min_offline = delta;
-                               if (max_offline < delta)
-                                       max_offline = delta;
-                       }
-               } else if (cpu_is_hotpluggable(cpu)) {
-                       if (verbose)
-                               pr_alert("%s" TORTURE_FLAG
-                                        "rcu_torture_onoff task: onlining %d\n",
-                                        torture_type, cpu);
-                       starttime = jiffies;
-                       n_online_attempts++;
-                       ret = cpu_up(cpu);
-                       if (ret) {
-                               if (verbose)
-                                       pr_alert("%s" TORTURE_FLAG
-                                                "rcu_torture_onoff task: online %d failed: errno %d\n",
-                                                torture_type, cpu, ret);
-                       } else {
-                               if (verbose)
-                                       pr_alert("%s" TORTURE_FLAG
-                                                "rcu_torture_onoff task: onlined %d\n",
-                                                torture_type, cpu);
-                               n_online_successes++;
-                               delta = jiffies - starttime;
-                               sum_online += delta;
-                               if (min_online < 0) {
-                                       min_online = delta;
-                                       max_online = delta;
-                               }
-                               if (min_online > delta)
-                                       min_online = delta;
-                               if (max_online < delta)
-                                       max_online = delta;
-                       }
-               }
-               schedule_timeout_interruptible(onoff_interval * HZ);
-       }
-       VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
-       return 0;
-}
-
-static int
-rcu_torture_onoff_init(void)
-{
-       int ret;
-
-       if (onoff_interval <= 0)
-               return 0;
-       onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
-       if (IS_ERR(onoff_task)) {
-               ret = PTR_ERR(onoff_task);
-               onoff_task = NULL;
-               return ret;
-       }
-       return 0;
-}
-
-static void rcu_torture_onoff_cleanup(void)
-{
-       if (onoff_task == NULL)
-               return;
-       VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
-       kthread_stop(onoff_task);
-       onoff_task = NULL;
-}
-
-#else /* #ifdef CONFIG_HOTPLUG_CPU */
-
-static int
-rcu_torture_onoff_init(void)
-{
-       return 0;
-}
-
-static void rcu_torture_onoff_cleanup(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
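
rcu_torture_onoff() above keeps, for each hotplug direction, an attempt count, a success count, and min/max/sum of the operation latency, with min and max seeded lazily on the first success (they start out negative). A minimal sketch of that bookkeeping; struct hotplug_stats and record_attempt() are invented names and the jiffies values are made up.

    #include <stdio.h>

    struct hotplug_stats {
        long attempts, successes;
        long sum, min, max;             /* min/max are "unset" while negative */
    };

    static void record_attempt(struct hotplug_stats *s, int ok, long delta)
    {
        s->attempts++;
        if (!ok)
            return;
        s->successes++;
        s->sum += delta;
        if (s->min < 0) {               /* first success: seed min and max */
            s->min = delta;
            s->max = delta;
        }
        if (s->min > delta)
            s->min = delta;
        if (s->max < delta)
            s->max = delta;
    }

    int main(void)
    {
        struct hotplug_stats offline = { .min = -1, .max = -1 };

        record_attempt(&offline, 1, 30);    /* offline "took" 30 jiffies */
        record_attempt(&offline, 0, 0);     /* one failed attempt */
        record_attempt(&offline, 1, 12);
        printf("offline: %ld/%ld min=%ld max=%ld sum=%ld\n",
               offline.successes, offline.attempts,
               offline.min, offline.max, offline.sum);
        return 0;
    }
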
-
-/*
- * CPU-stall kthread.  It waits as specified by stall_cpu_holdoff, then
- * induces a CPU stall for the time specified by stall_cpu.
- */
-static int rcu_torture_stall(void *args)
-{
-       unsigned long stop_at;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_stall task started");
-       if (stall_cpu_holdoff > 0) {
-               VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff");
-               schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
-               VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff");
-       }
-       if (!kthread_should_stop()) {
-               stop_at = get_seconds() + stall_cpu;
-               /* RCU CPU stall is expected behavior in following code. */
-               pr_alert("rcu_torture_stall start.\n");
-               rcu_read_lock();
-               preempt_disable();
-               while (ULONG_CMP_LT(get_seconds(), stop_at))
-                       continue;  /* Induce RCU CPU stall warning. */
-               preempt_enable();
-               rcu_read_unlock();
-               pr_alert("rcu_torture_stall end.\n");
-       }
-       rcutorture_shutdown_absorb("rcu_torture_stall");
-       while (!kthread_should_stop())
-               schedule_timeout_interruptible(10 * HZ);
-       return 0;
-}
-
-/* Spawn CPU-stall kthread, if stall_cpu specified. */
-static int __init rcu_torture_stall_init(void)
-{
-       int ret;
-
-       if (stall_cpu <= 0)
-               return 0;
-       stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall");
-       if (IS_ERR(stall_task)) {
-               ret = PTR_ERR(stall_task);
-               stall_task = NULL;
-               return ret;
-       }
-       return 0;
-}
-
-/* Clean up after the CPU-stall kthread, if one was spawned. */
-static void rcu_torture_stall_cleanup(void)
-{
-       if (stall_task == NULL)
-               return;
-       VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall task");
-       kthread_stop(stall_task);
-       stall_task = NULL;
-}
-
-/* Callback function for RCU barrier testing. */
-void rcu_torture_barrier_cbf(struct rcu_head *rcu)
-{
-       atomic_inc(&barrier_cbs_invoked);
-}
-
-/* kthread function to register callbacks used to test RCU barriers. */
-static int rcu_torture_barrier_cbs(void *arg)
-{
-       long myid = (long)arg;
-       bool lastphase = 0;
-       struct rcu_head rcu;
-
-       init_rcu_head_on_stack(&rcu);
-       VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started");
-       set_user_nice(current, 19);
-       do {
-               wait_event(barrier_cbs_wq[myid],
-                          barrier_phase != lastphase ||
-                          kthread_should_stop() ||
-                          fullstop != FULLSTOP_DONTSTOP);
-               lastphase = barrier_phase;
-               smp_mb(); /* ensure barrier_phase load before ->call(). */
-               if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
-                       break;
-               cur_ops->call(&rcu, rcu_torture_barrier_cbf);
-               if (atomic_dec_and_test(&barrier_cbs_count))
-                       wake_up(&barrier_wq);
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-       VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
-       while (!kthread_should_stop())
-               schedule_timeout_interruptible(1);
-       cur_ops->cb_barrier();
-       destroy_rcu_head_on_stack(&rcu);
-       return 0;
-}
-
-/* kthread function to drive and coordinate RCU barrier testing. */
-static int rcu_torture_barrier(void *arg)
-{
-       int i;
-
-       VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting");
-       do {
-               atomic_set(&barrier_cbs_invoked, 0);
-               atomic_set(&barrier_cbs_count, n_barrier_cbs);
-               smp_mb(); /* Ensure barrier_phase after prior assignments. */
-               barrier_phase = !barrier_phase;
-               for (i = 0; i < n_barrier_cbs; i++)
-                       wake_up(&barrier_cbs_wq[i]);
-               wait_event(barrier_wq,
-                          atomic_read(&barrier_cbs_count) == 0 ||
-                          kthread_should_stop() ||
-                          fullstop != FULLSTOP_DONTSTOP);
-               if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
-                       break;
-               n_barrier_attempts++;
-               cur_ops->cb_barrier();
-               if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
-                       n_rcu_torture_barrier_error++;
-                       WARN_ON_ONCE(1);
-               }
-               n_barrier_successes++;
-               schedule_timeout_interruptible(HZ / 10);
-       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
-       VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
-       rcutorture_shutdown_absorb("rcu_torture_barrier");
-       while (!kthread_should_stop())
-               schedule_timeout_interruptible(1);
-       return 0;
-}
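
The two kthreads above implement barrier testing as a counting handshake: the coordinator flips barrier_phase to release the posters, waits for barrier_cbs_count to drain to zero, runs the flavor's cb_barrier(), and finally checks that every posted callback was invoked. The userspace model below keeps only the counting; threads and immediate "invocation" stand in for callbacks and grace periods, and none of the names are kernel APIs.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    #define N_CBS 4                 /* models n_barrier_cbs */

    static atomic_int cbs_count;    /* callbacks still to be posted this round */
    static atomic_int cbs_invoked;  /* callbacks whose "invocation" has run */

    /* Each poster models rcu_torture_barrier_cbs(): post one callback per round.
     * Here the callback "runs" immediately; in the kernel it runs only after a
     * grace period, which is exactly what cur_ops->cb_barrier() must wait out. */
    static void *poster(void *arg)
    {
        (void)arg;
        usleep(10 * 1000);
        atomic_fetch_add(&cbs_invoked, 1);      /* models rcu_torture_barrier_cbf() */
        atomic_fetch_sub(&cbs_count, 1);
        return NULL;
    }

    int main(void)
    {
        pthread_t posters[N_CBS];

        /* One round of the coordinator, rcu_torture_barrier(). */
        atomic_store(&cbs_invoked, 0);
        atomic_store(&cbs_count, N_CBS);
        for (int i = 0; i < N_CBS; i++)
            pthread_create(&posters[i], NULL, poster, NULL);

        while (atomic_load(&cbs_count) != 0)    /* wait_event(barrier_wq, ...) */
            usleep(1000);
        for (int i = 0; i < N_CBS; i++)         /* stands in for cb_barrier() */
            pthread_join(posters[i], NULL);

        if (atomic_load(&cbs_invoked) != N_CBS)
            puts("!!! barrier error: some callback never ran");
        else
            puts("barrier round OK: every callback was invoked");
        return 0;
    }
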
-
-/* Initialize RCU barrier testing. */
-static int rcu_torture_barrier_init(void)
-{
-       int i;
-       int ret;
-
-       if (n_barrier_cbs == 0)
-               return 0;
-       if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
-               pr_alert("%s" TORTURE_FLAG
-                        " Call or barrier ops missing for %s,\n",
-                        torture_type, cur_ops->name);
-               pr_alert("%s" TORTURE_FLAG
-                        " RCU barrier testing omitted from run.\n",
-                        torture_type);
-               return 0;
-       }
-       atomic_set(&barrier_cbs_count, 0);
-       atomic_set(&barrier_cbs_invoked, 0);
-       barrier_cbs_tasks =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
-                       GFP_KERNEL);
-       barrier_cbs_wq =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
-                       GFP_KERNEL);
-       if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
-               return -ENOMEM;
-       for (i = 0; i < n_barrier_cbs; i++) {
-               init_waitqueue_head(&barrier_cbs_wq[i]);
-               barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs,
-                                                  (void *)(long)i,
-                                                  "rcu_torture_barrier_cbs");
-               if (IS_ERR(barrier_cbs_tasks[i])) {
-                       ret = PTR_ERR(barrier_cbs_tasks[i]);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
-                       barrier_cbs_tasks[i] = NULL;
-                       return ret;
-               }
-       }
-       barrier_task = kthread_run(rcu_torture_barrier, NULL,
-                                  "rcu_torture_barrier");
-       if (IS_ERR(barrier_task)) {
-               ret = PTR_ERR(barrier_task);
-               VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
-               barrier_task = NULL;
-       }
-       return 0;
-}
-
-/* Clean up after RCU barrier testing. */
-static void rcu_torture_barrier_cleanup(void)
-{
-       int i;
-
-       if (barrier_task != NULL) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
-               kthread_stop(barrier_task);
-               barrier_task = NULL;
-       }
-       if (barrier_cbs_tasks != NULL) {
-               for (i = 0; i < n_barrier_cbs; i++) {
-                       if (barrier_cbs_tasks[i] != NULL) {
-                               VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task");
-                               kthread_stop(barrier_cbs_tasks[i]);
-                               barrier_cbs_tasks[i] = NULL;
-                       }
-               }
-               kfree(barrier_cbs_tasks);
-               barrier_cbs_tasks = NULL;
-       }
-       if (barrier_cbs_wq != NULL) {
-               kfree(barrier_cbs_wq);
-               barrier_cbs_wq = NULL;
-       }
-}
-
-static int rcutorture_cpu_notify(struct notifier_block *self,
-                                unsigned long action, void *hcpu)
-{
-       long cpu = (long)hcpu;
-
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-               (void)rcutorture_booster_init(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               rcutorture_booster_cleanup(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block rcutorture_cpu_nb = {
-       .notifier_call = rcutorture_cpu_notify,
-};
-
-static void
-rcu_torture_cleanup(void)
-{
-       int i;
-
-       mutex_lock(&fullstop_mutex);
-       rcutorture_record_test_transition();
-       if (fullstop == FULLSTOP_SHUTDOWN) {
-               pr_warn(/* but going down anyway, so... */
-                      "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
-               mutex_unlock(&fullstop_mutex);
-               schedule_timeout_uninterruptible(10);
-               if (cur_ops->cb_barrier != NULL)
-                       cur_ops->cb_barrier();
-               return;
-       }
-       fullstop = FULLSTOP_RMMOD;
-       mutex_unlock(&fullstop_mutex);
-       unregister_reboot_notifier(&rcutorture_shutdown_nb);
-       rcu_torture_barrier_cleanup();
-       rcu_torture_stall_cleanup();
-       if (stutter_task) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
-               kthread_stop(stutter_task);
-       }
-       stutter_task = NULL;
-       if (shuffler_task) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
-               kthread_stop(shuffler_task);
-               free_cpumask_var(shuffle_tmp_mask);
-       }
-       shuffler_task = NULL;
-
-       if (writer_task) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
-               kthread_stop(writer_task);
-       }
-       writer_task = NULL;
-
-       if (reader_tasks) {
-               for (i = 0; i < nrealreaders; i++) {
-                       if (reader_tasks[i]) {
-                               VERBOSE_PRINTK_STRING(
-                                       "Stopping rcu_torture_reader task");
-                               kthread_stop(reader_tasks[i]);
-                       }
-                       reader_tasks[i] = NULL;
-               }
-               kfree(reader_tasks);
-               reader_tasks = NULL;
-       }
-       rcu_torture_current = NULL;
-
-       if (fakewriter_tasks) {
-               for (i = 0; i < nfakewriters; i++) {
-                       if (fakewriter_tasks[i]) {
-                               VERBOSE_PRINTK_STRING(
-                                       "Stopping rcu_torture_fakewriter task");
-                               kthread_stop(fakewriter_tasks[i]);
-                       }
-                       fakewriter_tasks[i] = NULL;
-               }
-               kfree(fakewriter_tasks);
-               fakewriter_tasks = NULL;
-       }
-
-       if (stats_task) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
-               kthread_stop(stats_task);
-       }
-       stats_task = NULL;
-
-       if (fqs_task) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
-               kthread_stop(fqs_task);
-       }
-       fqs_task = NULL;
-       if ((test_boost == 1 && cur_ops->can_boost) ||
-           test_boost == 2) {
-               unregister_cpu_notifier(&rcutorture_cpu_nb);
-               for_each_possible_cpu(i)
-                       rcutorture_booster_cleanup(i);
-       }
-       if (shutdown_task != NULL) {
-               VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
-               kthread_stop(shutdown_task);
-       }
-       shutdown_task = NULL;
-       rcu_torture_onoff_cleanup();
-
-       /* Wait for all RCU callbacks to fire.  */
-
-       if (cur_ops->cb_barrier != NULL)
-               cur_ops->cb_barrier();
-
-       rcu_torture_stats_print();  /* -After- the stats thread is stopped! */
-
-       if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
-               rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
-       else if (n_online_successes != n_online_attempts ||
-                n_offline_successes != n_offline_attempts)
-               rcu_torture_print_module_parms(cur_ops,
-                                              "End of test: RCU_HOTPLUG");
-       else
-               rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
-}
-
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-static void rcu_torture_leak_cb(struct rcu_head *rhp)
-{
-}
-
-static void rcu_torture_err_cb(struct rcu_head *rhp)
-{
-       /*
-        * This -might- happen due to race conditions, but is unlikely.
-        * The scenario that leads to this happening is that the
-        * first of the pair of duplicate callbacks is queued,
-        * someone else starts a grace period that includes that
-        * callback, then the second of the pair must wait for the
-        * next grace period.  Unlikely, but can happen.  If it
-        * does happen, the debug-objects subsystem won't have splatted.
-        */
-       pr_alert("rcutorture: duplicated callback was invoked.\n");
-}
-#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-
-/*
- * Verify that double-free causes debug-objects to complain, but only
- * if CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.  Otherwise, say that the test
- * cannot be carried out.
- */
-static void rcu_test_debug_objects(void)
-{
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-       struct rcu_head rh1;
-       struct rcu_head rh2;
-
-       init_rcu_head_on_stack(&rh1);
-       init_rcu_head_on_stack(&rh2);
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
-
-       /* Try to queue the rh2 pair of callbacks for the same grace period. */
-       preempt_disable(); /* Prevent preemption from interrupting test. */
-       rcu_read_lock(); /* Make it impossible to finish a grace period. */
-       call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */
-       local_irq_disable(); /* Make it harder to start a new grace period. */
-       call_rcu(&rh2, rcu_torture_leak_cb);
-       call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
-       local_irq_enable();
-       rcu_read_unlock();
-       preempt_enable();
-
-       /* Wait for them all to get done so we can safely return. */
-       rcu_barrier();
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
-       destroy_rcu_head_on_stack(&rh1);
-       destroy_rcu_head_on_stack(&rh2);
-#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-       pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
-#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-}
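
rcu_test_debug_objects() above deliberately queues the same rcu_head twice while grace periods are held off, so that debug-objects (when configured in) can flag the double enqueue before it corrupts the callback list. The hazard itself is visible with an ordinary singly linked list, as in the sketch below; struct cb_head, queue_cb() and the queued flag are invented stand-ins for the debug-objects state tracking.

    #include <stdio.h>

    struct cb_head {
        struct cb_head *next;
        void (*func)(struct cb_head *);
        int queued;                     /* models the debug-objects "active" state */
    };

    static struct cb_head *cb_list;

    /* Models call_rcu() enqueueing plus the debug-objects double-queue check. */
    static int queue_cb(struct cb_head *head, void (*func)(struct cb_head *))
    {
        if (head->queued) {
            puts("debug check: callback queued twice, would corrupt the list");
            return -1;
        }
        head->queued = 1;
        head->func = func;
        head->next = cb_list;
        cb_list = head;
        return 0;
    }

    static void noop_cb(struct cb_head *head) { (void)head; }

    int main(void)
    {
        struct cb_head rh1 = { 0 }, rh2 = { 0 };

        queue_cb(&rh1, noop_cb);        /* fine */
        queue_cb(&rh2, noop_cb);        /* fine */
        queue_cb(&rh2, noop_cb);        /* duplicate: the check fires */
        return 0;
    }
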
-
-static int __init
-rcu_torture_init(void)
-{
-       int i;
-       int cpu;
-       int firsterr = 0;
-       int retval;
-       static struct rcu_torture_ops *torture_ops[] = {
-               &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
-       };
-
-       mutex_lock(&fullstop_mutex);
-
-       /* Process args and tell the world that the torturer is on the job. */
-       for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
-               cur_ops = torture_ops[i];
-               if (strcmp(torture_type, cur_ops->name) == 0)
-                       break;
-       }
-       if (i == ARRAY_SIZE(torture_ops)) {
-               pr_alert("rcu-torture: invalid torture type: \"%s\"\n",
-                        torture_type);
-               pr_alert("rcu-torture types:");
-               for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
-                       pr_alert(" %s", torture_ops[i]->name);
-               pr_alert("\n");
-               mutex_unlock(&fullstop_mutex);
-               return -EINVAL;
-       }
-       if (cur_ops->fqs == NULL && fqs_duration != 0) {
-               pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
-               fqs_duration = 0;
-       }
-       if (cur_ops->init)
-               cur_ops->init(); /* no "goto unwind" prior to this point!!! */
-
-       if (nreaders >= 0)
-               nrealreaders = nreaders;
-       else
-               nrealreaders = 2 * num_online_cpus();
-       rcu_torture_print_module_parms(cur_ops, "Start of test");
-       fullstop = FULLSTOP_DONTSTOP;
-
-       /* Set up the freelist. */
-
-       INIT_LIST_HEAD(&rcu_torture_freelist);
-       for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) {
-               rcu_tortures[i].rtort_mbtest = 0;
-               list_add_tail(&rcu_tortures[i].rtort_free,
-                             &rcu_torture_freelist);
-       }
-
-       /* Initialize the statistics so that each run gets its own numbers. */
-
-       rcu_torture_current = NULL;
-       rcu_torture_current_version = 0;
-       atomic_set(&n_rcu_torture_alloc, 0);
-       atomic_set(&n_rcu_torture_alloc_fail, 0);
-       atomic_set(&n_rcu_torture_free, 0);
-       atomic_set(&n_rcu_torture_mberror, 0);
-       atomic_set(&n_rcu_torture_error, 0);
-       n_rcu_torture_barrier_error = 0;
-       n_rcu_torture_boost_ktrerror = 0;
-       n_rcu_torture_boost_rterror = 0;
-       n_rcu_torture_boost_failure = 0;
-       n_rcu_torture_boosts = 0;
-       for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-               atomic_set(&rcu_torture_wcount[i], 0);
-       for_each_possible_cpu(cpu) {
-               for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
-                       per_cpu(rcu_torture_count, cpu)[i] = 0;
-                       per_cpu(rcu_torture_batch, cpu)[i] = 0;
-               }
-       }
-
-       /* Start up the kthreads. */
-
-       VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task");
-       writer_task = kthread_create(rcu_torture_writer, NULL,
-                                    "rcu_torture_writer");
-       if (IS_ERR(writer_task)) {
-               firsterr = PTR_ERR(writer_task);
-               VERBOSE_PRINTK_ERRSTRING("Failed to create writer");
-               writer_task = NULL;
-               goto unwind;
-       }
-       wake_up_process(writer_task);
-       fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
-                                  GFP_KERNEL);
-       if (fakewriter_tasks == NULL) {
-               VERBOSE_PRINTK_ERRSTRING("out of memory");
-               firsterr = -ENOMEM;
-               goto unwind;
-       }
-       for (i = 0; i < nfakewriters; i++) {
-               VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task");
-               fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL,
-                                                 "rcu_torture_fakewriter");
-               if (IS_ERR(fakewriter_tasks[i])) {
-                       firsterr = PTR_ERR(fakewriter_tasks[i]);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
-                       fakewriter_tasks[i] = NULL;
-                       goto unwind;
-               }
-       }
-       reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
-                              GFP_KERNEL);
-       if (reader_tasks == NULL) {
-               VERBOSE_PRINTK_ERRSTRING("out of memory");
-               firsterr = -ENOMEM;
-               goto unwind;
-       }
-       for (i = 0; i < nrealreaders; i++) {
-               VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task");
-               reader_tasks[i] = kthread_run(rcu_torture_reader, NULL,
-                                             "rcu_torture_reader");
-               if (IS_ERR(reader_tasks[i])) {
-                       firsterr = PTR_ERR(reader_tasks[i]);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create reader");
-                       reader_tasks[i] = NULL;
-                       goto unwind;
-               }
-       }
-       if (stat_interval > 0) {
-               VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task");
-               stats_task = kthread_run(rcu_torture_stats, NULL,
-                                       "rcu_torture_stats");
-               if (IS_ERR(stats_task)) {
-                       firsterr = PTR_ERR(stats_task);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create stats");
-                       stats_task = NULL;
-                       goto unwind;
-               }
-       }
-       if (test_no_idle_hz) {
-               rcu_idle_cpu = num_online_cpus() - 1;
-
-               if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
-                       firsterr = -ENOMEM;
-                       VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask");
-                       goto unwind;
-               }
-
-               /* Create the shuffler thread */
-               shuffler_task = kthread_run(rcu_torture_shuffle, NULL,
-                                         "rcu_torture_shuffle");
-               if (IS_ERR(shuffler_task)) {
-                       free_cpumask_var(shuffle_tmp_mask);
-                       firsterr = PTR_ERR(shuffler_task);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler");
-                       shuffler_task = NULL;
-                       goto unwind;
-               }
-       }
-       if (stutter < 0)
-               stutter = 0;
-       if (stutter) {
-               /* Create the stutter thread */
-               stutter_task = kthread_run(rcu_torture_stutter, NULL,
-                                         "rcu_torture_stutter");
-               if (IS_ERR(stutter_task)) {
-                       firsterr = PTR_ERR(stutter_task);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
-                       stutter_task = NULL;
-                       goto unwind;
-               }
-       }
-       if (fqs_duration < 0)
-               fqs_duration = 0;
-       if (fqs_duration) {
-               /* Create the fqs thread */
-               fqs_task = kthread_run(rcu_torture_fqs, NULL,
-                                      "rcu_torture_fqs");
-               if (IS_ERR(fqs_task)) {
-                       firsterr = PTR_ERR(fqs_task);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
-                       fqs_task = NULL;
-                       goto unwind;
-               }
-       }
-       if (test_boost_interval < 1)
-               test_boost_interval = 1;
-       if (test_boost_duration < 2)
-               test_boost_duration = 2;
-       if ((test_boost == 1 && cur_ops->can_boost) ||
-           test_boost == 2) {
-
-               boost_starttime = jiffies + test_boost_interval * HZ;
-               register_cpu_notifier(&rcutorture_cpu_nb);
-               for_each_possible_cpu(i) {
-                       if (cpu_is_offline(i))
-                               continue;  /* Heuristic: CPU can go offline. */
-                       retval = rcutorture_booster_init(i);
-                       if (retval < 0) {
-                               firsterr = retval;
-                               goto unwind;
-                       }
-               }
-       }
-       if (shutdown_secs > 0) {
-               shutdown_time = jiffies + shutdown_secs * HZ;
-               shutdown_task = kthread_create(rcu_torture_shutdown, NULL,
-                                              "rcu_torture_shutdown");
-               if (IS_ERR(shutdown_task)) {
-                       firsterr = PTR_ERR(shutdown_task);
-                       VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
-                       shutdown_task = NULL;
-                       goto unwind;
-               }
-               wake_up_process(shutdown_task);
-       }
-       i = rcu_torture_onoff_init();
-       if (i != 0) {
-               firsterr = i;
-               goto unwind;
-       }
-       register_reboot_notifier(&rcutorture_shutdown_nb);
-       i = rcu_torture_stall_init();
-       if (i != 0) {
-               firsterr = i;
-               goto unwind;
-       }
-       retval = rcu_torture_barrier_init();
-       if (retval != 0) {
-               firsterr = retval;
-               goto unwind;
-       }
-       if (object_debug)
-               rcu_test_debug_objects();
-       rcutorture_record_test_transition();
-       mutex_unlock(&fullstop_mutex);
-       return 0;
-
-unwind:
-       mutex_unlock(&fullstop_mutex);
-       rcu_torture_cleanup();
-       return firsterr;
-}
-
-module_init(rcu_torture_init);
-module_exit(rcu_torture_cleanup);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
deleted file mode 100644 (file)
index 1dc9f36..0000000
+++ /dev/null
@@ -1,3327 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2008
- *
- * Authors: Dipankar Sarma <dipankar@in.ibm.com>
- *         Manfred Spraul <manfred@colorfullife.com>
- *         Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
- *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
- * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *     Documentation/RCU
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/nmi.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/export.h>
-#include <linux/completion.h>
-#include <linux/moduleparam.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/time.h>
-#include <linux/kernel_stat.h>
-#include <linux/wait.h>
-#include <linux/kthread.h>
-#include <linux/prefetch.h>
-#include <linux/delay.h>
-#include <linux/stop_machine.h>
-#include <linux/random.h>
-#include <linux/ftrace_event.h>
-#include <linux/suspend.h>
-
-#include "rcutree.h"
-#include <trace/events/rcu.h>
-
-#include "rcu.h"
-
-/*
- * Strings used in tracepoints need to be exported via the
- * tracing system such that tools like perf and trace-cmd can
- * translate the string address pointers to actual text.
- */
-#define TPS(x) tracepoint_string(x)
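For orientation, a minimal sketch of how TPS() is meant to be used at a call
site (the example_* wrapper name is hypothetical; trace_rcu_utilization() is
the trace event invoked further down in this file):

static inline void example_trace_utilization(void)
{
        trace_rcu_utilization(TPS("Start context switch"));
        /* ... the work being traced ... */
        trace_rcu_utilization(TPS("End context switch"));
}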
-
-/* Data structures. */
-
-static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-
-/*
- * In order to export the rcu_state name to the tracing tools, it
- * needs to be added in the __tracepoint_string section.
- * This requires defining a separate variable tp_<sname>_varname
- * that points to the string being used, which allows the userspace
- * tracing tools to map the string address back to the matching string.
- */
-#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
-static char sname##_varname[] = #sname; \
-static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
-struct rcu_state sname##_state = { \
-       .level = { &sname##_state.node[0] }, \
-       .call = cr, \
-       .fqs_state = RCU_GP_IDLE, \
-       .gpnum = 0UL - 300UL, \
-       .completed = 0UL - 300UL, \
-       .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-       .orphan_nxttail = &sname##_state.orphan_nxtlist, \
-       .orphan_donetail = &sname##_state.orphan_donelist, \
-       .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
-       .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
-       .name = sname##_varname, \
-       .abbr = sabbr, \
-}; \
-DEFINE_PER_CPU(struct rcu_data, sname##_data)
-
-RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
-
-static struct rcu_state *rcu_state;
-LIST_HEAD(rcu_struct_flavors);
-
-/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
-static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
-module_param(rcu_fanout_leaf, int, 0444);
-int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
-static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
-       NUM_RCU_LVL_0,
-       NUM_RCU_LVL_1,
-       NUM_RCU_LVL_2,
-       NUM_RCU_LVL_3,
-       NUM_RCU_LVL_4,
-};
-int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
-
-/*
- * The rcu_scheduler_active variable transitions from zero to one just
- * before the first task is spawned.  So when this variable is zero, RCU
- * can assume that there is but one task, allowing RCU to (for example)
- * optimize synchronize_sched() to a simple barrier().  When this variable
- * is one, RCU must actually do all the hard work required to detect real
- * grace periods.  This variable is also used to suppress boot-time false
- * positives from lockdep-RCU error checking.
- */
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-
-/*
- * The rcu_scheduler_fully_active variable transitions from zero to one
- * during the early_initcall() processing, which is after the scheduler
- * is capable of creating new tasks.  So RCU processing (for example,
- * creating tasks for RCU priority boosting) must be delayed until after
- * rcu_scheduler_fully_active transitions from zero to one.  We also
- * currently delay invocation of any RCU callbacks until after this point.
- *
- * It might later prove better for people registering RCU callbacks during
- * early boot to take responsibility for these callbacks, but one step at
- * a time.
- */
-static int rcu_scheduler_fully_active __read_mostly;
-
-#ifdef CONFIG_RCU_BOOST
-
-/*
- * Control variables for per-CPU and per-rcu_node kthreads.  These
- * handle all flavors of RCU.
- */
-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DEFINE_PER_CPU(char, rcu_cpu_has_work);
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
-static void invoke_rcu_core(void);
-static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
-
-/*
- * Track the rcutorture test sequence number and the update version
- * number within a given test.  The rcutorture_testseq is incremented
- * on every rcutorture module load and unload, so has an odd value
- * when a test is running.  The rcutorture_vernum is set to zero
- * when rcutorture starts and is incremented on each rcutorture update.
- * These variables enable correlating rcutorture output with the
- * RCU tracing information.
- */
-unsigned long rcutorture_testseq;
-unsigned long rcutorture_vernum;
-
-/*
- * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
- * permit this function to be invoked without holding the root rcu_node
- * structure's ->lock, but of course results can be subject to change.
- */
-static int rcu_gp_in_progress(struct rcu_state *rsp)
-{
-       return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
-}
-
-/*
- * Note a quiescent state.  Because we do not need to know
- * how many quiescent states passed, just if there was at least
- * one since the start of the grace period, this just sets a flag.
- * The caller must have disabled preemption.
- */
-void rcu_sched_qs(int cpu)
-{
-       struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
-}
-
-void rcu_bh_qs(int cpu)
-{
-       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
-}
-
-/*
- * Note a context switch.  This is a quiescent state for RCU-sched,
- * and requires special handling for preemptible RCU.
- * The caller must have disabled preemption.
- */
-void rcu_note_context_switch(int cpu)
-{
-       trace_rcu_utilization(TPS("Start context switch"));
-       rcu_sched_qs(cpu);
-       rcu_preempt_note_context_switch(cpu);
-       trace_rcu_utilization(TPS("End context switch"));
-}
-EXPORT_SYMBOL_GPL(rcu_note_context_switch);
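A sketch of the calling convention (in reality the scheduler calls this from
its context-switch path with preemption already disabled; the example_*
wrapper below is hypothetical):

static void example_report_context_switch(void)
{
        preempt_disable();                              /* Required by the API. */
        rcu_note_context_switch(smp_processor_id());    /* Report the quiescent state. */
        preempt_enable();
}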
-
-DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-       .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-       .dynticks = ATOMIC_INIT(1),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-       .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-       .dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-};
-
-static long blimit = 10;       /* Maximum callbacks per rcu_do_batch. */
-static long qhimark = 10000;   /* If this many pending, ignore blimit. */
-static long qlowmark = 100;    /* Once only this many pending, use blimit. */
-
-module_param(blimit, long, 0444);
-module_param(qhimark, long, 0444);
-module_param(qlowmark, long, 0444);
-
-static ulong jiffies_till_first_fqs = ULONG_MAX;
-static ulong jiffies_till_next_fqs = ULONG_MAX;
-
-module_param(jiffies_till_first_fqs, ulong, 0644);
-module_param(jiffies_till_next_fqs, ulong, 0644);
-
-static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-                                 struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp,
-                        int (*f)(struct rcu_data *rsp, bool *isidle,
-                                 unsigned long *maxj),
-                        bool *isidle, unsigned long *maxj);
-static void force_quiescent_state(struct rcu_state *rsp);
-static int rcu_pending(int cpu);
-
-/*
- * Return the number of RCU-sched batches processed thus far for debug & stats.
- */
-long rcu_batches_completed_sched(void)
-{
-       return rcu_sched_state.completed;
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
-
-/*
- * Return the number of RCU BH batches processed thus far for debug & stats.
- */
-long rcu_batches_completed_bh(void)
-{
-       return rcu_bh_state.completed;
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
-
-/*
- * Force a quiescent state for RCU BH.
- */
-void rcu_bh_force_quiescent_state(void)
-{
-       force_quiescent_state(&rcu_bh_state);
-}
-EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
-
-/*
- * Record the number of times rcutorture tests have been initiated and
- * terminated.  This information allows the debugfs tracing stats to be
- * correlated to the rcutorture messages, even when the rcutorture module
- * is being repeatedly loaded and unloaded.  In other words, we cannot
- * store this state in rcutorture itself.
- */
-void rcutorture_record_test_transition(void)
-{
-       rcutorture_testseq++;
-       rcutorture_vernum = 0;
-}
-EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
-
-/*
- * Record the number of writer passes through the current rcutorture test.
- * This is also used to correlate debugfs tracing stats with the rcutorture
- * messages.
- */
-void rcutorture_record_progress(unsigned long vernum)
-{
-       rcutorture_vernum++;
-}
-EXPORT_SYMBOL_GPL(rcutorture_record_progress);
-
-/*
- * Force a quiescent state for RCU-sched.
- */
-void rcu_sched_force_quiescent_state(void)
-{
-       force_quiescent_state(&rcu_sched_state);
-}
-EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
-
-/*
- * Does the CPU have callbacks ready to be invoked?
- */
-static int
-cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
-{
-       return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
-              rdp->nxttail[RCU_DONE_TAIL] != NULL;
-}
-
-/*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
-static int
-cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       int i;
-
-       if (rcu_gp_in_progress(rsp))
-               return 0;  /* No, a grace period is already in progress. */
-       if (rcu_nocb_needs_gp(rsp))
-               return 1;  /* Yes, a no-CBs CPU needs one. */
-       if (!rdp->nxttail[RCU_NEXT_TAIL])
-               return 0;  /* No, this is a no-CBs (or offline) CPU. */
-       if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
-               return 1;  /* Yes, this CPU has newly registered callbacks. */
-       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-               if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-                   ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
-                                rdp->nxtcompleted[i]))
-                       return 1;  /* Yes, CBs for future grace period. */
-       return 0; /* No grace period needed. */
-}
-
-/*
- * Return the root node of the specified rcu_state structure.
- */
-static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
-{
-       return &rsp->node[0];
-}
-
-/*
- * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
- *
- * If the new value of the ->dynticks_nesting counter now is zero,
- * we really have entered idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
- */
-static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
-                               bool user)
-{
-       trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
-       if (!user && !is_idle_task(current)) {
-               struct task_struct *idle = idle_task(smp_processor_id());
-
-               trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
-               ftrace_dump(DUMP_ORIG);
-               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-                         current->pid, current->comm,
-                         idle->pid, idle->comm); /* must be idle task! */
-       }
-       rcu_prepare_for_idle(smp_processor_id());
-       /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-       smp_mb__before_atomic_inc();  /* See above. */
-       atomic_inc(&rdtp->dynticks);
-       smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
-       WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
-
-       /*
-        * It is illegal to enter an extended quiescent state while
-        * in an RCU read-side critical section.
-        */
-       rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
-                          "Illegal idle entry in RCU read-side critical section.");
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
-                          "Illegal idle entry in RCU-bh read-side critical section.");
-       rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
-                          "Illegal idle entry in RCU-sched read-side critical section.");
-}
-
-/*
- * Enter an RCU extended quiescent state, which can be either the
- * idle loop or adaptive-tickless usermode execution.
- */
-static void rcu_eqs_enter(bool user)
-{
-       long long oldval;
-       struct rcu_dynticks *rdtp;
-
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
-               rdtp->dynticks_nesting = 0;
-       else
-               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-       rcu_eqs_enter_common(rdtp, oldval, user);
-}
-
-/**
- * rcu_idle_enter - inform RCU that current CPU is entering idle
- *
- * Enter idle mode, in other words, -leave- the mode in which RCU
- * read-side critical sections can occur.  (Though RCU read-side
- * critical sections can occur in irq handlers in idle, a possibility
- * handled by irq_enter() and irq_exit().)
- *
- * We crowbar the ->dynticks_nesting field to zero to allow for
- * the possibility of usermode upcalls having messed up our count
- * of interrupt nesting level during the prior busy period.
- */
-void rcu_idle_enter(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       rcu_eqs_enter(false);
-       rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
-
-#ifdef CONFIG_RCU_USER_QS
-/**
- * rcu_user_enter - inform RCU that we are resuming userspace.
- *
- * Enter RCU idle mode right before resuming userspace.  No use of RCU
- * is permitted between this call and rcu_user_exit(). This way the
- * CPU doesn't need to maintain the tick for RCU maintenance purposes
- * when the CPU runs in userspace.
- */
-void rcu_user_enter(void)
-{
-       rcu_eqs_enter(1);
-}
-#endif /* CONFIG_RCU_USER_QS */
-
-/**
- * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
- *
- * Exit from an interrupt handler, which might possibly result in entering
- * idle mode, in other words, leaving the mode in which read-side critical
- * sections can occur.
- *
- * This code assumes that the idle loop never does anything that might
- * result in unbalanced calls to irq_enter() and irq_exit().  If your
- * architecture violates this assumption, RCU will give you what you
- * deserve, good and hard.  But very infrequently and irreproducibly.
- *
- * Use things like work queues to work around this limitation.
- *
- * You have been warned.
- */
-void rcu_irq_exit(void)
-{
-       unsigned long flags;
-       long long oldval;
-       struct rcu_dynticks *rdtp;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       rdtp->dynticks_nesting--;
-       WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
-       if (rdtp->dynticks_nesting)
-               trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
-       else
-               rcu_eqs_enter_common(rdtp, oldval, true);
-       rcu_sysidle_enter(rdtp, 1);
-       local_irq_restore(flags);
-}
-
-/*
- * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
- *
- * If the new value of the ->dynticks_nesting counter was previously zero,
- * we really have exited idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
- */
-static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
-                              int user)
-{
-       smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
-       atomic_inc(&rdtp->dynticks);
-       /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-       smp_mb__after_atomic_inc();  /* See above. */
-       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
-       rcu_cleanup_after_idle(smp_processor_id());
-       trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
-       if (!user && !is_idle_task(current)) {
-               struct task_struct *idle = idle_task(smp_processor_id());
-
-               trace_rcu_dyntick(TPS("Error on exit: not idle task"),
-                                 oldval, rdtp->dynticks_nesting);
-               ftrace_dump(DUMP_ORIG);
-               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-                         current->pid, current->comm,
-                         idle->pid, idle->comm); /* must be idle task! */
-       }
-}
-
-/*
- * Exit an RCU extended quiescent state, which can be either the
- * idle loop or adaptive-tickless usermode execution.
- */
-static void rcu_eqs_exit(bool user)
-{
-       struct rcu_dynticks *rdtp;
-       long long oldval;
-
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE(oldval < 0);
-       if (oldval & DYNTICK_TASK_NEST_MASK)
-               rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-       else
-               rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_eqs_exit_common(rdtp, oldval, user);
-}
-
-/**
- * rcu_idle_exit - inform RCU that current CPU is leaving idle
- *
- * Exit idle mode, in other words, -enter- the mode in which RCU
- * read-side critical sections can occur.
- *
- * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
- * allow for the possibility of usermode upcalls messing up our count
- * of interrupt nesting level during the busy period that is just
- * now starting.
- */
-void rcu_idle_exit(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       rcu_eqs_exit(false);
-       rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
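A hypothetical idle-loop skeleton showing how rcu_idle_enter() and
rcu_idle_exit() are intended to bracket the idle period; the real loops live
in the generic and architecture idle code, not here, and the example_* name
is made up for illustration:

static void example_idle_loop(void)
{
        while (!need_resched()) {
                rcu_idle_enter();       /* RCU may now ignore this CPU. */
                arch_cpu_idle();        /* Low-power wait for an interrupt. */
                rcu_idle_exit();        /* CPU is again visible to RCU. */
        }
}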
-
-#ifdef CONFIG_RCU_USER_QS
-/**
- * rcu_user_exit - inform RCU that we are exiting userspace.
- *
- * Exit RCU idle mode while entering the kernel, because the kernel
- * can run an RCU read-side critical section at any time.
- */
-void rcu_user_exit(void)
-{
-       rcu_eqs_exit(1);
-}
-#endif /* CONFIG_RCU_USER_QS */
-
-/**
- * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
- *
- * Enter an interrupt handler, which might possibly result in exiting
- * idle mode, in other words, entering the mode in which read-side critical
- * sections can occur.
- *
- * Note that the Linux kernel is fully capable of entering an interrupt
- * handler that it never exits, for example when doing upcalls to
- * user mode!  This code assumes that the idle loop never does upcalls to
- * user mode.  If your architecture does do upcalls from the idle loop (or
- * does anything else that results in unbalanced calls to the irq_enter()
- * and irq_exit() functions), RCU will give you what you deserve, good
- * and hard.  But very infrequently and irreproducibly.
- *
- * Use things like work queues to work around this limitation.
- *
- * You have been warned.
- */
-void rcu_irq_enter(void)
-{
-       unsigned long flags;
-       struct rcu_dynticks *rdtp;
-       long long oldval;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       rdtp->dynticks_nesting++;
-       WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
-       if (oldval)
-               trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
-       else
-               rcu_eqs_exit_common(rdtp, oldval, true);
-       rcu_sysidle_exit(rdtp, 1);
-       local_irq_restore(flags);
-}
-
-/**
- * rcu_nmi_enter - inform RCU of entry to NMI context
- *
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi_nesting and
- * rdtp->dynticks to let the RCU grace-period handling know that the
- * CPU is active.
- */
-void rcu_nmi_enter(void)
-{
-       struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-       if (rdtp->dynticks_nmi_nesting == 0 &&
-           (atomic_read(&rdtp->dynticks) & 0x1))
-               return;
-       rdtp->dynticks_nmi_nesting++;
-       smp_mb__before_atomic_inc();  /* Force delay from prior write. */
-       atomic_inc(&rdtp->dynticks);
-       /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-       smp_mb__after_atomic_inc();  /* See above. */
-       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
-}
-
-/**
- * rcu_nmi_exit - inform RCU of exit from NMI context
- *
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi_nesting and
- * rdtp->dynticks to let the RCU grace-period handling know that the
- * CPU is no longer active.
- */
-void rcu_nmi_exit(void)
-{
-       struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-       if (rdtp->dynticks_nmi_nesting == 0 ||
-           --rdtp->dynticks_nmi_nesting != 0)
-               return;
-       /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-       smp_mb__before_atomic_inc();  /* See above. */
-       atomic_inc(&rdtp->dynticks);
-       smp_mb__after_atomic_inc();  /* Force delay to next write. */
-       WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
-}
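A sketch of how an NMI path brackets its handler; in practice architectures
reach these functions through the nmi_enter()/nmi_exit() helpers rather than
calling them directly, and example_do_nmi() is hypothetical:

static void example_do_nmi(void)
{
        rcu_nmi_enter();        /* Tell RCU this CPU is momentarily non-idle. */
        /* ... handle the NMI ... */
        rcu_nmi_exit();         /* Restore the pre-NMI dynticks view. */
}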
-
-/**
- * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
- *
- * If the current CPU is in its idle loop and is neither in an interrupt
- * nor an NMI handler, return true.
- */
-int rcu_is_cpu_idle(void)
-{
-       int ret;
-
-       preempt_disable();
-       ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
-       preempt_enable();
-       return ret;
-}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
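A typical debug-oriented use, sketched with a hypothetical example_read_side():
warn if RCU is not watching this CPU before entering a read-side critical
section from a context that might be the idle loop.

static void example_read_side(void)
{
        WARN_ON_ONCE(rcu_is_cpu_idle());        /* RCU would ignore this reader. */
        rcu_read_lock();
        /* ... dereference RCU-protected data ... */
        rcu_read_unlock();
}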
-
-#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
-
-/*
- * Is the current CPU online?  Disable preemption to avoid false positives
- * that could otherwise happen due to the current CPU number being sampled,
- * this task being preempted, its old CPU being taken offline, resuming
- * on some other CPU, then determining that its old CPU is now offline.
- * It is OK to use RCU on an offline processor during initial boot, hence
- * the check for rcu_scheduler_fully_active.  Note also that it is OK
- * for a CPU coming online to use RCU for one jiffy prior to marking itself
- * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
- * offline to continue to use RCU for one jiffy after marking itself
- * offline in the cpu_online_mask.  This leniency is necessary given the
- * non-atomic nature of the online and offline processing, for example,
- * the fact that a CPU enters the scheduler after completing the CPU_DYING
- * notifiers.
- *
- * This is also why RCU internally marks CPUs online during the
- * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
- *
- * Disable checking if in an NMI handler because we cannot safely report
- * errors from NMI handlers anyway.
- */
-bool rcu_lockdep_current_cpu_online(void)
-{
-       struct rcu_data *rdp;
-       struct rcu_node *rnp;
-       bool ret;
-
-       if (in_nmi())
-               return 1;
-       preempt_disable();
-       rdp = &__get_cpu_var(rcu_sched_data);
-       rnp = rdp->mynode;
-       ret = (rdp->grpmask & rnp->qsmaskinit) ||
-             !rcu_scheduler_fully_active;
-       preempt_enable();
-       return ret;
-}
-EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
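A sketch of the kind of check this helper backs (the wrapper and message
string are illustrative only): complain if an RCU read-side primitive runs on
a CPU that RCU believes to be offline.

static void example_offline_check(void)
{
        rcu_lockdep_assert(rcu_lockdep_current_cpu_online(),
                           "Illegal RCU usage from an offline CPU");
}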
-
-#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
-
-/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
- *
- * If the current CPU is idle or running at a first-level (not nested)
- * interrupt from idle, return true.  The caller must have at least
- * disabled preemption.
- */
-static int rcu_is_cpu_rrupt_from_idle(void)
-{
-       return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
-}
-
-/*
- * Snapshot the specified CPU's dynticks counter so that we can later
- * credit them with an implicit quiescent state.  Return 1 if this CPU
- * is in dynticks idle mode, which is an extended quiescent state.
- */
-static int dyntick_save_progress_counter(struct rcu_data *rdp,
-                                        bool *isidle, unsigned long *maxj)
-{
-       rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
-       rcu_sysidle_check_cpu(rdp, isidle, maxj);
-       return (rdp->dynticks_snap & 0x1) == 0;
-}
-
-/*
- * Return true if the specified CPU has passed through a quiescent
- * state by virtue of being in or having passed through a dynticks
- * idle state since the last call to dyntick_save_progress_counter()
- * for this same CPU, or by virtue of having been offline.
- */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
-                                   bool *isidle, unsigned long *maxj)
-{
-       unsigned int curr;
-       unsigned int snap;
-
-       curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
-       snap = (unsigned int)rdp->dynticks_snap;
-
-       /*
-        * If the CPU passed through or entered a dynticks idle phase with
-        * no active irq/NMI handlers, then we can safely pretend that the CPU
-        * already acknowledged the request to pass through a quiescent
-        * state.  Either way, that CPU cannot possibly be in an RCU
-        * read-side critical section that started before the beginning
-        * of the current RCU grace period.
-        */
-       if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
-               rdp->dynticks_fqs++;
-               return 1;
-       }
-
-       /*
-        * Check for the CPU being offline, but only if the grace period
-        * is old enough.  We don't need to worry about the CPU changing
-        * state: If we see it offline even once, it has been through a
-        * quiescent state.
-        *
-        * The reason for insisting that the grace period be at least
-        * one jiffy old is that CPUs that are not quite online and that
-        * have just gone offline can still execute RCU read-side critical
-        * sections.
-        */
-       if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
-               return 0;  /* Grace period is not old enough. */
-       barrier();
-       if (cpu_is_offline(rdp->cpu)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
-               rdp->offline_fqs++;
-               return 1;
-       }
-
-       /*
-        * There is a possibility that a CPU in adaptive-ticks state
-        * might run in the kernel with the scheduling-clock tick disabled
-        * for an extended time period.  Invoke rcu_kick_nohz_cpu() to
-        * force the CPU to restart the scheduling-clock tick in this
-        * CPU is in this state.
-        */
-       rcu_kick_nohz_cpu(rdp->cpu);
-
-       return 0;
-}
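A simplified stand-alone illustration of the snapshot comparison above (it
ignores the kernel's UINT_CMP_GE() wraparound handling, and the example_*
name is hypothetical): the ->dynticks counter is even exactly when the CPU is
idle, and two or more increments since the snapshot mean at least one full
idle sojourn has occurred.

static bool example_dynticks_quiesced(unsigned int snap, unsigned int curr)
{
        if ((curr & 0x1) == 0)          /* Counter even: CPU is idle right now. */
                return true;
        return curr - snap >= 2;        /* Idle sojourn since the snapshot. */
}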
-
-static void record_gp_stall_check_time(struct rcu_state *rsp)
-{
-       rsp->gp_start = jiffies;
-       rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
-}
-
-/*
- * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
- * for architectures that do not implement trigger_all_cpu_backtrace().
- * The NMI-triggered stack traces are more accurate because they are
- * printed by the target CPU.
- */
-static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
-{
-       int cpu;
-       unsigned long flags;
-       struct rcu_node *rnp;
-
-       rcu_for_each_leaf_node(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               if (rnp->qsmask != 0) {
-                       for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-                               if (rnp->qsmask & (1UL << cpu))
-                                       dump_cpu_task(rnp->grplo + cpu);
-               }
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
-}
-
-static void print_other_cpu_stall(struct rcu_state *rsp)
-{
-       int cpu;
-       long delta;
-       unsigned long flags;
-       int ndetected = 0;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-       long totqlen = 0;
-
-       /* Only let one CPU complain about others per time interval. */
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       delta = jiffies - rsp->jiffies_stall;
-       if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-       rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
-       /*
-        * OK, time to rat on our buddy...
-        * See Documentation/RCU/stallwarn.txt for info on how to debug
-        * RCU CPU stall warnings.
-        */
-       pr_err("INFO: %s detected stalls on CPUs/tasks:",
-              rsp->name);
-       print_cpu_stall_info_begin();
-       rcu_for_each_leaf_node(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               ndetected += rcu_print_task_stall(rnp);
-               if (rnp->qsmask != 0) {
-                       for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-                               if (rnp->qsmask & (1UL << cpu)) {
-                                       print_cpu_stall_info(rsp,
-                                                            rnp->grplo + cpu);
-                                       ndetected++;
-                               }
-               }
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
-
-       /*
-        * Now rat on any tasks that got kicked up to the root rcu_node
-        * due to CPU offlining.
-        */
-       rnp = rcu_get_root(rsp);
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       ndetected += rcu_print_task_stall(rnp);
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
-       print_cpu_stall_info_end();
-       for_each_possible_cpu(cpu)
-               totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
-       pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
-              smp_processor_id(), (long)(jiffies - rsp->gp_start),
-              rsp->gpnum, rsp->completed, totqlen);
-       if (ndetected == 0)
-               pr_err("INFO: Stall ended before state dump start\n");
-       else if (!trigger_all_cpu_backtrace())
-               rcu_dump_cpu_stacks(rsp);
-
-       /* Complain about tasks blocking the grace period. */
-
-       rcu_print_detail_task_stall(rsp);
-
-       force_quiescent_state(rsp);  /* Kick them all. */
-}
-
-/*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
-static void print_cpu_stall(struct rcu_state *rsp)
-{
-       int cpu;
-       unsigned long flags;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-       long totqlen = 0;
-
-       /*
-        * OK, time to rat on ourselves...
-        * See Documentation/RCU/stallwarn.txt for info on how to debug
-        * RCU CPU stall warnings.
-        */
-       pr_err("INFO: %s self-detected stall on CPU", rsp->name);
-       print_cpu_stall_info_begin();
-       print_cpu_stall_info(rsp, smp_processor_id());
-       print_cpu_stall_info_end();
-       for_each_possible_cpu(cpu)
-               totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
-       pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
-               jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
-       if (!trigger_all_cpu_backtrace())
-               dump_stack();
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
-               rsp->jiffies_stall = jiffies +
-                                    3 * rcu_jiffies_till_stall_check() + 3;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
-       /*
-        * Attempt to revive the RCU machinery by forcing a context switch.
-        *
-        * A context switch would normally allow the RCU state machine to make
-        * progress and it could be we're stuck in kernel space without context
-        * switches for an entirely unreasonable amount of time.
-        */
-       resched_cpu(smp_processor_id());
-}
-
-static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       unsigned long j;
-       unsigned long js;
-       struct rcu_node *rnp;
-
-       if (rcu_cpu_stall_suppress)
-               return;
-       j = ACCESS_ONCE(jiffies);
-       js = ACCESS_ONCE(rsp->jiffies_stall);
-       rnp = rdp->mynode;
-       if (rcu_gp_in_progress(rsp) &&
-           (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
-
-               /* We haven't checked in, so go dump stack. */
-               print_cpu_stall(rsp);
-
-       } else if (rcu_gp_in_progress(rsp) &&
-                  ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
-
-               /* They had a few time units to dump stack, so complain. */
-               print_other_cpu_stall(rsp);
-       }
-}
-
-/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
- *
- * The caller must disable hard irqs.
- */
-void rcu_cpu_stall_reset(void)
-{
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp)
-               rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
-}
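A sketch of a debugger-style caller (example_debugger_stop() is hypothetical;
kgdb does something similar): before stopping all CPUs for a long time, push
the stall deadline far into the future so the stop itself does not trigger
bogus warnings.

static void example_debugger_stop(void)
{
        unsigned long flags;

        local_irq_save(flags);          /* The API requires hard irqs off. */
        rcu_cpu_stall_reset();          /* No stall warnings for this GP set. */
        /* ... park the other CPUs and talk to the debugger ... */
        local_irq_restore(flags);
}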
-
-/*
- * Initialize the specified rcu_data structure's callback list to empty.
- */
-static void init_callback_list(struct rcu_data *rdp)
-{
-       int i;
-
-       if (init_nocb_callback_list(rdp))
-               return;
-       rdp->nxtlist = NULL;
-       for (i = 0; i < RCU_NEXT_SIZE; i++)
-               rdp->nxttail[i] = &rdp->nxtlist;
-}
-
-/*
- * Determine the value that ->completed will have at the end of the
- * next subsequent grace period.  This is used to tag callbacks so that
- * a CPU can invoke callbacks in a timely fashion even if that CPU has
- * been dyntick-idle for an extended period with callbacks under the
- * influence of RCU_FAST_NO_HZ.
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
-                                      struct rcu_node *rnp)
-{
-       /*
-        * If RCU is idle, we just wait for the next grace period.
-        * But we can only be sure that RCU is idle if we are looking
-        * at the root rcu_node structure -- otherwise, a new grace
-        * period might have started, but just not yet gotten around
-        * to initializing the current non-root rcu_node structure.
-        */
-       if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
-               return rnp->completed + 1;
-
-       /*
-        * Otherwise, wait for a possible partial grace period and
-        * then the subsequent full grace period.
-        */
-       return rnp->completed + 2;
-}
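A numeric illustration of the rule above (illustrative values only):

/*
 *   Root rcu_node, gpnum == completed == 100 (RCU idle):
 *       new callbacks can be tagged with ->completed == 101 (the next GP).
 *   Any other case (a grace period may already be in flight):
 *       new callbacks must be tagged with ->completed == 102, i.e. wait
 *       out the possible current GP plus one full subsequent GP.
 */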
-
-/*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
-static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-                               unsigned long c, const char *s)
-{
-       trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
-                                     rnp->completed, c, rnp->level,
-                                     rnp->grplo, rnp->grphi, s);
-}
-
-/*
- * Start some future grace period, as needed to handle newly arrived
- * callbacks.  The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field.
- *
- * The caller must hold the specified rcu_node structure's ->lock.
- */
-static unsigned long __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
-{
-       unsigned long c;
-       int i;
-       struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
-
-       /*
-        * Pick up grace-period number for new callbacks.  If this
-        * grace period is already marked as needed, return to the caller.
-        */
-       c = rcu_cbs_completed(rdp->rsp, rnp);
-       trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
-       if (rnp->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
-               return c;
-       }
-
-       /*
-        * If either this rcu_node structure or the root rcu_node structure
-        * believe that a grace period is in progress, then we must wait
-        * for the one following, which is in "c".  Because our request
-        * will be noticed at the end of the current grace period, we don't
-        * need to explicitly start one.
-        */
-       if (rnp->gpnum != rnp->completed ||
-           ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
-               rnp->need_future_gp[c & 0x1]++;
-               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
-               return c;
-       }
-
-       /*
-        * There might be no grace period in progress.  If we don't already
-        * hold it, acquire the root rcu_node structure's lock in order to
-        * start one (if needed).
-        */
-       if (rnp != rnp_root)
-               raw_spin_lock(&rnp_root->lock);
-
-       /*
-        * Get a new grace-period number.  If there really is no grace
-        * period in progress, it will be smaller than the one we obtained
-        * earlier.  Adjust callbacks as needed.  Note that even no-CBs
-        * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
-        */
-       c = rcu_cbs_completed(rdp->rsp, rnp_root);
-       for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
-               if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
-                       rdp->nxtcompleted[i] = c;
-
-       /*
- * If the need for the required grace period is already
-        * recorded, trace and leave.
-        */
-       if (rnp_root->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
-               goto unlock_out;
-       }
-
-       /* Record the need for the future grace period. */
-       rnp_root->need_future_gp[c & 0x1]++;
-
-       /* If a grace period is not already in progress, start one. */
-       if (rnp_root->gpnum != rnp_root->completed) {
-               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
-       } else {
-               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
-               rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
-       }
-unlock_out:
-       if (rnp != rnp_root)
-               raw_spin_unlock(&rnp_root->lock);
-       return c;
-}
-
-/*
- * Clean up any old requests for the just-ended grace period.  Also return
- * whether any additional grace periods have been requested.  Also invoke
- * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
- * waiting for this grace period to complete.
- */
-static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-       int c = rnp->completed;
-       int needmore;
-       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-
-       rcu_nocb_gp_cleanup(rsp, rnp);
-       rnp->need_future_gp[c & 0x1] = 0;
-       needmore = rnp->need_future_gp[(c + 1) & 0x1];
-       trace_rcu_future_gp(rnp, rdp, c,
-                           needmore ? TPS("CleanupMore") : TPS("Cleanup"));
-       return needmore;
-}
-
-/*
- * If there is room, assign a ->completed number to any callbacks on
- * this CPU that have not already been assigned.  Also accelerate any
- * callbacks that were previously assigned a ->completed number that has
- * since proven to be too conservative, which can happen if callbacks get
- * assigned a ->completed number while RCU is idle, but with reference to
- * a non-root rcu_node structure.  This function is idempotent, so it does
- * not hurt to call it repeatedly.
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
-                              struct rcu_data *rdp)
-{
-       unsigned long c;
-       int i;
-
-       /* If the CPU has no callbacks, nothing to do. */
-       if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-               return;
-
-       /*
-        * Starting from the sublist containing the callbacks most
-        * recently assigned a ->completed number and working down, find the
-        * first sublist that is not assignable to an upcoming grace period.
-        * Such a sublist has something in it (first two tests) and has
-        * a ->completed number assigned that will complete sooner than
-        * the ->completed number for newly arrived callbacks (last test).
-        *
-        * The key point is that any later sublist can be assigned the
-        * same ->completed number as the newly arrived callbacks, which
- * means that the callbacks in any of these later sublists can be
-        * grouped into a single sublist, whether or not they have already
-        * been assigned a ->completed number.
-        */
-       c = rcu_cbs_completed(rsp, rnp);
-       for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
-               if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
-                   !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
-                       break;
-
-       /*
- * If there is no sublist for unassigned callbacks, leave.
-        * At the same time, advance "i" one sublist, so that "i" will
-        * index into the sublist where all the remaining callbacks should
-        * be grouped into.
-        */
-       if (++i >= RCU_NEXT_TAIL)
-               return;
-
-       /*
-        * Assign all subsequent callbacks' ->completed number to the next
-        * full grace period and group them all in the sublist initially
-        * indexed by "i".
-        */
-       for (; i <= RCU_NEXT_TAIL; i++) {
-               rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
-               rdp->nxtcompleted[i] = c;
-       }
-       /* Record any needed additional grace periods. */
-       rcu_start_future_gp(rnp, rdp);
-
-       /* Trace depending on how much we were able to accelerate. */
-       if (!*rdp->nxttail[RCU_WAIT_TAIL])
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
-       else
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
-}
-
-/*
- * Move any callbacks whose grace period has completed to the
- * RCU_DONE_TAIL sublist, then compact the remaining sublists and
- * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
- * sublist.  This function is idempotent, so it does not hurt to
- * invoke it repeatedly.  As long as it is not invoked -too- often...
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
-                           struct rcu_data *rdp)
-{
-       int i, j;
-
-       /* If the CPU has no callbacks, nothing to do. */
-       if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-               return;
-
-       /*
-        * Find all callbacks whose ->completed numbers indicate that they
-        * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
-        */
-       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-               if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
-                       break;
-               rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
-       }
-       /* Clean up any sublist tail pointers that were misordered above. */
-       for (j = RCU_WAIT_TAIL; j < i; j++)
-               rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
-
-       /* Copy down callbacks to fill in empty sublists. */
-       for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-               if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
-                       break;
-               rdp->nxttail[j] = rdp->nxttail[i];
-               rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
-       }
-
-       /* Classify any remaining callbacks. */
-       rcu_accelerate_cbs(rsp, rnp, rdp);
-}
-
-/*
- * Update CPU-local rcu_data state to record the beginnings and ends of
- * grace periods.  The caller must hold the ->lock of the leaf rcu_node
- * structure corresponding to the current CPU, and must have irqs disabled.
- */
-static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
-{
-       /* Handle the ends of any preceding grace periods first. */
-       if (rdp->completed == rnp->completed) {
-
-               /* No grace period end, so just accelerate recent callbacks. */
-               rcu_accelerate_cbs(rsp, rnp, rdp);
-
-       } else {
-
-               /* Advance callbacks. */
-               rcu_advance_cbs(rsp, rnp, rdp);
-
-               /* Remember that we saw this grace-period completion. */
-               rdp->completed = rnp->completed;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
-       }
-
-       if (rdp->gpnum != rnp->gpnum) {
-               /*
-                * If the current grace period is waiting for this CPU,
-                * set up to detect a quiescent state, otherwise don't
-                * go looking for one.
-                */
-               rdp->gpnum = rnp->gpnum;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-               rdp->passed_quiesce = 0;
-               rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
-               zero_cpu_stall_ticks(rdp);
-       }
-}
-
-static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       unsigned long flags;
-       struct rcu_node *rnp;
-
-       local_irq_save(flags);
-       rnp = rdp->mynode;
-       if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
-            rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
-           !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
-               local_irq_restore(flags);
-               return;
-       }
-       __note_gp_changes(rsp, rnp, rdp);
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Initialize a new grace period.
- */
-static int rcu_gp_init(struct rcu_state *rsp)
-{
-       struct rcu_data *rdp;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       rcu_bind_gp_kthread();
-       raw_spin_lock_irq(&rnp->lock);
-       rsp->gp_flags = 0; /* Clear all flags: New grace period. */
-
-       if (rcu_gp_in_progress(rsp)) {
-               /* Grace period already in progress, don't start another.  */
-               raw_spin_unlock_irq(&rnp->lock);
-               return 0;
-       }
-
-       /* Advance to a new grace period and initialize state. */
-       rsp->gpnum++;
-       trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
-       record_gp_stall_check_time(rsp);
-       raw_spin_unlock_irq(&rnp->lock);
-
-       /* Exclude any concurrent CPU-hotplug operations. */
-       mutex_lock(&rsp->onoff_mutex);
-
-       /*
-        * Set the quiescent-state-needed bits in all the rcu_node
-        * structures for all currently online CPUs in breadth-first order,
-        * starting from the root rcu_node structure, relying on the layout
-        * of the tree within the rsp->node[] array.  Note that other CPUs
-        * will access only the leaves of the hierarchy, thus seeing that no
-        * grace period is in progress, at least until the corresponding
-        * leaf node has been initialized.  In addition, we have excluded
-        * CPU-hotplug operations.
-        *
-        * The grace period cannot complete until the initialization
-        * process finishes, because this kthread handles both.
-        */
-       rcu_for_each_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irq(&rnp->lock);
-               rdp = this_cpu_ptr(rsp->rda);
-               rcu_preempt_check_blocked_tasks(rnp);
-               rnp->qsmask = rnp->qsmaskinit;
-               ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
-               WARN_ON_ONCE(rnp->completed != rsp->completed);
-               ACCESS_ONCE(rnp->completed) = rsp->completed;
-               if (rnp == rdp->mynode)
-                       __note_gp_changes(rsp, rnp, rdp);
-               rcu_preempt_boost_start_gp(rnp);
-               trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
-                                           rnp->level, rnp->grplo,
-                                           rnp->grphi, rnp->qsmask);
-               raw_spin_unlock_irq(&rnp->lock);
-#ifdef CONFIG_PROVE_RCU_DELAY
-               if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
-                   system_state == SYSTEM_RUNNING)
-                       udelay(200);
-#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
-               cond_resched();
-       }
-
-       mutex_unlock(&rsp->onoff_mutex);
-       return 1;
-}
-
-/*
- * Do one round of quiescent-state forcing.
- */
-int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
-{
-       int fqs_state = fqs_state_in;
-       bool isidle = false;
-       unsigned long maxj;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       rsp->n_force_qs++;
-       if (fqs_state == RCU_SAVE_DYNTICK) {
-               /* Collect dyntick-idle snapshots. */
-               if (is_sysidle_rcu_state(rsp)) {
-                       isidle = 1;
-                       maxj = jiffies - ULONG_MAX / 4;
-               }
-               force_qs_rnp(rsp, dyntick_save_progress_counter,
-                            &isidle, &maxj);
-               rcu_sysidle_report_gp(rsp, isidle, maxj);
-               fqs_state = RCU_FORCE_QS;
-       } else {
-               /* Handle dyntick-idle and offline CPUs. */
-               isidle = 0;
-               force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
-       }
-       /* Clear flag to prevent immediate re-entry. */
-       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               raw_spin_lock_irq(&rnp->lock);
-               rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
-               raw_spin_unlock_irq(&rnp->lock);
-       }
-       return fqs_state;
-}
-
-/*
- * Clean up after the old grace period.
- */
-static void rcu_gp_cleanup(struct rcu_state *rsp)
-{
-       unsigned long gp_duration;
-       int nocb = 0;
-       struct rcu_data *rdp;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       raw_spin_lock_irq(&rnp->lock);
-       gp_duration = jiffies - rsp->gp_start;
-       if (gp_duration > rsp->gp_max)
-               rsp->gp_max = gp_duration;
-
-       /*
-        * We know the grace period is complete, but to everyone else
-        * it appears to still be ongoing.  But it is also the case
-        * that to everyone else it looks like there is nothing that
-        * they can do to advance the grace period.  It is therefore
-        * safe for us to drop the lock in order to mark the grace
-        * period as completed in all of the rcu_node structures.
-        */
-       raw_spin_unlock_irq(&rnp->lock);
-
-       /*
-        * Propagate new ->completed value to rcu_node structures so
-        * that other CPUs don't have to wait until the start of the next
-        * grace period to process their callbacks.  This also avoids
-        * some nasty RCU grace-period initialization races by forcing
-        * the end of the current grace period to be completely recorded in
-        * all of the rcu_node structures before the beginning of the next
-        * grace period is recorded in any of the rcu_node structures.
-        */
-       rcu_for_each_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irq(&rnp->lock);
-               ACCESS_ONCE(rnp->completed) = rsp->gpnum;
-               rdp = this_cpu_ptr(rsp->rda);
-               if (rnp == rdp->mynode)
-                       __note_gp_changes(rsp, rnp, rdp);
-               nocb += rcu_future_gp_cleanup(rsp, rnp);
-               raw_spin_unlock_irq(&rnp->lock);
-               cond_resched();
-       }
-       rnp = rcu_get_root(rsp);
-       raw_spin_lock_irq(&rnp->lock);
-       rcu_nocb_gp_set(rnp, nocb);
-
-       rsp->completed = rsp->gpnum; /* Declare grace period done. */
-       trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
-       rsp->fqs_state = RCU_GP_IDLE;
-       rdp = this_cpu_ptr(rsp->rda);
-       rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
-       if (cpu_needs_another_gp(rsp, rdp))
-               rsp->gp_flags = 1;
-       raw_spin_unlock_irq(&rnp->lock);
-}
-
-/*
- * Body of kthread that handles grace periods.
- */
-static int __noreturn rcu_gp_kthread(void *arg)
-{
-       int fqs_state;
-       unsigned long j;
-       int ret;
-       struct rcu_state *rsp = arg;
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       for (;;) {
-
-               /* Handle grace-period start. */
-               for (;;) {
-                       wait_event_interruptible(rsp->gp_wq,
-                                                rsp->gp_flags &
-                                                RCU_GP_FLAG_INIT);
-                       if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
-                           rcu_gp_init(rsp))
-                               break;
-                       cond_resched();
-                       flush_signals(current);
-               }
-
-               /* Handle quiescent-state forcing. */
-               fqs_state = RCU_SAVE_DYNTICK;
-               j = jiffies_till_first_fqs;
-               if (j > HZ) {
-                       j = HZ;
-                       jiffies_till_first_fqs = HZ;
-               }
-               for (;;) {
-                       rsp->jiffies_force_qs = jiffies + j;
-                       ret = wait_event_interruptible_timeout(rsp->gp_wq,
-                                       (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
-                                       (!ACCESS_ONCE(rnp->qsmask) &&
-                                        !rcu_preempt_blocked_readers_cgp(rnp)),
-                                       j);
-                       /* If grace period done, leave loop. */
-                       if (!ACCESS_ONCE(rnp->qsmask) &&
-                           !rcu_preempt_blocked_readers_cgp(rnp))
-                               break;
-                       /* If time for quiescent-state forcing, do it. */
-                       if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) {
-                               fqs_state = rcu_gp_fqs(rsp, fqs_state);
-                               cond_resched();
-                       } else {
-                               /* Deal with stray signal. */
-                               cond_resched();
-                               flush_signals(current);
-                       }
-                       j = jiffies_till_next_fqs;
-                       if (j > HZ) {
-                               j = HZ;
-                               jiffies_till_next_fqs = HZ;
-                       } else if (j < 1) {
-                               j = 1;
-                               jiffies_till_next_fqs = 1;
-                       }
-               }
-
-               /* Handle grace-period end. */
-               rcu_gp_cleanup(rsp);
-       }
-}
-
-static void rsp_wakeup(struct irq_work *work)
-{
-       struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
-
-       /* Wake up rcu_gp_kthread() to start the grace period. */
-       wake_up(&rsp->gp_wq);
-}
-
-/*
- * Start a new RCU grace period if warranted, re-initializing the hierarchy
- * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock and hard irqs must be disabled.
- *
- * Note that it is legal for a dying CPU (which is marked as offline) to
- * invoke this function.  This can happen when the dying CPU reports its
- * quiescent state.
- */
-static void
-rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-                     struct rcu_data *rdp)
-{
-       if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
-               /*
-                * Either we have not yet spawned the grace-period
-                * task, this CPU does not need another grace period,
-                * or a grace period is already in progress.
-                * Either way, don't start a new grace period.
-                */
-               return;
-       }
-       rsp->gp_flags = RCU_GP_FLAG_INIT;
-
-       /*
-        * We can't do wakeups while holding the rnp->lock, as that
-        * could cause possible deadlocks with the rq->lock. Defer
-        * the wakeup to interrupt context.  And don't bother waking
-        * up the running kthread.
-        */
-       if (current != rsp->gp_kthread)
-               irq_work_queue(&rsp->wakeup_work);
-}
-
-/*
- * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
- * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
- * is invoked indirectly from rcu_advance_cbs(), which would result in
- * endless recursion -- or would do so if it weren't for the self-deadlock
- * that is encountered beforehand.
- */
-static void
-rcu_start_gp(struct rcu_state *rsp)
-{
-       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       /*
-        * If there is no grace period in progress right now, any
-        * callbacks we have up to this point will be satisfied by the
-        * next grace period.  Also, advancing the callbacks reduces the
-        * probability of false positives from cpu_needs_another_gp()
-        * resulting in pointless grace periods.  So, advance callbacks
-        * then start the grace period!
-        */
-       rcu_advance_cbs(rsp, rnp, rdp);
-       rcu_start_gp_advanced(rsp, rnp, rdp);
-}
-
-/*
- * Report a full set of quiescent states to the specified rcu_state
- * data structure.  This involves cleaning up after the prior grace
- * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, which
- * is released before return.
- */
-static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
-       __releases(rcu_get_root(rsp)->lock)
-{
-       WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
-       raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
-}
-
-/*
- * Similar to rcu_report_qs_rdp(), for which it is a helper function.
- * Allows quiescent states for a group of CPUs to be reported at one go
- * to the specified rcu_node structure, though all the CPUs in the group
- * must be represented by the same rcu_node structure (which need not be
- * a leaf rcu_node structure, though it often will be).  That structure's
- * lock must be held upon entry, and it is released before return.
- */
-static void
-rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
-                 struct rcu_node *rnp, unsigned long flags)
-       __releases(rnp->lock)
-{
-       struct rcu_node *rnp_c;
-
-       /* Walk up the rcu_node hierarchy. */
-       for (;;) {
-               if (!(rnp->qsmask & mask)) {
-
-                       /* Our bit has already been cleared, so done. */
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       return;
-               }
-               rnp->qsmask &= ~mask;
-               trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
-                                                mask, rnp->qsmask, rnp->level,
-                                                rnp->grplo, rnp->grphi,
-                                                !!rnp->gp_tasks);
-               if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
-
-                       /* Other bits still set at this level, so done. */
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       return;
-               }
-               mask = rnp->grpmask;
-               if (rnp->parent == NULL) {
-
-                       /* No more levels.  Exit loop holding root lock. */
-
-                       break;
-               }
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               rnp_c = rnp;
-               rnp = rnp->parent;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               WARN_ON_ONCE(rnp_c->qsmask);
-       }
-
-       /*
-        * Get here if we are the last CPU to pass through a quiescent
-        * state for this grace period.  Invoke rcu_report_qs_rsp()
-        * to clean up and start the next grace period if one is needed.
-        */
-       rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
-}
-
-/*
- * Record a quiescent state for the specified CPU to that CPU's rcu_data
- * structure.  This must be either called from the specified CPU, or
- * called when the specified CPU is known to be offline (and when it is
- * also known that no other CPU is concurrently trying to help the offline
- * CPU).  The grace-period numbers are compared to make sure we are still in the
- * grace period of interest.  We don't want to end the current grace period
- * based on quiescent states detected in an earlier grace period!
- */
-static void
-rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       unsigned long flags;
-       unsigned long mask;
-       struct rcu_node *rnp;
-
-       rnp = rdp->mynode;
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
-           rnp->completed == rnp->gpnum) {
-
-               /*
-                * The grace period in which this quiescent state was
-                * recorded has ended, so don't report it upwards.
-                * We will instead need a new quiescent state that lies
-                * within the current grace period.
-                */
-               rdp->passed_quiesce = 0;        /* need qs for new gp. */
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-       mask = rdp->grpmask;
-       if ((rnp->qsmask & mask) == 0) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       } else {
-               rdp->qs_pending = 0;
-
-               /*
-                * This GP can't end until cpu checks in, so all of our
-                * callbacks can be processed during the next GP.
-                */
-               rcu_accelerate_cbs(rsp, rnp, rdp);
-
-               rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
-       }
-}
-
-/*
- * Check to see if there is a new grace period of which this CPU
- * is not yet aware, and if so, set up local rcu_data state for it.
- * Otherwise, see if this CPU has just passed through its first
- * quiescent state for this grace period, and record that fact if so.
- */
-static void
-rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       /* Check for grace-period ends and beginnings. */
-       note_gp_changes(rsp, rdp);
-
-       /*
-        * Does this CPU still need to do its part for current grace period?
-        * If no, return and let the other CPUs do their part as well.
-        */
-       if (!rdp->qs_pending)
-               return;
-
-       /*
-        * Was there a quiescent state since the beginning of the grace
-        * period? If no, then exit and wait for the next call.
-        */
-       if (!rdp->passed_quiesce)
-               return;
-
-       /*
-        * Tell RCU we are done (but rcu_report_qs_rdp() will be the
-        * judge of that).
-        */
-       rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Send the specified CPU's RCU callbacks to the orphanage.  The
- * specified CPU must be offline, and the caller must hold the
- * ->orphan_lock.
- */
-static void
-rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
-                         struct rcu_node *rnp, struct rcu_data *rdp)
-{
-       /* No-CBs CPUs do not have orphanable callbacks. */
-       if (rcu_is_nocb_cpu(rdp->cpu))
-               return;
-
-       /*
-        * Orphan the callbacks.  First adjust the counts.  This is safe
-        * because _rcu_barrier() excludes CPU-hotplug operations, so it
-        * cannot be running now.  Thus no memory barrier is required.
-        */
-       if (rdp->nxtlist != NULL) {
-               rsp->qlen_lazy += rdp->qlen_lazy;
-               rsp->qlen += rdp->qlen;
-               rdp->n_cbs_orphaned += rdp->qlen;
-               rdp->qlen_lazy = 0;
-               ACCESS_ONCE(rdp->qlen) = 0;
-       }
-
-       /*
-        * Next, move those callbacks still needing a grace period to
-        * the orphanage, where some other CPU will pick them up.
-        * Some of the callbacks might have gone partway through a grace
-        * period, but that is too bad.  They get to start over because we
-        * cannot assume that grace periods are synchronized across CPUs.
-        * We don't bother updating the ->nxttail[] array yet, instead
-        * we just reset the whole thing later on.
-        */
-       if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
-               *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
-               rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
-               *rdp->nxttail[RCU_DONE_TAIL] = NULL;
-       }
-
-       /*
-        * Then move the ready-to-invoke callbacks to the orphanage,
-        * where some other CPU will pick them up.  These will not be
-        * required to pass through another grace period: They are done.
-        */
-       if (rdp->nxtlist != NULL) {
-               *rsp->orphan_donetail = rdp->nxtlist;
-               rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
-       }
-
-       /* Finally, initialize the rcu_data structure's list to empty.  */
-       init_callback_list(rdp);
-}
-
-/*
- * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage.  The caller must hold the ->orphan_lock.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-       int i;
-       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
-
-       /* No-CBs CPUs are handled specially. */
-       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
-               return;
-
-       /* Do the accounting first. */
-       rdp->qlen_lazy += rsp->qlen_lazy;
-       rdp->qlen += rsp->qlen;
-       rdp->n_cbs_adopted += rsp->qlen;
-       if (rsp->qlen_lazy != rsp->qlen)
-               rcu_idle_count_callbacks_posted();
-       rsp->qlen_lazy = 0;
-       rsp->qlen = 0;
-
-       /*
-        * We do not need a memory barrier here because the only way we
-        * can get here if there is an rcu_barrier() in flight is if
-        * we are the task doing the rcu_barrier().
-        */
-
-       /* First adopt the ready-to-invoke callbacks. */
-       if (rsp->orphan_donelist != NULL) {
-               *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
-               *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
-               for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
-                       if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-                               rdp->nxttail[i] = rsp->orphan_donetail;
-               rsp->orphan_donelist = NULL;
-               rsp->orphan_donetail = &rsp->orphan_donelist;
-       }
-
-       /* And then adopt the callbacks that still need a grace period. */
-       if (rsp->orphan_nxtlist != NULL) {
-               *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
-               rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
-               rsp->orphan_nxtlist = NULL;
-               rsp->orphan_nxttail = &rsp->orphan_nxtlist;
-       }
-}
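
Both routines above rely on the "pointer to the last ->next pointer" representation shared by ->nxttail[], ->orphan_donetail and ->orphan_nxttail.  A minimal sketch of that idiom, detached from RCU (struct item and the helpers below are hypothetical, not kernel code):

#include <stddef.h>

struct item {
        struct item *next;
};

struct list {
        struct item *head;      /* like rdp->nxtlist / rsp->orphan_donelist  */
        struct item **tail;     /* like rdp->nxttail[] / rsp->orphan_donetail */
};

static void list_init(struct list *l)
{
        l->head = NULL;
        l->tail = &l->head;     /* empty list: tail addresses the head pointer */
}

static void list_append(struct list *l, struct item *it)
{
        it->next = NULL;
        *l->tail = it;          /* link after the current last element */
        l->tail = &it->next;    /* tail now addresses the new ->next   */
}

/* Splice all of @from onto the end of @to in O(1), as the orphanage does. */
static void list_splice_tail(struct list *to, struct list *from)
{
        if (from->head == NULL)
                return;
        *to->tail = from->head;
        to->tail = from->tail;
        list_init(from);
}
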
-
-/*
- * Trace the fact that this CPU is going offline.
- */
-static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
-{
-       RCU_TRACE(unsigned long mask);
-       RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
-       RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
-
-       RCU_TRACE(mask = rdp->grpmask);
-       trace_rcu_grace_period(rsp->name,
-                              rnp->gpnum + 1 - !!(rnp->qsmask & mask),
-                              TPS("cpuofl"));
-}
-
-/*
- * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context.  Do the remainder of the cleanup,
- * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them.  There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
- */
-static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
-{
-       unsigned long flags;
-       unsigned long mask;
-       int need_report = 0;
-       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
-
-       /* Adjust any no-longer-needed kthreads. */
-       rcu_boost_kthread_setaffinity(rnp, -1);
-
-       /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
-
-       /* Exclude any attempts to start a new grace period. */
-       mutex_lock(&rsp->onoff_mutex);
-       raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-
-       /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
-       rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-       rcu_adopt_orphan_cbs(rsp);
-
-       /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-       mask = rdp->grpmask;    /* rnp->grplo is constant. */
-       do {
-               raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
-               rnp->qsmaskinit &= ~mask;
-               if (rnp->qsmaskinit != 0) {
-                       if (rnp != rdp->mynode)
-                               raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-                       break;
-               }
-               if (rnp == rdp->mynode)
-                       need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-               else
-                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-               mask = rnp->grpmask;
-               rnp = rnp->parent;
-       } while (rnp != NULL);
-
-       /*
-        * We still hold the leaf rcu_node structure lock here, and
-        * irqs are still disabled.  The reason for this subterfuge is
-        * that invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
-        * held leads to deadlock.
-        */
-       raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
-       rnp = rdp->mynode;
-       if (need_report & RCU_OFL_TASKS_NORM_GP)
-               rcu_report_unblock_qs_rnp(rnp, flags);
-       else
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       if (need_report & RCU_OFL_TASKS_EXP_GP)
-               rcu_report_exp_rnp(rsp, rnp, true);
-       WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
-                 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
-                 cpu, rdp->qlen, rdp->nxtlist);
-       init_callback_list(rdp);
-       /* Disallow further callbacks on this CPU. */
-       rdp->nxttail[RCU_NEXT_TAIL] = NULL;
-       mutex_unlock(&rsp->onoff_mutex);
-}
-
-#else /* #ifdef CONFIG_HOTPLUG_CPU */
-
-static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
-{
-}
-
-static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
-
-/*
- * Invoke any RCU callbacks that have made it to the end of their grace
- * period.  Throttle as specified by rdp->blimit.
- */
-static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       unsigned long flags;
-       struct rcu_head *next, *list, **tail;
-       long bl, count, count_lazy;
-       int i;
-
-       /* If no callbacks are ready, just return. */
-       if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-               trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
-               trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
-                                   need_resched(), is_idle_task(current),
-                                   rcu_is_callbacks_kthread());
-               return;
-       }
-
-       /*
-        * Extract the list of ready callbacks, disabling interrupts to prevent
-        * races with call_rcu() from interrupt handlers.
-        */
-       local_irq_save(flags);
-       WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
-       bl = rdp->blimit;
-       trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
-       list = rdp->nxtlist;
-       rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
-       *rdp->nxttail[RCU_DONE_TAIL] = NULL;
-       tail = rdp->nxttail[RCU_DONE_TAIL];
-       for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
-               if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-                       rdp->nxttail[i] = &rdp->nxtlist;
-       local_irq_restore(flags);
-
-       /* Invoke callbacks. */
-       count = count_lazy = 0;
-       while (list) {
-               next = list->next;
-               prefetch(next);
-               debug_rcu_head_unqueue(list);
-               if (__rcu_reclaim(rsp->name, list))
-                       count_lazy++;
-               list = next;
-               /* Stop only if limit reached and CPU has something to do. */
-               if (++count >= bl &&
-                   (need_resched() ||
-                    (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
-                       break;
-       }
-
-       local_irq_save(flags);
-       trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
-                           is_idle_task(current),
-                           rcu_is_callbacks_kthread());
-
-       /* Update count, and requeue any remaining callbacks. */
-       if (list != NULL) {
-               *tail = rdp->nxtlist;
-               rdp->nxtlist = list;
-               for (i = 0; i < RCU_NEXT_SIZE; i++)
-                       if (&rdp->nxtlist == rdp->nxttail[i])
-                               rdp->nxttail[i] = tail;
-                       else
-                               break;
-       }
-       smp_mb(); /* List handling before counting for rcu_barrier(). */
-       rdp->qlen_lazy -= count_lazy;
-       ACCESS_ONCE(rdp->qlen) -= count;
-       rdp->n_cbs_invoked += count;
-
-       /* Reinstate batch limit if we have worked down the excess. */
-       if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
-               rdp->blimit = blimit;
-
-       /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
-       if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
-               rdp->qlen_last_fqs_check = 0;
-               rdp->n_force_qs_snap = rsp->n_force_qs;
-       } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
-               rdp->qlen_last_fqs_check = rdp->qlen;
-       WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
-
-       local_irq_restore(flags);
-
-       /* Re-invoke RCU core processing if there are callbacks remaining. */
-       if (cpu_has_callbacks_ready_to_invoke(rdp))
-               invoke_rcu_core();
-}
-
-/*
- * Check to see if this CPU is in a non-context-switch quiescent state
- * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
- * Also schedule RCU core processing.
- *
- * This function must be called from hardirq context.  It is normally
- * invoked from the scheduling-clock interrupt.  If rcu_pending() reports
- * pending work, RCU core processing is scheduled via invoke_rcu_core().
- */
-void rcu_check_callbacks(int cpu, int user)
-{
-       trace_rcu_utilization(TPS("Start scheduler-tick"));
-       increment_cpu_stall_ticks();
-       if (user || rcu_is_cpu_rrupt_from_idle()) {
-
-               /*
-                * Get here if this CPU took its interrupt from user
-                * mode or from the idle loop, and if this is not a
-                * nested interrupt.  In this case, the CPU is in
-                * a quiescent state, so note it.
-                *
-                * No memory barrier is required here because both
-                * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
-                * variables that other CPUs neither access nor modify,
-                * at least not while the corresponding CPU is online.
-                */
-
-               rcu_sched_qs(cpu);
-               rcu_bh_qs(cpu);
-
-       } else if (!in_softirq()) {
-
-               /*
-                * Get here if this CPU did not take its interrupt from
-                * softirq, in other words, if it is not interrupting
-                * a rcu_bh read-side critical section.  This is an _bh
-                * critical section, so note it.
-                */
-
-               rcu_bh_qs(cpu);
-       }
-       rcu_preempt_check_callbacks(cpu);
-       if (rcu_pending(cpu))
-               invoke_rcu_core();
-       trace_rcu_utilization(TPS("End scheduler-tick"));
-}
-
-/*
- * Scan the leaf rcu_node structures, processing dyntick state for any that
- * have not yet encountered a quiescent state, using the function specified.
- * Also initiate boosting for any threads blocked on the root rcu_node.
- *
- * The caller must have suppressed start of new grace periods.
- */
-static void force_qs_rnp(struct rcu_state *rsp,
-                        int (*f)(struct rcu_data *rsp, bool *isidle,
-                                 unsigned long *maxj),
-                        bool *isidle, unsigned long *maxj)
-{
-       unsigned long bit;
-       int cpu;
-       unsigned long flags;
-       unsigned long mask;
-       struct rcu_node *rnp;
-
-       rcu_for_each_leaf_node(rsp, rnp) {
-               cond_resched();
-               mask = 0;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               if (!rcu_gp_in_progress(rsp)) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       return;
-               }
-               if (rnp->qsmask == 0) {
-                       rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
-                       continue;
-               }
-               cpu = rnp->grplo;
-               bit = 1;
-               for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
-                       if ((rnp->qsmask & bit) != 0) {
-                               if ((rnp->qsmaskinit & bit) != 0)
-                                       *isidle = 0;
-                               if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
-                                       mask |= bit;
-                       }
-               }
-               if (mask != 0) {
-
-                       /* rcu_report_qs_rnp() releases rnp->lock. */
-                       rcu_report_qs_rnp(mask, rsp, rnp, flags);
-                       continue;
-               }
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
-       rnp = rcu_get_root(rsp);
-       if (rnp->qsmask == 0) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
-       }
-}
-
-/*
- * Force quiescent states on reluctant CPUs, and also detect which
- * CPUs are in dyntick-idle mode.
- */
-static void force_quiescent_state(struct rcu_state *rsp)
-{
-       unsigned long flags;
-       bool ret;
-       struct rcu_node *rnp;
-       struct rcu_node *rnp_old = NULL;
-
-       /* Funnel through hierarchy to reduce memory contention. */
-       rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
-       for (; rnp != NULL; rnp = rnp->parent) {
-               ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
-                     !raw_spin_trylock(&rnp->fqslock);
-               if (rnp_old != NULL)
-                       raw_spin_unlock(&rnp_old->fqslock);
-               if (ret) {
-                       rsp->n_force_qs_lh++;
-                       return;
-               }
-               rnp_old = rnp;
-       }
-       /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
-
-       /* Reached the root of the rcu_node tree, acquire lock. */
-       raw_spin_lock_irqsave(&rnp_old->lock, flags);
-       raw_spin_unlock(&rnp_old->fqslock);
-       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               rsp->n_force_qs_lh++;
-               raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-               return;  /* Someone beat us to it. */
-       }
-       rsp->gp_flags |= RCU_GP_FLAG_FQS;
-       raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
-}
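
Stripped of the RCU specifics, the funnel above has the following shape (illustrative only: struct node, pending and post_request() are made up, and the final re-check that force_quiescent_state() performs under the root's ->lock is omitted):

#include <linux/spinlock.h>

struct node {
        spinlock_t trylock;             /* plays the role of ->fqslock */
        struct node *parent;            /* NULL at the root */
};

static bool pending;                    /* plays the role of RCU_GP_FLAG_FQS */
static void post_request(void);         /* hypothetical: set the flag, kick the GP kthread */

static void funnel_post(struct node *leaf)
{
        struct node *np, *np_old = NULL;
        bool lost;

        for (np = leaf; np != NULL; np = np->parent) {
                lost = ACCESS_ONCE(pending) || !spin_trylock(&np->trylock);
                if (np_old)
                        spin_unlock(&np_old->trylock);
                if (lost)
                        return;         /* some other CPU is already funneling */
                np_old = np;
        }
        post_request();
        spin_unlock(&np_old->trylock);  /* np_old is the root at this point */
}
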
-
-/*
- * This does the RCU core processing work for the specified rcu_state
- * and rcu_data structures.  This may be called only from the CPU to
- * whom the rdp belongs.
- */
-static void
-__rcu_process_callbacks(struct rcu_state *rsp)
-{
-       unsigned long flags;
-       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
-
-       WARN_ON_ONCE(rdp->beenonline == 0);
-
-       /* Update RCU state based on any recent quiescent states. */
-       rcu_check_quiescent_state(rsp, rdp);
-
-       /* Does this CPU require a not-yet-started grace period? */
-       local_irq_save(flags);
-       if (cpu_needs_another_gp(rsp, rdp)) {
-               raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-               rcu_start_gp(rsp);
-               raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-       } else {
-               local_irq_restore(flags);
-       }
-
-       /* If there are callbacks ready, invoke them. */
-       if (cpu_has_callbacks_ready_to_invoke(rdp))
-               invoke_rcu_callbacks(rsp, rdp);
-}
-
-/*
- * Do RCU core processing for the current CPU.
- */
-static void rcu_process_callbacks(struct softirq_action *unused)
-{
-       struct rcu_state *rsp;
-
-       if (cpu_is_offline(smp_processor_id()))
-               return;
-       trace_rcu_utilization(TPS("Start RCU core"));
-       for_each_rcu_flavor(rsp)
-               __rcu_process_callbacks(rsp);
-       trace_rcu_utilization(TPS("End RCU core"));
-}
-
-/*
- * Schedule RCU callback invocation.  If the specified type of RCU
- * does not support RCU priority boosting, just do a direct call,
- * otherwise wake up the per-CPU kernel kthread.  Note that because we
- * are running on the current CPU with interrupts disabled, the
- * rcu_cpu_kthread_task cannot disappear out from under us.
- */
-static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
-               return;
-       if (likely(!rsp->boost)) {
-               rcu_do_batch(rsp, rdp);
-               return;
-       }
-       invoke_rcu_callbacks_kthread();
-}
-
-static void invoke_rcu_core(void)
-{
-       if (cpu_online(smp_processor_id()))
-               raise_softirq(RCU_SOFTIRQ);
-}
-
-/*
- * Handle any core-RCU processing required by a call_rcu() invocation.
- */
-static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
-                           struct rcu_head *head, unsigned long flags)
-{
-       /*
-        * If called from an extended quiescent state, invoke the RCU
-        * core in order to force a re-evaluation of RCU's idleness.
-        */
-       if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
-               invoke_rcu_core();
-
-       /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
-       if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
-               return;
-
-       /*
-        * Force the grace period if too many callbacks or too long waiting.
-        * Enforce hysteresis, and don't invoke force_quiescent_state()
-        * if some other CPU has recently done so.  Also, don't bother
-        * invoking force_quiescent_state() if the newly enqueued callback
-        * is the only one waiting for a grace period to complete.
-        */
-       if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-
-               /* Are we ignoring a completed grace period? */
-               note_gp_changes(rsp, rdp);
-
-               /* Start a new grace period if one not already started. */
-               if (!rcu_gp_in_progress(rsp)) {
-                       struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-                       raw_spin_lock(&rnp_root->lock);
-                       rcu_start_gp(rsp);
-                       raw_spin_unlock(&rnp_root->lock);
-               } else {
-                       /* Give the grace period a kick. */
-                       rdp->blimit = LONG_MAX;
-                       if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-                           *rdp->nxttail[RCU_DONE_TAIL] != head)
-                               force_quiescent_state(rsp);
-                       rdp->n_force_qs_snap = rsp->n_force_qs;
-                       rdp->qlen_last_fqs_check = rdp->qlen;
-               }
-       }
-}
-
-/*
- * RCU callback function to leak a callback.
- */
-static void rcu_leak_callback(struct rcu_head *rhp)
-{
-}
-
-/*
- * Helper function for call_rcu() and friends.  The cpu argument will
- * normally be -1, indicating "currently running CPU".  It may specify
- * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
- * is expected to specify a CPU.
- */
-static void
-__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-          struct rcu_state *rsp, int cpu, bool lazy)
-{
-       unsigned long flags;
-       struct rcu_data *rdp;
-
-       WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
-       if (debug_rcu_head_queue(head)) {
-               /* Probable double call_rcu(), so leak the callback. */
-               ACCESS_ONCE(head->func) = rcu_leak_callback;
-               WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
-               return;
-       }
-       head->func = func;
-       head->next = NULL;
-
-       /*
-        * Opportunistically note grace-period endings and beginnings.
-        * Note that we might see a beginning right after we see an
-        * end, but never vice versa, since this CPU has to pass through
-        * a quiescent state betweentimes.
-        */
-       local_irq_save(flags);
-       rdp = this_cpu_ptr(rsp->rda);
-
-       /* Add the callback to our list. */
-       if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
-               int offline;
-
-               if (cpu != -1)
-                       rdp = per_cpu_ptr(rsp->rda, cpu);
-               offline = !__call_rcu_nocb(rdp, head, lazy);
-               WARN_ON_ONCE(offline);
-               /* _call_rcu() is illegal on offline CPU; leak the callback. */
-               local_irq_restore(flags);
-               return;
-       }
-       ACCESS_ONCE(rdp->qlen)++;
-       if (lazy)
-               rdp->qlen_lazy++;
-       else
-               rcu_idle_count_callbacks_posted();
-       smp_mb();  /* Count before adding callback for rcu_barrier(). */
-       *rdp->nxttail[RCU_NEXT_TAIL] = head;
-       rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
-
-       if (__is_kfree_rcu_offset((unsigned long)func))
-               trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-                                        rdp->qlen_lazy, rdp->qlen);
-       else
-               trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
-
-       /* Go handle any RCU core processing required. */
-       __call_rcu_core(rsp, rdp, head, flags);
-       local_irq_restore(flags);
-}
-
-/*
- * Queue an RCU-sched callback for invocation after a grace period.
- */
-void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_sched_state, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu_sched);
-
-/*
- * Queue an RCU callback for invocation after a quicker grace period.
- */
-void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_bh_state, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
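
For reference, a minimal caller of call_rcu_sched() (purely illustrative; struct foo, foo_reclaim() and foo_retire() are assumptions, not taken from the tree):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
        struct rcu_head rcu;
};

static void foo_reclaim(struct rcu_head *rhp)
{
        struct foo *fp = container_of(rhp, struct foo, rcu);

        kfree(fp);      /* runs only after a full RCU-sched grace period */
}

static void foo_retire(struct foo *fp)
{
        /* Readers under rcu_read_lock_sched() may still be using fp. */
        call_rcu_sched(&fp->rcu, foo_reclaim);
}
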
-
-/*
- * Because a context switch is a grace period for RCU-sched and RCU-bh,
- * any blocking grace-period wait automatically implies a grace period
- * if there is only one CPU online at any point in time during execution
- * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
- * occasionally incorrectly indicate that there are multiple CPUs online
- * when there was in fact only one the whole time, as this just adds
- * some overhead: RCU still operates correctly.
- */
-static inline int rcu_blocking_is_gp(void)
-{
-       int ret;
-
-       might_sleep();  /* Check for RCU read-side critical section. */
-       preempt_disable();
-       ret = num_online_cpus() <= 1;
-       preempt_enable();
-       return ret;
-}
-
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed.   These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * non-threaded hardware-interrupt handlers, in progress on entry will
- * have completed before this primitive returns.  However, this does not
- * guarantee that softirq handlers will have completed, since in some
- * kernels, these handlers can run in process context, and can block.
- *
- * Note that this guarantee implies further memory-ordering guarantees.
- * On systems with more than one CPU, when synchronize_sched() returns,
- * each CPU is guaranteed to have executed a full memory barrier since the
- * end of its last RCU-sched read-side critical section whose beginning
- * preceded the call to synchronize_sched().  In addition, each CPU having
- * an RCU read-side critical section that extends beyond the return from
- * synchronize_sched() is guaranteed to have executed a full memory barrier
- * after the beginning of synchronize_sched() and before the beginning of
- * that RCU read-side critical section.  Note that these guarantees include
- * CPUs that are offline, idle, or executing in user mode, as well as CPUs
- * that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_sched(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API.  In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
- */
-void synchronize_sched(void)
-{
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_sched() in RCU-sched read-side critical section");
-       if (rcu_blocking_is_gp())
-               return;
-       if (rcu_expedited)
-               synchronize_sched_expedited();
-       else
-               wait_rcu_gp(call_rcu_sched);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
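
A small sketch of the updater/reader pairing described above (illustrative: the global pointer, do_something_with() and the single-updater assumption are not from the tree):

#include <linux/rcupdate.h>
#include <linux/slab.h>

static int *gp_data;                    /* published pointer */
static void do_something_with(int v);   /* hypothetical consumer */

static void reader(void)
{
        int *p;

        rcu_read_lock_sched();          /* preempt_disable() would also do */
        p = rcu_dereference_sched(gp_data);
        if (p)
                do_something_with(*p);
        rcu_read_unlock_sched();
}

static void updater(int *newp)          /* assumed to be serialized */
{
        int *oldp = gp_data;

        rcu_assign_pointer(gp_data, newp);
        synchronize_sched();            /* wait out pre-existing readers */
        kfree(oldp);                    /* no reader can still hold oldp */
}
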
-
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
- */
-void synchronize_rcu_bh(void)
-{
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
-       if (rcu_blocking_is_gp())
-               return;
-       if (rcu_expedited)
-               synchronize_rcu_bh_expedited();
-       else
-               wait_rcu_gp(call_rcu_bh);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-       /*
-        * There must be a full memory barrier on each affected CPU
-        * between the time that try_stop_cpus() is called and the
-        * time that it returns.
-        *
-        * In the current initial implementation of cpu_stop, the
-        * above condition is already met when the control reaches
-        * this point and the following smp_mb() is not strictly
-        * necessary.  Do smp_mb() anyway for documentation and
-        * robustness against future implementation changes.
-        */
-       smp_mb(); /* See above comment block. */
-       return 0;
-}
-
-/**
- * synchronize_sched_expedited - Brute-force RCU-sched grace period
- *
- * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
- * approach to force the grace period to end quickly.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.  In fact,
- * if you are using synchronize_sched_expedited() in a loop, please
- * restructure your code to batch your updates, and then use a single
- * synchronize_sched() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restrictions will result in deadlock.
- *
- * This implementation can be thought of as an application of ticket
- * locking to RCU, with ->expedited_start and
- * ->expedited_done taking on the roles of the halves
- * of the ticket-lock word.  Each task atomically increments
- * ->expedited_start upon entry, snapshotting the old value,
- * then attempts to stop all the CPUs.  If this succeeds, then each
- * CPU will have executed a context switch, resulting in an RCU-sched
- * grace period.  We are then done, so we use atomic_long_cmpxchg() to
- * update ->expedited_done to match our snapshot -- but
- * only if someone else has not already advanced past our snapshot.
- *
- * On the other hand, if try_stop_cpus() fails, we check the value
- * of ->expedited_done.  If it has advanced past our
- * initial snapshot, then someone else must have forced a grace period
- * some time after we took our snapshot.  In this case, our work is
- * done for us, and we can simply return.  Otherwise, we try again,
- * but keep our initial snapshot for purposes of checking for someone
- * doing our work for us.
- *
- * If we fail too many times in a row, we fall back to synchronize_sched().
- */
-void synchronize_sched_expedited(void)
-{
-       long firstsnap, s, snap;
-       int trycount = 0;
-       struct rcu_state *rsp = &rcu_sched_state;
-
-       /*
-        * If we are in danger of counter wrap, just do synchronize_sched().
-        * By allowing ->expedited_start to advance no more than
-        * ULONG_MAX/8 ahead of ->expedited_done, we are ensuring
-        * that more than 3.5 billion CPUs would be required to force a
-        * counter wrap on a 32-bit system.  Quite a few more CPUs would of
-        * course be required on a 64-bit system.
-        */
-       if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
-                        (ulong)atomic_long_read(&rsp->expedited_done) +
-                        ULONG_MAX / 8)) {
-               synchronize_sched();
-               atomic_long_inc(&rsp->expedited_wrap);
-               return;
-       }
-
-       /*
-        * Take a ticket.  Note that atomic_long_inc_return() implies a
-        * full memory barrier.
-        */
-       snap = atomic_long_inc_return(&rsp->expedited_start);
-       firstsnap = snap;
-       get_online_cpus();
-       WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
-
-       /*
-        * Each pass through the following loop attempts to force a
-        * context switch on each CPU.
-        */
-       while (try_stop_cpus(cpu_online_mask,
-                            synchronize_sched_expedited_cpu_stop,
-                            NULL) == -EAGAIN) {
-               put_online_cpus();
-               atomic_long_inc(&rsp->expedited_tryfail);
-
-               /* Check to see if someone else did our work for us. */
-               s = atomic_long_read(&rsp->expedited_done);
-               if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
-                       /* ensure test happens before caller kfree */
-                       smp_mb__before_atomic_inc(); /* ^^^ */
-                       atomic_long_inc(&rsp->expedited_workdone1);
-                       return;
-               }
-
-               /* No joy, try again later.  Or just synchronize_sched(). */
-               if (trycount++ < 10) {
-                       udelay(trycount * num_online_cpus());
-               } else {
-                       wait_rcu_gp(call_rcu_sched);
-                       atomic_long_inc(&rsp->expedited_normal);
-                       return;
-               }
-
-               /* Recheck to see if someone else did our work for us. */
-               s = atomic_long_read(&rsp->expedited_done);
-               if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
-                       /* ensure test happens before caller kfree */
-                       smp_mb__before_atomic_inc(); /* ^^^ */
-                       atomic_long_inc(&rsp->expedited_workdone2);
-                       return;
-               }
-
-               /*
-                * Refetching ->expedited_start allows later
-                * callers to piggyback on our grace period.  We retry
-                * after they started, so our grace period works for them,
-                * and they started after our first try, so their grace
-                * period works for us.
-                */
-               get_online_cpus();
-               snap = atomic_long_read(&rsp->expedited_start);
-               smp_mb(); /* ensure read is before try_stop_cpus(). */
-       }
-       atomic_long_inc(&rsp->expedited_stoppedcpus);
-
-       /*
-        * Everyone up to our most recent fetch is covered by our grace
-        * period.  Update the counter, but only if our work is still
-        * relevant -- which it won't be if someone who started later
-        * than we did already did their update.
-        */
-       do {
-               atomic_long_inc(&rsp->expedited_done_tries);
-               s = atomic_long_read(&rsp->expedited_done);
-               if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
-                       /* ensure test happens before caller kfree */
-                       smp_mb__before_atomic_inc(); /* ^^^ */
-                       atomic_long_inc(&rsp->expedited_done_lost);
-                       break;
-               }
-       } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
-       atomic_long_inc(&rsp->expedited_done_exit);
-
-       put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
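
The ticket-style bookkeeping described in the header comment reduces to the following shape (illustrative only: try_to_stop_everyone() is hypothetical, and the tracing, CPU-hotplug exclusion and wrap protection are omitted):

#include <linux/atomic.h>
#include <linux/rcupdate.h>     /* ULONG_CMP_GE() */
#include <linux/types.h>

static atomic_long_t start_ctr;                 /* like rsp->expedited_start */
static atomic_long_t done_ctr;                  /* like rsp->expedited_done  */
static bool try_to_stop_everyone(void);         /* hypothetical */

static void expedite(void)
{
        long snap = atomic_long_inc_return(&start_ctr);        /* take a ticket */
        long s;

        while (!try_to_stop_everyone()) {
                /* Did a later winner's grace period already cover us? */
                s = atomic_long_read(&done_ctr);
                if (ULONG_CMP_GE((ulong)s, (ulong)snap))
                        return;
        }

        /* Advance done_ctr to our ticket, but never move it backwards. */
        do {
                s = atomic_long_read(&done_ctr);
                if (ULONG_CMP_GE((ulong)s, (ulong)snap))
                        break;
        } while (atomic_long_cmpxchg(&done_ctr, s, snap) != s);
}
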
-
-/*
- * Check to see if there is any immediate RCU-related work to be done
- * by the current CPU, for the specified type of RCU, returning 1 if so.
- * The checks are in order of increasing expense: checks that can be
- * carried out against CPU-local state are performed first.  However,
- * we must check for CPU stalls first, else we might not get a chance.
- */
-static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-       struct rcu_node *rnp = rdp->mynode;
-
-       rdp->n_rcu_pending++;
-
-       /* Check for CPU stalls, if enabled. */
-       check_cpu_stall(rsp, rdp);
-
-       /* Is the RCU core waiting for a quiescent state from this CPU? */
-       if (rcu_scheduler_fully_active &&
-           rdp->qs_pending && !rdp->passed_quiesce) {
-               rdp->n_rp_qs_pending++;
-       } else if (rdp->qs_pending && rdp->passed_quiesce) {
-               rdp->n_rp_report_qs++;
-               return 1;
-       }
-
-       /* Does this CPU have callbacks ready to invoke? */
-       if (cpu_has_callbacks_ready_to_invoke(rdp)) {
-               rdp->n_rp_cb_ready++;
-               return 1;
-       }
-
-       /* Has RCU gone idle with this CPU needing another grace period? */
-       if (cpu_needs_another_gp(rsp, rdp)) {
-               rdp->n_rp_cpu_needs_gp++;
-               return 1;
-       }
-
-       /* Has another RCU grace period completed?  */
-       if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
-               rdp->n_rp_gp_completed++;
-               return 1;
-       }
-
-       /* Has a new RCU grace period started? */
-       if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
-               rdp->n_rp_gp_started++;
-               return 1;
-       }
-
-       /* nothing to do */
-       rdp->n_rp_need_nothing++;
-       return 0;
-}
-
-/*
- * Check to see if there is any immediate RCU-related work to be done
- * by the current CPU, returning 1 if so.  This function is part of the
- * RCU implementation; it is -not- an exported member of the RCU API.
- */
-static int rcu_pending(int cpu)
-{
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp)
-               if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
-                       return 1;
-       return 0;
-}
-
-/*
- * Return true if the specified CPU has any callback.  If all_lazy is
- * non-NULL, store an indication of whether all callbacks are lazy.
- * (If there are no callbacks, all of them are deemed to be lazy.)
- */
-static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
-{
-       bool al = true;
-       bool hc = false;
-       struct rcu_data *rdp;
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp) {
-               rdp = per_cpu_ptr(rsp->rda, cpu);
-               if (rdp->qlen != rdp->qlen_lazy)
-                       al = false;
-               if (rdp->nxtlist)
-                       hc = true;
-       }
-       if (all_lazy)
-               *all_lazy = al;
-       return hc;
-}
-
-/*
- * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
- * the compiler is expected to optimize this away.
- */
-static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
-                              int cpu, unsigned long done)
-{
-       trace_rcu_barrier(rsp->name, s, cpu,
-                         atomic_read(&rsp->barrier_cpu_count), done);
-}
-
-/*
- * RCU callback function for _rcu_barrier().  If we are last, wake
- * up the task executing _rcu_barrier().
- */
-static void rcu_barrier_callback(struct rcu_head *rhp)
-{
-       struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
-       struct rcu_state *rsp = rdp->rsp;
-
-       if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
-               _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
-               complete(&rsp->barrier_completion);
-       } else {
-               _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
-       }
-}
-
-/*
- * Called with preemption disabled, and from cross-cpu IRQ context.
- */
-static void rcu_barrier_func(void *type)
-{
-       struct rcu_state *rsp = type;
-       struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
-
-       _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
-       atomic_inc(&rsp->barrier_cpu_count);
-       rsp->call(&rdp->barrier_head, rcu_barrier_callback);
-}
-
-/*
- * Orchestrate the specified type of RCU barrier, waiting for all
- * RCU callbacks of the specified type to complete.
- */
-static void _rcu_barrier(struct rcu_state *rsp)
-{
-       int cpu;
-       struct rcu_data *rdp;
-       unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
-       unsigned long snap_done;
-
-       _rcu_barrier_trace(rsp, "Begin", -1, snap);
-
-       /* Take mutex to serialize concurrent rcu_barrier() requests. */
-       mutex_lock(&rsp->barrier_mutex);
-
-       /*
-        * Ensure that all prior references, including to ->n_barrier_done,
-        * are ordered before the _rcu_barrier() machinery.
-        */
-       smp_mb();  /* See above block comment. */
-
-       /*
-        * Recheck ->n_barrier_done to see if others did our work for us.
-        * This means checking ->n_barrier_done for an even-to-odd-to-even
-        * transition.  The "if" expression below therefore rounds the old
-        * value up to the next even number and adds two before comparing.
-        */
-       snap_done = rsp->n_barrier_done;
-       _rcu_barrier_trace(rsp, "Check", -1, snap_done);
-
-       /*
-        * If the value in snap is odd, we needed to wait for the current
-        * rcu_barrier() to complete, then wait for the next one, in other
-        * words, we need the value of snap_done to be three larger than
-        * the value of snap.  On the other hand, if the value in snap is
-        * even, we only had to wait for the next rcu_barrier() to complete,
-        * in other words, we need the value of snap_done to be only two
-        * greater than the value of snap.  The "(snap + 3) & ~0x1" computes
-        * this for us (thank you, Linus!).
-        */
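        /*
         * (Illustrative check of the expression above, not part of the
         *  original file: snap == 4 (even) gives (4 + 3) & ~0x1 == 6,
         *  i.e. snap + 2, while snap == 5 (odd) gives (5 + 3) & ~0x1 == 8,
         *  i.e. snap + 3.)
         */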
-       if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
-               _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
-               smp_mb(); /* caller's subsequent code after above check. */
-               mutex_unlock(&rsp->barrier_mutex);
-               return;
-       }
-
-       /*
-        * Increment ->n_barrier_done to avoid duplicate work.  Use
-        * ACCESS_ONCE() to prevent the compiler from speculating
-        * the increment to precede the early-exit check.
-        */
-       ACCESS_ONCE(rsp->n_barrier_done)++;
-       WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
-       _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
-       smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
-
-       /*
-        * Initialize the count to one rather than to zero in order to
-        * avoid a too-soon return to zero in case of a short grace period
-        * (or preemption of this task).  Exclude CPU-hotplug operations
-        * to ensure that no offline CPU has callbacks queued.
-        */
-       init_completion(&rsp->barrier_completion);
-       atomic_set(&rsp->barrier_cpu_count, 1);
-       get_online_cpus();
-
-       /*
-        * Force each CPU with callbacks to register a new callback.
-        * When that callback is invoked, we will know that all of the
-        * corresponding CPU's preceding callbacks have been invoked.
-        */
-       for_each_possible_cpu(cpu) {
-               if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
-                       continue;
-               rdp = per_cpu_ptr(rsp->rda, cpu);
-               if (rcu_is_nocb_cpu(cpu)) {
-                       _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
-                                          rsp->n_barrier_done);
-                       atomic_inc(&rsp->barrier_cpu_count);
-                       __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
-                                  rsp, cpu, 0);
-               } else if (ACCESS_ONCE(rdp->qlen)) {
-                       _rcu_barrier_trace(rsp, "OnlineQ", cpu,
-                                          rsp->n_barrier_done);
-                       smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
-               } else {
-                       _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
-                                          rsp->n_barrier_done);
-               }
-       }
-       put_online_cpus();
-
-       /*
-        * Now that we have an rcu_barrier_callback() callback on each
-        * CPU, and thus each counted, remove the initial count.
-        */
-       if (atomic_dec_and_test(&rsp->barrier_cpu_count))
-               complete(&rsp->barrier_completion);
-
-       /* Increment ->n_barrier_done to prevent duplicate work. */
-       smp_mb(); /* Keep increment after above mechanism. */
-       ACCESS_ONCE(rsp->n_barrier_done)++;
-       WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
-       _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
-       smp_mb(); /* Keep increment before caller's subsequent code. */
-
-       /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-       wait_for_completion(&rsp->barrier_completion);
-
-       /* Other rcu_barrier() invocations can now safely proceed. */
-       mutex_unlock(&rsp->barrier_mutex);
-}
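
The parity trick in the _rcu_barrier() comment above (snap_done must reach
snap + 3 when the snapshot is odd and snap + 2 when it is even, with
"(snap + 3) & ~0x1" covering both cases) can be checked in isolation.  A
minimal user-space sketch, with the helper name barrier_target() made up
purely for illustration:

#include <assert.h>
#include <stdio.h>

/* Stand-alone check of the "(snap + 3) & ~0x1" expression used above:
 * an odd snapshot yields snap + 3, an even one yields snap + 2. */
static unsigned long barrier_target(unsigned long snap)
{
        return (snap + 3) & ~0x1UL;
}

int main(void)
{
        unsigned long snap;

        for (snap = 0; snap < 8; snap++) {
                unsigned long want = (snap & 0x1) ? snap + 3 : snap + 2;

                assert(barrier_target(snap) == want);
                printf("snap=%lu -> need snap_done >= %lu\n",
                       snap, barrier_target(snap));
        }
        return 0;
}
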
-
-/**
- * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
- */
-void rcu_barrier_bh(void)
-{
-       _rcu_barrier(&rcu_bh_state);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-
-/**
- * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
- */
-void rcu_barrier_sched(void)
-{
-       _rcu_barrier(&rcu_sched_state);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-
-/*
- * Do boot-time initialization of a CPU's per-CPU RCU data.
- */
-static void __init
-rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
-{
-       unsigned long flags;
-       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       /* Set up local state, ensuring consistent view of global state. */
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-       init_callback_list(rdp);
-       rdp->qlen_lazy = 0;
-       ACCESS_ONCE(rdp->qlen) = 0;
-       rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-       WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
-       WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
-       rdp->cpu = cpu;
-       rdp->rsp = rsp;
-       rcu_boot_init_nocb_percpu_data(rdp);
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Initialize a CPU's per-CPU RCU data.  Note that only one online or
- * offline event can be happening at a given time.  Note also that we
- * can accept some slop in the rsp->completed access due to the fact
- * that this CPU cannot possibly have any RCU callbacks in flight yet.
- */
-static void
-rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
-{
-       unsigned long flags;
-       unsigned long mask;
-       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       /* Exclude new grace periods. */
-       mutex_lock(&rsp->onoff_mutex);
-
-       /* Set up local state, ensuring consistent view of global state. */
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       rdp->beenonline = 1;     /* We have now been online. */
-       rdp->preemptible = preemptible;
-       rdp->qlen_last_fqs_check = 0;
-       rdp->n_force_qs_snap = rsp->n_force_qs;
-       rdp->blimit = blimit;
-       init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
-       rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_sysidle_init_percpu_data(rdp->dynticks);
-       atomic_set(&rdp->dynticks->dynticks,
-                  (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-       raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
-
-       /* Add CPU to rcu_node bitmasks. */
-       rnp = rdp->mynode;
-       mask = rdp->grpmask;
-       do {
-               /* Exclude any attempts to start a new GP on small systems. */
-               raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
-               rnp->qsmaskinit |= mask;
-               mask = rnp->grpmask;
-               if (rnp == rdp->mynode) {
-                       /*
-                        * If there is a grace period in progress, we will
-                        * set up to wait for it next time we run the
-                        * RCU core code.
-                        */
-                       rdp->gpnum = rnp->completed;
-                       rdp->completed = rnp->completed;
-                       rdp->passed_quiesce = 0;
-                       rdp->qs_pending = 0;
-                       trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
-               }
-               raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
-               rnp = rnp->parent;
-       } while (rnp != NULL && !(rnp->qsmaskinit & mask));
-       local_irq_restore(flags);
-
-       mutex_unlock(&rsp->onoff_mutex);
-}
-
-static void rcu_prepare_cpu(int cpu)
-{
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp)
-               rcu_init_percpu_data(cpu, rsp,
-                                    strcmp(rsp->name, "rcu_preempt") == 0);
-}
-
-/*
- * Handle CPU online/offline notification events.
- */
-static int rcu_cpu_notify(struct notifier_block *self,
-                                   unsigned long action, void *hcpu)
-{
-       long cpu = (long)hcpu;
-       struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
-       struct rcu_node *rnp = rdp->mynode;
-       struct rcu_state *rsp;
-
-       trace_rcu_utilization(TPS("Start CPU hotplug"));
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               rcu_prepare_cpu(cpu);
-               rcu_prepare_kthreads(cpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-               rcu_boost_kthread_setaffinity(rnp, -1);
-               break;
-       case CPU_DOWN_PREPARE:
-               rcu_boost_kthread_setaffinity(rnp, cpu);
-               break;
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               for_each_rcu_flavor(rsp)
-                       rcu_cleanup_dying_cpu(rsp);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               for_each_rcu_flavor(rsp)
-                       rcu_cleanup_dead_cpu(cpu, rsp);
-               break;
-       default:
-               break;
-       }
-       trace_rcu_utilization(TPS("End CPU hotplug"));
-       return NOTIFY_OK;
-}
-
-static int rcu_pm_notify(struct notifier_block *self,
-                        unsigned long action, void *hcpu)
-{
-       switch (action) {
-       case PM_HIBERNATION_PREPARE:
-       case PM_SUSPEND_PREPARE:
-               if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
-                       rcu_expedited = 1;
-               break;
-       case PM_POST_HIBERNATION:
-       case PM_POST_SUSPEND:
-               rcu_expedited = 0;
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-/*
- * Spawn the kthread that handles this RCU flavor's grace periods.
- */
-static int __init rcu_spawn_gp_kthread(void)
-{
-       unsigned long flags;
-       struct rcu_node *rnp;
-       struct rcu_state *rsp;
-       struct task_struct *t;
-
-       for_each_rcu_flavor(rsp) {
-               t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
-               BUG_ON(IS_ERR(t));
-               rnp = rcu_get_root(rsp);
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               rsp->gp_kthread = t;
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               rcu_spawn_nocb_kthreads(rsp);
-       }
-       return 0;
-}
-early_initcall(rcu_spawn_gp_kthread);
-
-/*
- * This function is invoked towards the end of the scheduler's initialization
- * process.  Before this is called, the idle task might contain
- * RCU read-side critical sections (during which time, this idle
- * task is booting the system).  After this function is called, the
- * idle tasks are prohibited from containing RCU read-side critical
- * sections.  This function also enables RCU lockdep checking.
- */
-void rcu_scheduler_starting(void)
-{
-       WARN_ON(num_online_cpus() != 1);
-       WARN_ON(nr_context_switches() > 0);
-       rcu_scheduler_active = 1;
-}
-
-/*
- * Compute the per-level fanout, either using the exact fanout specified
- * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
- */
-#ifdef CONFIG_RCU_FANOUT_EXACT
-static void __init rcu_init_levelspread(struct rcu_state *rsp)
-{
-       int i;
-
-       for (i = rcu_num_lvls - 1; i > 0; i--)
-               rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-       rsp->levelspread[0] = rcu_fanout_leaf;
-}
-#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
-static void __init rcu_init_levelspread(struct rcu_state *rsp)
-{
-       int ccur;
-       int cprv;
-       int i;
-
-       cprv = nr_cpu_ids;
-       for (i = rcu_num_lvls - 1; i >= 0; i--) {
-               ccur = rsp->levelcnt[i];
-               rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
-               cprv = ccur;
-       }
-}
-#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
-
-/*
- * Helper function for rcu_init() that initializes one rcu_state structure.
- */
-static void __init rcu_init_one(struct rcu_state *rsp,
-               struct rcu_data __percpu *rda)
-{
-       static char *buf[] = { "rcu_node_0",
-                              "rcu_node_1",
-                              "rcu_node_2",
-                              "rcu_node_3" };  /* Match MAX_RCU_LVLS */
-       static char *fqs[] = { "rcu_node_fqs_0",
-                              "rcu_node_fqs_1",
-                              "rcu_node_fqs_2",
-                              "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
-       int cpustride = 1;
-       int i;
-       int j;
-       struct rcu_node *rnp;
-
-       BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
-
-       /* Silence gcc 4.8 warning about array index out of range. */
-       if (rcu_num_lvls > RCU_NUM_LVLS)
-               panic("rcu_init_one: rcu_num_lvls overflow");
-
-       /* Initialize the level-tracking arrays. */
-
-       for (i = 0; i < rcu_num_lvls; i++)
-               rsp->levelcnt[i] = num_rcu_lvl[i];
-       for (i = 1; i < rcu_num_lvls; i++)
-               rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
-       rcu_init_levelspread(rsp);
-
-       /* Initialize the elements themselves, starting from the leaves. */
-
-       for (i = rcu_num_lvls - 1; i >= 0; i--) {
-               cpustride *= rsp->levelspread[i];
-               rnp = rsp->level[i];
-               for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
-                       raw_spin_lock_init(&rnp->lock);
-                       lockdep_set_class_and_name(&rnp->lock,
-                                                  &rcu_node_class[i], buf[i]);
-                       raw_spin_lock_init(&rnp->fqslock);
-                       lockdep_set_class_and_name(&rnp->fqslock,
-                                                  &rcu_fqs_class[i], fqs[i]);
-                       rnp->gpnum = rsp->gpnum;
-                       rnp->completed = rsp->completed;
-                       rnp->qsmask = 0;
-                       rnp->qsmaskinit = 0;
-                       rnp->grplo = j * cpustride;
-                       rnp->grphi = (j + 1) * cpustride - 1;
-                       if (rnp->grphi >= NR_CPUS)
-                               rnp->grphi = NR_CPUS - 1;
-                       if (i == 0) {
-                               rnp->grpnum = 0;
-                               rnp->grpmask = 0;
-                               rnp->parent = NULL;
-                       } else {
-                               rnp->grpnum = j % rsp->levelspread[i - 1];
-                               rnp->grpmask = 1UL << rnp->grpnum;
-                               rnp->parent = rsp->level[i - 1] +
-                                             j / rsp->levelspread[i - 1];
-                       }
-                       rnp->level = i;
-                       INIT_LIST_HEAD(&rnp->blkd_tasks);
-                       rcu_init_one_nocb(rnp);
-               }
-       }
-
-       rsp->rda = rda;
-       init_waitqueue_head(&rsp->gp_wq);
-       init_irq_work(&rsp->wakeup_work, rsp_wakeup);
-       rnp = rsp->level[rcu_num_lvls - 1];
-       for_each_possible_cpu(i) {
-               while (i > rnp->grphi)
-                       rnp++;
-               per_cpu_ptr(rsp->rda, i)->mynode = rnp;
-               rcu_boot_init_percpu_data(i, rsp);
-       }
-       list_add(&rsp->flavors, &rcu_struct_flavors);
-}
-
-/*
- * Compute the rcu_node tree geometry from kernel parameters.  This cannot
- * replace the definitions in rcutree.h because those are needed to size
- * the ->node array in the rcu_state structure.
- */
-static void __init rcu_init_geometry(void)
-{
-       ulong d;
-       int i;
-       int j;
-       int n = nr_cpu_ids;
-       int rcu_capacity[MAX_RCU_LVLS + 1];
-
-       /*
-        * Initialize any unspecified boot parameters.
-        * The default values of jiffies_till_first_fqs and
-        * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
-        * value, which is a function of HZ, plus one for each
-        * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
-        */
-       d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
-       if (jiffies_till_first_fqs == ULONG_MAX)
-               jiffies_till_first_fqs = d;
-       if (jiffies_till_next_fqs == ULONG_MAX)
-               jiffies_till_next_fqs = d;
-
-       /* If the compile-time values are accurate, just leave. */
-       if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
-           nr_cpu_ids == NR_CPUS)
-               return;
-
-       /*
-        * Compute the number of nodes that can be handled by an rcu_node tree
-        * with the given number of levels.  Setting rcu_capacity[0] makes
-        * some of the arithmetic easier.
-        */
-       rcu_capacity[0] = 1;
-       rcu_capacity[1] = rcu_fanout_leaf;
-       for (i = 2; i <= MAX_RCU_LVLS; i++)
-               rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
-
-       /*
-        * The boot-time rcu_fanout_leaf parameter is only permitted
-        * to increase the leaf-level fanout, not decrease it.  Of course,
-        * the leaf-level fanout cannot exceed the number of bits in
-        * the rcu_node masks.  Finally, the tree must be able to accommodate
-        * the configured number of CPUs.  Complain and fall back to the
-        * compile-time values if these limits are exceeded.
-        */
-       if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
-           rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
-           n > rcu_capacity[MAX_RCU_LVLS]) {
-               WARN_ON(1);
-               return;
-       }
-
-       /* Calculate the number of rcu_nodes at each level of the tree. */
-       for (i = 1; i <= MAX_RCU_LVLS; i++)
-               if (n <= rcu_capacity[i]) {
-                       for (j = 0; j <= i; j++)
-                               num_rcu_lvl[j] =
-                                       DIV_ROUND_UP(n, rcu_capacity[i - j]);
-                       rcu_num_lvls = i;
-                       for (j = i + 1; j <= MAX_RCU_LVLS; j++)
-                               num_rcu_lvl[j] = 0;
-                       break;
-               }
-
-       /* Calculate the total number of rcu_node structures. */
-       rcu_num_nodes = 0;
-       for (i = 0; i <= MAX_RCU_LVLS; i++)
-               rcu_num_nodes += num_rcu_lvl[i];
-       rcu_num_nodes -= n;
-}
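
To make the capacity and per-level arithmetic in rcu_init_geometry() above
concrete, here is a user-space sketch that reruns the same computation with
made-up parameters (leaf fanout 16, interior fanout 64, at most four levels,
5000 CPUs); none of these numbers come from a real configuration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* User-space rerun of the geometry arithmetic above with hypothetical
 * fanout and CPU-count values. */
int main(void)
{
        int fanout_leaf = 16, fanout = 64, max_lvls = 4;
        int n = 5000;                   /* hypothetical nr_cpu_ids */
        int rcu_capacity[5];
        int num_rcu_lvl[5] = { 0 };
        int rcu_num_lvls = 0, rcu_num_nodes = 0;
        int i, j;

        /* Capacity of a tree with i levels. */
        rcu_capacity[0] = 1;
        rcu_capacity[1] = fanout_leaf;
        for (i = 2; i <= max_lvls; i++)
                rcu_capacity[i] = rcu_capacity[i - 1] * fanout;

        /* Pick the smallest number of levels that can hold n CPUs. */
        for (i = 1; i <= max_lvls; i++)
                if (n <= rcu_capacity[i]) {
                        for (j = 0; j <= i; j++)
                                num_rcu_lvl[j] =
                                        DIV_ROUND_UP(n, rcu_capacity[i - j]);
                        rcu_num_lvls = i;
                        break;
                }

        /* Total rcu_node count; the leaf level counts CPUs, not nodes. */
        for (i = 0; i <= max_lvls; i++)
                rcu_num_nodes += num_rcu_lvl[i];
        rcu_num_nodes -= n;

        printf("levels=%d nodes=%d (per level:", rcu_num_lvls, rcu_num_nodes);
        for (i = 0; i <= rcu_num_lvls; i++)
                printf(" %d", num_rcu_lvl[i]);
        printf(")\n");
        return 0;
}

With these made-up values the sketch settles on a three-level tree: one root,
five interior nodes, and 313 leaves, each leaf covering up to 16 CPUs.
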
-
-void __init rcu_init(void)
-{
-       int cpu;
-
-       rcu_bootup_announce();
-       rcu_init_geometry();
-       rcu_init_one(&rcu_sched_state, &rcu_sched_data);
-       rcu_init_one(&rcu_bh_state, &rcu_bh_data);
-       __rcu_init_preempt();
-       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
-
-       /*
-        * We don't need protection against CPU-hotplug here because
-        * this is called early in boot, before either interrupts
-        * or the scheduler are operational.
-        */
-       cpu_notifier(rcu_cpu_notify, 0);
-       pm_notifier(rcu_pm_notify, 0);
-       for_each_online_cpu(cpu)
-               rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-}
-
-#include "rcutree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
deleted file mode 100644 (file)
index 5f97eab..0000000
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion (tree-based version)
- * Internal non-public definitions.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2008
- *
- * Author: Ingo Molnar <mingo@elte.hu>
- *        Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/seqlock.h>
-#include <linux/irq_work.h>
-
-/*
- * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
- * CONFIG_RCU_FANOUT_LEAF.
- * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this did work well going from three levels to four.
- * Of course, your mileage may vary.
- */
-#define MAX_RCU_LVLS 4
-#define RCU_FANOUT_1         (CONFIG_RCU_FANOUT_LEAF)
-#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
-
-#if NR_CPUS <= RCU_FANOUT_1
-#  define RCU_NUM_LVLS       1
-#  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              (NR_CPUS)
-#  define NUM_RCU_LVL_2              0
-#  define NUM_RCU_LVL_3              0
-#  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_2
-#  define RCU_NUM_LVLS       2
-#  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_LVL_2              (NR_CPUS)
-#  define NUM_RCU_LVL_3              0
-#  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_3
-#  define RCU_NUM_LVLS       3
-#  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_LVL_3              (NR_CPUS)
-#  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_4
-#  define RCU_NUM_LVLS       4
-#  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_LVL_4              (NR_CPUS)
-#else
-# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
-
-#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
-#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
-
-extern int rcu_num_lvls;
-extern int rcu_num_nodes;
-
-/*
- * Dynticks per-CPU state.
- */
-struct rcu_dynticks {
-       long long dynticks_nesting; /* Track irq/process nesting level. */
-                                   /* Process level is worth LLONG_MAX/2. */
-       int dynticks_nmi_nesting;   /* Track NMI nesting level. */
-       atomic_t dynticks;          /* Even value for idle, else odd. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-       long long dynticks_idle_nesting;
-                                   /* irq/process nesting level from idle. */
-       atomic_t dynticks_idle;     /* Even value for idle, else odd. */
-                                   /*  "Idle" excludes userspace execution. */
-       unsigned long dynticks_idle_jiffies;
-                                   /* End of last non-NMI non-idle period. */
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-#ifdef CONFIG_RCU_FAST_NO_HZ
-       bool all_lazy;              /* Are all CPU's CBs lazy? */
-       unsigned long nonlazy_posted;
-                                   /* # times non-lazy CBs posted to CPU. */
-       unsigned long nonlazy_posted_snap;
-                                   /* idle-period nonlazy_posted snapshot. */
-       unsigned long last_accelerate;
-                                   /* Last jiffy CBs were accelerated. */
-       int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
-#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-};
-
-/* RCU's kthread states for tracing. */
-#define RCU_KTHREAD_STOPPED  0
-#define RCU_KTHREAD_RUNNING  1
-#define RCU_KTHREAD_WAITING  2
-#define RCU_KTHREAD_OFFCPU   3
-#define RCU_KTHREAD_YIELDING 4
-#define RCU_KTHREAD_MAX      4
-
-/*
- * Definition for node within the RCU grace-period-detection hierarchy.
- */
-struct rcu_node {
-       raw_spinlock_t lock;    /* Root rcu_node's lock protects some */
-                               /*  rcu_state fields as well as following. */
-       unsigned long gpnum;    /* Current grace period for this node. */
-                               /*  This will either be equal to or one */
-                               /*  behind the root rcu_node's gpnum. */
-       unsigned long completed; /* Last GP completed for this node. */
-                               /*  This will either be equal to or one */
-                               /*  behind the root rcu_node's gpnum. */
-       unsigned long qsmask;   /* CPUs or groups that need to switch in */
-                               /*  order for current grace period to proceed.*/
-                               /*  In leaf rcu_node, each bit corresponds to */
-                               /*  an rcu_data structure, otherwise, each */
-                               /*  bit corresponds to a child rcu_node */
-                               /*  structure. */
-       unsigned long expmask;  /* Groups that have ->blkd_tasks */
-                               /*  elements that need to drain to allow the */
-                               /*  current expedited grace period to */
-                               /*  complete (only for TREE_PREEMPT_RCU). */
-       unsigned long qsmaskinit;
-                               /* Per-GP initial value for qsmask & expmask. */
-       unsigned long grpmask;  /* Mask to apply to parent qsmask. */
-                               /*  Only one bit will be set in this mask. */
-       int     grplo;          /* lowest-numbered CPU or group here. */
-       int     grphi;          /* highest-numbered CPU or group here. */
-       u8      grpnum;         /* CPU/group number for next level up. */
-       u8      level;          /* root is at level 0. */
-       struct rcu_node *parent;
-       struct list_head blkd_tasks;
-                               /* Tasks blocked in RCU read-side critical */
-                               /*  section.  Tasks are placed at the head */
-                               /*  of this list and age towards the tail. */
-       struct list_head *gp_tasks;
-                               /* Pointer to the first task blocking the */
-                               /*  current grace period, or NULL if there */
-                               /*  is no such task. */
-       struct list_head *exp_tasks;
-                               /* Pointer to the first task blocking the */
-                               /*  current expedited grace period, or NULL */
-                               /*  if there is no such task.  If there */
-                               /*  is no current expedited grace period, */
-                               /*  then there cannot be any such task. */
-#ifdef CONFIG_RCU_BOOST
-       struct list_head *boost_tasks;
-                               /* Pointer to first task that needs to be */
-                               /*  priority boosted, or NULL if no priority */
-                               /*  boosting is needed for this rcu_node */
-                               /*  structure.  If there are no tasks */
-                               /*  queued on this rcu_node structure that */
-                               /*  are blocking the current grace period, */
-                               /*  there can be no such task. */
-       unsigned long boost_time;
-                               /* When to start boosting (jiffies). */
-       struct task_struct *boost_kthread_task;
-                               /* kthread that takes care of priority */
-                               /*  boosting for this rcu_node structure. */
-       unsigned int boost_kthread_status;
-                               /* State of boost_kthread_task for tracing. */
-       unsigned long n_tasks_boosted;
-                               /* Total number of tasks boosted. */
-       unsigned long n_exp_boosts;
-                               /* Number of tasks boosted for expedited GP. */
-       unsigned long n_normal_boosts;
-                               /* Number of tasks boosted for normal GP. */
-       unsigned long n_balk_blkd_tasks;
-                               /* Refused to boost: no blocked tasks. */
-       unsigned long n_balk_exp_gp_tasks;
-                               /* Refused to boost: nothing blocking GP. */
-       unsigned long n_balk_boost_tasks;
-                               /* Refused to boost: already boosting. */
-       unsigned long n_balk_notblocked;
-                               /* Refused to boost: RCU RS CS still running. */
-       unsigned long n_balk_notyet;
-                               /* Refused to boost: not yet time. */
-       unsigned long n_balk_nos;
-                               /* Refused to boost: not sure why, though. */
-                               /*  This can happen due to race conditions. */
-#endif /* #ifdef CONFIG_RCU_BOOST */
-#ifdef CONFIG_RCU_NOCB_CPU
-       wait_queue_head_t nocb_gp_wq[2];
-                               /* Place for rcu_nocb_kthread() to wait GP. */
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-       int need_future_gp[2];
-                               /* Counts of upcoming no-CB GP requests. */
-       raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
-} ____cacheline_internodealigned_in_smp;
-
-/*
- * Do a full breadth-first scan of the rcu_node structures for the
- * specified rcu_state structure.
- */
-#define rcu_for_each_node_breadth_first(rsp, rnp) \
-       for ((rnp) = &(rsp)->node[0]; \
-            (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
-/*
- * Do a breadth-first scan of the non-leaf rcu_node structures for the
- * specified rcu_state structure.  Note that if there is a singleton
- * rcu_node tree with but one rcu_node structure, this loop is a no-op.
- */
-#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
-       for ((rnp) = &(rsp)->node[0]; \
-            (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
-
-/*
- * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
- * structure.  Note that if there is a singleton rcu_node tree with but
- * one rcu_node structure, this loop -will- visit the rcu_node structure.
- * It is still a leaf node, even if it is also the root node.
- */
-#define rcu_for_each_leaf_node(rsp, rnp) \
-       for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
-            (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
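
The three scan macros above work because every rcu_node lives in a single
dense array, with the ->level[] pointers marking where each level begins.
Below is a user-space sketch of that layout, using a made-up two-level tree
(one root, four leaves, 16 CPUs) and an analog of rcu_for_each_leaf_node();
the names are invented for illustration only:

#include <stdio.h>

/* Toy stand-in for the dense rcu_node array: node[0] is the root,
 * node[1..4] are leaves, level[] points at the first node of each level. */
struct toy_node {
        int level;
        int grplo, grphi;
};

#define TOY_NUM_NODES 5

int main(void)
{
        struct toy_node node[TOY_NUM_NODES];
        struct toy_node *level[2] = { &node[0], &node[1] };
        struct toy_node *rnp;
        int i;

        node[0] = (struct toy_node){ .level = 0, .grplo = 0, .grphi = 15 };
        for (i = 1; i < TOY_NUM_NODES; i++)
                node[i] = (struct toy_node){
                        .level = 1,
                        .grplo = (i - 1) * 4,
                        .grphi = (i - 1) * 4 + 3,
                };

        /* Analog of rcu_for_each_leaf_node(): start at the last level's
         * first node and run to the end of the dense array. */
        for (rnp = level[1]; rnp < &node[TOY_NUM_NODES]; rnp++)
                printf("leaf covering CPUs %d-%d\n", rnp->grplo, rnp->grphi);

        return 0;
}
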
-/* Index values for nxttail array in struct rcu_data. */
-#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL          3
-#define RCU_NEXT_SIZE          4
-
-/* Per-CPU data for read-copy update. */
-struct rcu_data {
-       /* 1) quiescent-state and grace-period handling : */
-       unsigned long   completed;      /* Track rsp->completed gp number */
-                                       /*  in order to detect GP end. */
-       unsigned long   gpnum;          /* Highest gp number that this CPU */
-                                       /*  is aware of having started. */
-       bool            passed_quiesce; /* User-mode/idle loop etc. */
-       bool            qs_pending;     /* Core waits for quiesc state. */
-       bool            beenonline;     /* CPU online at least once. */
-       bool            preemptible;    /* Preemptible RCU? */
-       struct rcu_node *mynode;        /* This CPU's leaf of hierarchy */
-       unsigned long grpmask;          /* Mask to apply to leaf qsmask. */
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-       unsigned long   ticks_this_gp;  /* The number of scheduling-clock */
-                                       /*  ticks this CPU has handled */
-                                       /*  during and after the last grace */
-                                       /* period it is aware of. */
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-       /* 2) batch handling */
-       /*
-        * If nxtlist is not NULL, it is partitioned as follows.
-        * Any of the partitions might be empty, in which case the
-        * pointer to that partition will be equal to the pointer for
-        * the following partition.  When the list is empty, all of
-        * the nxttail elements point to the ->nxtlist pointer itself,
-        * which in that case is NULL.
-        *
-        * [nxtlist, *nxttail[RCU_DONE_TAIL]):
-        *      Entries that batch # <= ->completed
-        *      The grace period for these entries has completed, and
-        *      the other grace-period-completed entries may be moved
-        *      here temporarily in rcu_process_callbacks().
-        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
-        *      Entries that batch # <= ->completed - 1: waiting for current GP
-        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
-        *      Entries known to have arrived before current GP ended
-        * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
-        *      Entries that might have arrived after current GP ended
-        *      Note that the value of *nxttail[RCU_NEXT_TAIL] will
-        *      always be NULL, as this is the end of the list.
-        */
-       struct rcu_head *nxtlist;
-       struct rcu_head **nxttail[RCU_NEXT_SIZE];
-       unsigned long   nxtcompleted[RCU_NEXT_SIZE];
-                                       /* grace periods for sublists. */
-       long            qlen_lazy;      /* # of lazy queued callbacks */
-       long            qlen;           /* # of queued callbacks, incl lazy */
-       long            qlen_last_fqs_check;
-                                       /* qlen at last check for QS forcing */
-       unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
-       unsigned long   n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
-       unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
-       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
-       unsigned long   n_force_qs_snap;
-                                       /* did other CPU force QS recently? */
-       long            blimit;         /* Upper limit on a processed batch */
-
-       /* 3) dynticks interface. */
-       struct rcu_dynticks *dynticks;  /* Shared per-CPU dynticks state. */
-       int dynticks_snap;              /* Per-GP tracking for dynticks. */
-
-       /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
-       unsigned long dynticks_fqs;     /* Kicked due to dynticks idle. */
-       unsigned long offline_fqs;      /* Kicked due to being offline. */
-
-       /* 5) __rcu_pending() statistics. */
-       unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
-       unsigned long n_rp_qs_pending;
-       unsigned long n_rp_report_qs;
-       unsigned long n_rp_cb_ready;
-       unsigned long n_rp_cpu_needs_gp;
-       unsigned long n_rp_gp_completed;
-       unsigned long n_rp_gp_started;
-       unsigned long n_rp_need_nothing;
-
-       /* 6) _rcu_barrier() and OOM callbacks. */
-       struct rcu_head barrier_head;
-#ifdef CONFIG_RCU_FAST_NO_HZ
-       struct rcu_head oom_head;
-#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-
-       /* 7) Callback offloading. */
-#ifdef CONFIG_RCU_NOCB_CPU
-       struct rcu_head *nocb_head;     /* CBs waiting for kthread. */
-       struct rcu_head **nocb_tail;
-       atomic_long_t nocb_q_count;     /* # CBs waiting for kthread */
-       atomic_long_t nocb_q_count_lazy; /*  (approximate). */
-       int nocb_p_count;               /* # CBs being invoked by kthread */
-       int nocb_p_count_lazy;          /*  (approximate). */
-       wait_queue_head_t nocb_wq;      /* For nocb kthreads to sleep on. */
-       struct task_struct *nocb_kthread;
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-
-       /* 8) RCU CPU stall data. */
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-       unsigned int softirq_snap;      /* Snapshot of softirq activity. */
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-       int cpu;
-       struct rcu_state *rsp;
-};
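
The segmented ->nxtlist/->nxttail arrangement described in the comment above
can be pictured with a toy list.  The sketch below is user-space code with
made-up names and only four callbacks; it shows how each segment is delimited
by a tail pointer and how an empty segment simply shares its tail pointer
with its neighbour:

#include <stdio.h>

/* Toy version of the segmented callback list: one singly linked list,
 * four tail pointers marking the segment boundaries. */
struct toy_cb {
        struct toy_cb *next;
        int id;
};

enum { DONE_TAIL, WAIT_TAIL, NEXT_READY_TAIL, NEXT_TAIL, NR_TAILS };

int main(void)
{
        struct toy_cb cbs[4] = {
                { &cbs[1], 0 }, { &cbs[2], 1 }, { &cbs[3], 2 }, { NULL, 3 },
        };
        struct toy_cb *nxtlist = &cbs[0];
        struct toy_cb **nxttail[NR_TAILS];
        const char *seg_name[NR_TAILS] = {
                "DONE", "WAIT", "NEXT_READY", "NEXT",
        };
        struct toy_cb **p = &nxtlist;
        int seg;

        /* Pretend cb0 is done, cb1-cb2 wait for the current GP, nothing is
         * next-ready, and cb3 arrived too late for the current GP. */
        nxttail[DONE_TAIL] = &cbs[0].next;
        nxttail[WAIT_TAIL] = &cbs[2].next;
        nxttail[NEXT_READY_TAIL] = &cbs[2].next;        /* empty segment */
        nxttail[NEXT_TAIL] = &cbs[3].next;

        for (seg = 0; seg < NR_TAILS; seg++) {
                printf("%s:", seg_name[seg]);
                for (; p != nxttail[seg]; p = &(*p)->next)
                        printf(" cb%d", (*p)->id);
                printf("\n");
        }
        return 0;
}
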
-
-/* Values for fqs_state field in struct rcu_state. */
-#define RCU_GP_IDLE            0       /* No grace period in progress. */
-#define RCU_GP_INIT            1       /* Grace period being initialized. */
-#define RCU_SAVE_DYNTICK       2       /* Need to scan dyntick state. */
-#define RCU_FORCE_QS           3       /* Need to force quiescent state. */
-#define RCU_SIGNAL_INIT                RCU_SAVE_DYNTICK
-
-#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
-                                       /* For jiffies_till_first_fqs and */
-                                       /*  jiffies_till_next_fqs. */
-
-#define RCU_JIFFIES_FQS_DIV    256     /* Very large systems need more */
-                                       /*  delay between bouts of */
-                                       /*  quiescent-state forcing. */
-
-#define RCU_STALL_RAT_DELAY    2       /* Allow other CPUs time to take */
-                                       /*  at least one scheduling clock */
-                                       /*  irq before ratting on them. */
-
-#define rcu_wait(cond)                                                 \
-do {                                                                   \
-       for (;;) {                                                      \
-               set_current_state(TASK_INTERRUPTIBLE);                  \
-               if (cond)                                               \
-                       break;                                          \
-               schedule();                                             \
-       }                                                               \
-       __set_current_state(TASK_RUNNING);                              \
-} while (0)
-
-/*
- * RCU global state, including node hierarchy.  This hierarchy is
- * represented in "heap" form in a dense array.  The root (first level)
- * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
- * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
- * and the third level in ->node[m+1] and following (->node[m+1] referenced
- * by ->level[2]).  The number of levels is determined by the number of
- * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy"
- * consisting of a single rcu_node.
- */
-struct rcu_state {
-       struct rcu_node node[NUM_RCU_NODES];    /* Hierarchy. */
-       struct rcu_node *level[RCU_NUM_LVLS];   /* Hierarchy levels. */
-       u32 levelcnt[MAX_RCU_LVLS + 1];         /* # nodes in each level. */
-       u8 levelspread[RCU_NUM_LVLS];           /* kids/node in each level. */
-       struct rcu_data __percpu *rda;          /* Pointer to per-CPU rcu_data. */
-       void (*call)(struct rcu_head *head,     /* call_rcu() flavor. */
-                    void (*func)(struct rcu_head *head));
-
-       /* The following fields are guarded by the root rcu_node's lock. */
-
-       u8      fqs_state ____cacheline_internodealigned_in_smp;
-                                               /* Force QS state. */
-       u8      boost;                          /* Subject to priority boost. */
-       unsigned long gpnum;                    /* Current gp number. */
-       unsigned long completed;                /* # of last completed gp. */
-       struct task_struct *gp_kthread;         /* Task for grace periods. */
-       wait_queue_head_t gp_wq;                /* Where GP task waits. */
-       int gp_flags;                           /* Commands for GP task. */
-
-       /* End of fields guarded by root rcu_node's lock. */
-
-       raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
-                                               /* Protect following fields. */
-       struct rcu_head *orphan_nxtlist;        /* Orphaned callbacks that */
-                                               /*  need a grace period. */
-       struct rcu_head **orphan_nxttail;       /* Tail of above. */
-       struct rcu_head *orphan_donelist;       /* Orphaned callbacks that */
-                                               /*  are ready to invoke. */
-       struct rcu_head **orphan_donetail;      /* Tail of above. */
-       long qlen_lazy;                         /* Number of lazy callbacks. */
-       long qlen;                              /* Total number of callbacks. */
-       /* End of fields guarded by orphan_lock. */
-
-       struct mutex onoff_mutex;               /* Coordinate hotplug & GPs. */
-
-       struct mutex barrier_mutex;             /* Guards barrier fields. */
-       atomic_t barrier_cpu_count;             /* # CPUs waiting on. */
-       struct completion barrier_completion;   /* Wake at barrier end. */
-       unsigned long n_barrier_done;           /* ++ at start and end of */
-                                               /*  _rcu_barrier(). */
-       /* End of fields guarded by barrier_mutex. */
-
-       atomic_long_t expedited_start;          /* Starting ticket. */
-       atomic_long_t expedited_done;           /* Done ticket. */
-       atomic_long_t expedited_wrap;           /* # near-wrap incidents. */
-       atomic_long_t expedited_tryfail;        /* # acquisition failures. */
-       atomic_long_t expedited_workdone1;      /* # done by others #1. */
-       atomic_long_t expedited_workdone2;      /* # done by others #2. */
-       atomic_long_t expedited_normal;         /* # fallbacks to normal. */
-       atomic_long_t expedited_stoppedcpus;    /* # successful stop_cpus. */
-       atomic_long_t expedited_done_tries;     /* # tries to update _done. */
-       atomic_long_t expedited_done_lost;      /* # times beaten to _done. */
-       atomic_long_t expedited_done_exit;      /* # times exited _done loop. */
-
-       unsigned long jiffies_force_qs;         /* Time at which to invoke */
-                                               /*  force_quiescent_state(). */
-       unsigned long n_force_qs;               /* Number of calls to */
-                                               /*  force_quiescent_state(). */
-       unsigned long n_force_qs_lh;            /* ~Number of calls leaving */
-                                               /*  due to lock unavailable. */
-       unsigned long n_force_qs_ngp;           /* Number of calls leaving */
-                                               /*  due to no GP active. */
-       unsigned long gp_start;                 /* Time at which GP started, */
-                                               /*  in jiffies. */
-       unsigned long jiffies_stall;            /* Time at which to check */
-                                               /*  for CPU stalls. */
-       unsigned long gp_max;                   /* Maximum GP duration in */
-                                               /*  jiffies. */
-       const char *name;                       /* Name of structure. */
-       char abbr;                              /* Abbreviated name. */
-       struct list_head flavors;               /* List of RCU flavors. */
-       struct irq_work wakeup_work;            /* Postponed wakeups */
-};
-
-/* Values for rcu_state structure's gp_flags field. */
-#define RCU_GP_FLAG_INIT 0x1   /* Need grace-period initialization. */
-#define RCU_GP_FLAG_FQS  0x2   /* Need grace-period quiescent-state forcing. */
-
-extern struct list_head rcu_struct_flavors;
-
-/* Sequence through rcu_state structures for each RCU flavor. */
-#define for_each_rcu_flavor(rsp) \
-       list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
-
-/* Return values for rcu_preempt_offline_tasks(). */
-
-#define RCU_OFL_TASKS_NORM_GP  0x1             /* Tasks blocking normal */
-                                               /*  GP were moved to root. */
-#define RCU_OFL_TASKS_EXP_GP   0x2             /* Tasks blocking expedited */
-                                               /*  GP were moved to root. */
-
-/*
- * RCU implementation internal declarations:
- */
-extern struct rcu_state rcu_sched_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
-
-extern struct rcu_state rcu_bh_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-extern struct rcu_state rcu_preempt_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-#ifdef CONFIG_RCU_BOOST
-DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
-DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DECLARE_PER_CPU(char, rcu_cpu_has_work);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-#ifndef RCU_TREE_NONCORE
-
-/* Forward declarations for rcutree_plugin.h */
-static void rcu_bootup_announce(void);
-long rcu_batches_completed(void);
-static void rcu_preempt_note_context_switch(int cpu);
-static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
-#ifdef CONFIG_HOTPLUG_CPU
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
-                                     unsigned long flags);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-static void rcu_print_detail_task_stall(struct rcu_state *rsp);
-static int rcu_print_task_stall(struct rcu_node *rnp);
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
-#ifdef CONFIG_HOTPLUG_CPU
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                    struct rcu_node *rnp,
-                                    struct rcu_data *rdp);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-static void rcu_preempt_check_callbacks(int cpu);
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake);
-#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
-static void __init __rcu_init_preempt(void);
-static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
-static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
-static bool rcu_is_callbacks_kthread(void);
-#ifdef CONFIG_RCU_BOOST
-static void rcu_preempt_do_callbacks(void);
-static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
-                                                struct rcu_node *rnp);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-static void rcu_prepare_kthreads(int cpu);
-static void rcu_cleanup_after_idle(int cpu);
-static void rcu_prepare_for_idle(int cpu);
-static void rcu_idle_count_callbacks_posted(void);
-static void print_cpu_stall_info_begin(void);
-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
-static void print_cpu_stall_info_end(void);
-static void zero_cpu_stall_ticks(struct rcu_data *rdp);
-static void increment_cpu_stall_ticks(void);
-static int rcu_nocb_needs_gp(struct rcu_state *rsp);
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
-static void rcu_init_one_nocb(struct rcu_node *rnp);
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-                           bool lazy);
-static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-                                     struct rcu_data *rdp);
-static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
-static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
-static void rcu_kick_nohz_cpu(int cpu);
-static bool init_nocb_callback_list(struct rcu_data *rdp);
-static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
-static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-                                 unsigned long *maxj);
-static bool is_sysidle_rcu_state(struct rcu_state *rsp);
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-                                 unsigned long maxj);
-static void rcu_bind_gp_kthread(void);
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
-
-#endif /* #ifndef RCU_TREE_NONCORE */
-
-#ifdef CONFIG_RCU_TRACE
-#ifdef CONFIG_RCU_NOCB_CPU
-/* Sum up queue lengths for tracing. */
-static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
-{
-       *ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
-       *qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
-}
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
-{
-       *ql = 0;
-       *qll = 0;
-}
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
deleted file mode 100644 (file)
index 130c97b..0000000
+++ /dev/null
@@ -1,2797 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion (tree-based version)
- * Internal non-public definitions that provide either classic
- * or preemptible semantics.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright Red Hat, 2009
- * Copyright IBM Corporation, 2009
- *
- * Author: Ingo Molnar <mingo@elte.hu>
- *        Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/smpboot.h>
-#include "time/tick-internal.h"
-
-#define RCU_KTHREAD_PRIO 1
-
-#ifdef CONFIG_RCU_BOOST
-#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
-#else
-#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
-#endif
-
-#ifdef CONFIG_RCU_NOCB_CPU
-static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
-static bool have_rcu_nocb_mask;            /* Was rcu_nocb_mask allocated? */
-static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */
-static char __initdata nocb_buf[NR_CPUS * 5];
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-
-/*
- * Check the RCU kernel configuration parameters and print informative
- * messages about anything out of the ordinary.  If you like #ifdef, you
- * will love this function.
- */
-static void __init rcu_bootup_announce_oddness(void)
-{
-#ifdef CONFIG_RCU_TRACE
-       pr_info("\tRCU debugfs-based tracing is enabled.\n");
-#endif
-#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
-       pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
-              CONFIG_RCU_FANOUT);
-#endif
-#ifdef CONFIG_RCU_FANOUT_EXACT
-       pr_info("\tHierarchical RCU autobalancing is disabled.\n");
-#endif
-#ifdef CONFIG_RCU_FAST_NO_HZ
-       pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
-#endif
-#ifdef CONFIG_PROVE_RCU
-       pr_info("\tRCU lockdep checking is enabled.\n");
-#endif
-#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
-       pr_info("\tRCU torture testing starts during boot.\n");
-#endif
-#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
-       pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n");
-#endif
-#if defined(CONFIG_RCU_CPU_STALL_INFO)
-       pr_info("\tAdditional per-CPU info printed with stalls.\n");
-#endif
-#if NUM_RCU_LVL_4 != 0
-       pr_info("\tFour-level hierarchy is enabled.\n");
-#endif
-       if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
-               pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
-       if (nr_cpu_ids != NR_CPUS)
-               pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_NOCB_CPU
-#ifndef CONFIG_RCU_NOCB_CPU_NONE
-       if (!have_rcu_nocb_mask) {
-               zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
-               have_rcu_nocb_mask = true;
-       }
-#ifdef CONFIG_RCU_NOCB_CPU_ZERO
-       pr_info("\tOffload RCU callbacks from CPU 0\n");
-       cpumask_set_cpu(0, rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
-#ifdef CONFIG_RCU_NOCB_CPU_ALL
-       pr_info("\tOffload RCU callbacks from all CPUs\n");
-       cpumask_setall(rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
-       if (have_rcu_nocb_mask) {
-               cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
-               pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
-               if (rcu_nocb_poll)
-                       pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
-       }
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-}
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *rcu_state = &rcu_preempt_state;
-
-static int rcu_preempted_readers_exp(struct rcu_node *rnp);
-
-/*
- * Tell them what RCU they are running.
- */
-static void __init rcu_bootup_announce(void)
-{
-       pr_info("Preemptible hierarchical RCU implementation.\n");
-       rcu_bootup_announce_oddness();
-}
-
-/*
- * Return the number of RCU-preempt batches processed thus far
- * for debug and statistics.
- */
-long rcu_batches_completed_preempt(void)
-{
-       return rcu_preempt_state.completed;
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
-
-/*
- * Return the number of RCU batches processed thus far for debug & stats.
- */
-long rcu_batches_completed(void)
-{
-       return rcu_batches_completed_preempt();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
- * Force a quiescent state for preemptible RCU.
- */
-void rcu_force_quiescent_state(void)
-{
-       force_quiescent_state(&rcu_preempt_state);
-}
-EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-
-/*
- * Record a preemptible-RCU quiescent state for the specified CPU.  Note
- * that this just means that the task currently running on the CPU is
- * not in a quiescent state.  There might be any number of tasks blocked
- * while in an RCU read-side critical section.
- *
- * Unlike the other rcu_*_qs() functions, callers to this function
- * must disable irqs in order to protect the assignment to
- * ->rcu_read_unlock_special.
- */
-static void rcu_preempt_qs(int cpu)
-{
-       struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
-       current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
-}
-
-/*
- * We have entered the scheduler, and the current task might soon be
- * context-switched away from.  If this task is in an RCU read-side
- * critical section, we will no longer be able to rely on the CPU to
- * record that fact, so we enqueue the task on the blkd_tasks list.
- * The task will dequeue itself when it exits the outermost enclosing
- * RCU read-side critical section.  Therefore, the current grace period
- * cannot be permitted to complete until the blkd_tasks list entries
- * predating the current grace period drain, in other words, until
- * rnp->gp_tasks becomes NULL.
- *
- * Caller must disable preemption.
- */
-static void rcu_preempt_note_context_switch(int cpu)
-{
-       struct task_struct *t = current;
-       unsigned long flags;
-       struct rcu_data *rdp;
-       struct rcu_node *rnp;
-
-       if (t->rcu_read_lock_nesting > 0 &&
-           (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
-
-               /* Possibly blocking in an RCU read-side critical section. */
-               rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
-               rnp = rdp->mynode;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
-               t->rcu_blocked_node = rnp;
-
-               /*
-                * If this CPU has already checked in, then this task
-                * will hold up the next grace period rather than the
-                * current grace period.  Queue the task accordingly.
-                * If the task is queued for the current grace period
-                * (i.e., this CPU has not yet passed through a quiescent
-                * state for the current grace period), then as long
-                * as that task remains queued, the current grace period
-                * cannot end.  Note that there is some uncertainty as
-                * to exactly when the current grace period started.
-                * We take a conservative approach, which can result
-                * in unnecessarily waiting on tasks that started very
-                * slightly after the current grace period began.  C'est
-                * la vie!!!
-                *
-                * But first, note that the current CPU must still be
-                * on line!
-                */
-               WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
-               WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-               if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
-                       list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
-                       rnp->gp_tasks = &t->rcu_node_entry;
-#ifdef CONFIG_RCU_BOOST
-                       if (rnp->boost_tasks != NULL)
-                               rnp->boost_tasks = rnp->gp_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-               } else {
-                       list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
-                       if (rnp->qsmask & rdp->grpmask)
-                               rnp->gp_tasks = &t->rcu_node_entry;
-               }
-               trace_rcu_preempt_task(rdp->rsp->name,
-                                      t->pid,
-                                      (rnp->qsmask & rdp->grpmask)
-                                      ? rnp->gpnum
-                                      : rnp->gpnum + 1);
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       } else if (t->rcu_read_lock_nesting < 0 &&
-                  t->rcu_read_unlock_special) {
-
-               /*
-                * Complete exit from RCU read-side critical section on
-                * behalf of preempted instance of __rcu_read_unlock().
-                */
-               rcu_read_unlock_special(t);
-       }
-
-       /*
-        * Either we were not in an RCU read-side critical section to
-        * begin with, or we have now recorded that critical section
-        * globally.  Either way, we can now note a quiescent state
-        * for this CPU.  Again, if we were in an RCU read-side critical
-        * section, and if that critical section was blocking the current
-        * grace period, then the fact that the task has been enqueued
-        * means that we continue to block the current grace period.
-        */
-       local_irq_save(flags);
-       rcu_preempt_qs(cpu);
-       local_irq_restore(flags);
-}
-
-/*
- * Check for preempted RCU readers blocking the current grace period
- * for the specified rcu_node structure.  If the caller needs a reliable
- * answer, it must hold the rcu_node's ->lock.
- */
-static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
-{
-       return rnp->gp_tasks != NULL;
-}
-
-/*
- * Record a quiescent state for all tasks that were previously queued
- * on the specified rcu_node structure and that were blocking the current
- * RCU grace period.  The caller must hold the specified rnp->lock with
- * irqs disabled, and this lock is released upon return, but irqs remain
- * disabled.
- */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-       __releases(rnp->lock)
-{
-       unsigned long mask;
-       struct rcu_node *rnp_p;
-
-       if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;  /* Still need more quiescent states! */
-       }
-
-       rnp_p = rnp->parent;
-       if (rnp_p == NULL) {
-               /*
-                * Either there is only one rcu_node in the tree,
-                * or tasks were kicked up to root rcu_node due to
-                * CPUs going offline.
-                */
-               rcu_report_qs_rsp(&rcu_preempt_state, flags);
-               return;
-       }
-
-       /* Report up the rest of the hierarchy. */
-       mask = rnp->grpmask;
-       raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
-       raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
-       rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
-}
-
-/*
- * Advance a ->blkd_tasks-list pointer to the next entry, returning
- * NULL instead if at the end of the list.
- */
-static struct list_head *rcu_next_node_entry(struct task_struct *t,
-                                            struct rcu_node *rnp)
-{
-       struct list_head *np;
-
-       np = t->rcu_node_entry.next;
-       if (np == &rnp->blkd_tasks)
-               np = NULL;
-       return np;
-}
-
-/*
- * Handle special cases during rcu_read_unlock(), such as needing to
- * notify RCU core processing or the task having blocked during the RCU
- * read-side critical section.
- */
-void rcu_read_unlock_special(struct task_struct *t)
-{
-       int empty;
-       int empty_exp;
-       int empty_exp_now;
-       unsigned long flags;
-       struct list_head *np;
-#ifdef CONFIG_RCU_BOOST
-       struct rt_mutex *rbmp = NULL;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-       struct rcu_node *rnp;
-       int special;
-
-       /* NMI handlers cannot block and cannot safely manipulate state. */
-       if (in_nmi())
-               return;
-
-       local_irq_save(flags);
-
-       /*
-        * If RCU core is waiting for this CPU to exit critical section,
-        * let it know that we have done so.
-        */
-       special = t->rcu_read_unlock_special;
-       if (special & RCU_READ_UNLOCK_NEED_QS) {
-               rcu_preempt_qs(smp_processor_id());
-       }
-
-       /* Hardware IRQ handlers cannot block. */
-       if (in_irq() || in_serving_softirq()) {
-               local_irq_restore(flags);
-               return;
-       }
-
-       /* Clean up if blocked during RCU read-side critical section. */
-       if (special & RCU_READ_UNLOCK_BLOCKED) {
-               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
-
-               /*
-                * Remove this task from the list it blocked on.  The
-                * task can migrate while we acquire the lock, but at
-                * most one time.  So at most two passes through loop.
-                */
-               for (;;) {
-                       rnp = t->rcu_blocked_node;
-                       raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
-                       if (rnp == t->rcu_blocked_node)
-                               break;
-                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-               }
-               empty = !rcu_preempt_blocked_readers_cgp(rnp);
-               empty_exp = !rcu_preempted_readers_exp(rnp);
-               smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
-               np = rcu_next_node_entry(t, rnp);
-               list_del_init(&t->rcu_node_entry);
-               t->rcu_blocked_node = NULL;
-               trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
-                                               rnp->gpnum, t->pid);
-               if (&t->rcu_node_entry == rnp->gp_tasks)
-                       rnp->gp_tasks = np;
-               if (&t->rcu_node_entry == rnp->exp_tasks)
-                       rnp->exp_tasks = np;
-#ifdef CONFIG_RCU_BOOST
-               if (&t->rcu_node_entry == rnp->boost_tasks)
-                       rnp->boost_tasks = np;
-               /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
-               if (t->rcu_boost_mutex) {
-                       rbmp = t->rcu_boost_mutex;
-                       t->rcu_boost_mutex = NULL;
-               }
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-               /*
-                * If this was the last task on the current list, and if
-                * we aren't waiting on any CPUs, report the quiescent state.
-                * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
-                * so we must take a snapshot of the expedited state.
-                */
-               empty_exp_now = !rcu_preempted_readers_exp(rnp);
-               if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
-                       trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
-                                                        rnp->gpnum,
-                                                        0, rnp->qsmask,
-                                                        rnp->level,
-                                                        rnp->grplo,
-                                                        rnp->grphi,
-                                                        !!rnp->gp_tasks);
-                       rcu_report_unblock_qs_rnp(rnp, flags);
-               } else {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               }
-
-#ifdef CONFIG_RCU_BOOST
-               /* Unboost if we were boosted. */
-               if (rbmp)
-                       rt_mutex_unlock(rbmp);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-               /*
-                * If this was the last task on the expedited lists,
-                * then we need to report up the rcu_node hierarchy.
-                */
-               if (!empty_exp && empty_exp_now)
-                       rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
-       } else {
-               local_irq_restore(flags);
-       }
-}
-
-#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
-
-/*
- * Dump detailed information for all tasks blocking the current RCU
- * grace period on the specified rcu_node structure.
- */
-static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
-{
-       unsigned long flags;
-       struct task_struct *t;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       if (!rcu_preempt_blocked_readers_cgp(rnp)) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-       t = list_entry(rnp->gp_tasks,
-                      struct task_struct, rcu_node_entry);
-       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
-               sched_show_task(t);
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Dump detailed information for all tasks blocking the current RCU
- * grace period.
- */
-static void rcu_print_detail_task_stall(struct rcu_state *rsp)
-{
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       rcu_print_detail_task_stall_rnp(rnp);
-       rcu_for_each_leaf_node(rsp, rnp)
-               rcu_print_detail_task_stall_rnp(rnp);
-}
-
-#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
-
-static void rcu_print_detail_task_stall(struct rcu_state *rsp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
-
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-
-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
-{
-       pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
-              rnp->level, rnp->grplo, rnp->grphi);
-}
-
-static void rcu_print_task_stall_end(void)
-{
-       pr_cont("\n");
-}
-
-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
-{
-}
-
-static void rcu_print_task_stall_end(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-/*
- * Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each.
- */
-static int rcu_print_task_stall(struct rcu_node *rnp)
-{
-       struct task_struct *t;
-       int ndetected = 0;
-
-       if (!rcu_preempt_blocked_readers_cgp(rnp))
-               return 0;
-       rcu_print_task_stall_begin(rnp);
-       t = list_entry(rnp->gp_tasks,
-                      struct task_struct, rcu_node_entry);
-       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
-               pr_cont(" P%d", t->pid);
-               ndetected++;
-       }
-       rcu_print_task_stall_end();
-       return ndetected;
-}
-
-/*
- * Check that the list of blocked tasks for the newly completed grace
- * period is in fact empty.  It is a serious bug to complete a grace
- * period that still has RCU readers blocked!  This function must be
- * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
- * must be held by the caller.
- *
- * Also, if there are blocked tasks on the list, they automatically
- * block the newly created grace period, so set up ->gp_tasks accordingly.
- */
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
-{
-       WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
-       if (!list_empty(&rnp->blkd_tasks))
-               rnp->gp_tasks = rnp->blkd_tasks.next;
-       WARN_ON_ONCE(rnp->qsmask);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Handle tasklist migration for the case in which all CPUs covered by the
- * specified rcu_node have gone offline.  Move them up to the root
- * rcu_node.  The reason for not just moving them to the immediate
- * parent is to remove the need for rcu_read_unlock_special() to
- * make more than two attempts to acquire the target rcu_node's lock.
- * Returns 1 if there was previously a task blocking the current grace
- * period on the specified rcu_node structure, and 0 otherwise.
- *
- * The caller must hold rnp->lock with irqs disabled.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                    struct rcu_node *rnp,
-                                    struct rcu_data *rdp)
-{
-       struct list_head *lp;
-       struct list_head *lp_root;
-       int retval = 0;
-       struct rcu_node *rnp_root = rcu_get_root(rsp);
-       struct task_struct *t;
-
-       if (rnp == rnp_root) {
-               WARN_ONCE(1, "Last CPU thought to be offlined?");
-               return 0;  /* Shouldn't happen: at least one CPU online. */
-       }
-
-       /* If we are on an internal node, complain bitterly. */
-       WARN_ON_ONCE(rnp != rdp->mynode);
-
-       /*
-        * Move tasks up to root rcu_node.  Don't try to get fancy for
-        * this corner-case operation -- just put this node's tasks
-        * at the head of the root node's list, and update the root node's
-        * ->gp_tasks and ->exp_tasks pointers to those of this node's,
-        * if non-NULL.  This might result in waiting for more tasks than
-        * absolutely necessary, but this is a good performance/complexity
-        * tradeoff.
-        */
-       if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
-               retval |= RCU_OFL_TASKS_NORM_GP;
-       if (rcu_preempted_readers_exp(rnp))
-               retval |= RCU_OFL_TASKS_EXP_GP;
-       lp = &rnp->blkd_tasks;
-       lp_root = &rnp_root->blkd_tasks;
-       while (!list_empty(lp)) {
-               t = list_entry(lp->next, typeof(*t), rcu_node_entry);
-               raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-               list_del(&t->rcu_node_entry);
-               t->rcu_blocked_node = rnp_root;
-               list_add(&t->rcu_node_entry, lp_root);
-               if (&t->rcu_node_entry == rnp->gp_tasks)
-                       rnp_root->gp_tasks = rnp->gp_tasks;
-               if (&t->rcu_node_entry == rnp->exp_tasks)
-                       rnp_root->exp_tasks = rnp->exp_tasks;
-#ifdef CONFIG_RCU_BOOST
-               if (&t->rcu_node_entry == rnp->boost_tasks)
-                       rnp_root->boost_tasks = rnp->boost_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-               raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-       }
-
-       rnp->gp_tasks = NULL;
-       rnp->exp_tasks = NULL;
-#ifdef CONFIG_RCU_BOOST
-       rnp->boost_tasks = NULL;
-       /*
-        * In the case where the root is being boosted but the leaf was not,
-        * make sure that we boost the tasks blocking the current grace
-        * period.
-        */
-       raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-       if (rnp_root->boost_tasks != NULL &&
-           rnp_root->boost_tasks != rnp_root->gp_tasks &&
-           rnp_root->boost_tasks != rnp_root->exp_tasks)
-               rnp_root->boost_tasks = rnp_root->gp_tasks;
-       raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-       return retval;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-/*
- * Check for a quiescent state from the current CPU.  When a task blocks,
- * the task is recorded in the corresponding CPU's rcu_node structure,
- * which is checked elsewhere.
- *
- * Caller must disable hard irqs.
- */
-static void rcu_preempt_check_callbacks(int cpu)
-{
-       struct task_struct *t = current;
-
-       if (t->rcu_read_lock_nesting == 0) {
-               rcu_preempt_qs(cpu);
-               return;
-       }
-       if (t->rcu_read_lock_nesting > 0 &&
-           per_cpu(rcu_preempt_data, cpu).qs_pending)
-               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
-}
-
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_preempt_do_callbacks(void)
-{
-       rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Queue a preemptible-RCU callback for invocation after a grace period.
- */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_preempt_state, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
-/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks.  Until then, this
- * function may only be called from __kfree_rcu().
- */
-void kfree_call_rcu(struct rcu_head *head,
-                   void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_preempt_state, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
-
-/**
- * synchronize_rcu - wait until a grace period has elapsed.
- *
- * Control will return to the caller some time after a full grace
- * period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed.  Note, however, that
- * upon return from synchronize_rcu(), the caller might well be executing
- * concurrently with new RCU read-side critical sections that began while
- * synchronize_rcu() was waiting.  RCU read-side critical sections are
- * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
- *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
- */
-void synchronize_rcu(void)
-{
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_rcu() in RCU read-side critical section");
-       if (!rcu_scheduler_active)
-               return;
-       if (rcu_expedited)
-               synchronize_rcu_expedited();
-       else
-               wait_rcu_gp(call_rcu);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu);
-
-static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
-static unsigned long sync_rcu_preempt_exp_count;
-static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
-
-/*
- * Return non-zero if there are any tasks in RCU read-side critical
- * sections blocking the current preemptible-RCU expedited grace period.
- * If there is no preemptible-RCU expedited grace period currently in
- * progress, returns zero unconditionally.
- */
-static int rcu_preempted_readers_exp(struct rcu_node *rnp)
-{
-       return rnp->exp_tasks != NULL;
-}
-
-/*
- * Return non-zero if there is no RCU expedited grace period in progress
- * for the specified rcu_node structure, in other words, if all CPUs and
- * tasks covered by the specified rcu_node structure have done their bit
- * for the current expedited grace period.  Works only for preemptible
- * RCU -- other RCU implementations use other means.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
- */
-static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
-{
-       return !rcu_preempted_readers_exp(rnp) &&
-              ACCESS_ONCE(rnp->expmask) == 0;
-}
-
-/*
- * Report the exit from RCU read-side critical section for the last task
- * that queued itself during or before the current expedited preemptible-RCU
- * grace period.  This event is reported either to the rcu_node structure on
- * which the task was queued or to one of that rcu_node structure's ancestors,
- * recursively up the tree.  (Calm down, calm down, we do the recursion
- * iteratively!)
- *
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake)
-{
-       unsigned long flags;
-       unsigned long mask;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       for (;;) {
-               if (!sync_rcu_preempt_exp_done(rnp)) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       break;
-               }
-               if (rnp->parent == NULL) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       if (wake)
-                               wake_up(&sync_rcu_preempt_exp_wq);
-                       break;
-               }
-               mask = rnp->grpmask;
-               raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
-               rnp = rnp->parent;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled */
-               rnp->expmask &= ~mask;
-       }
-}
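
A minimal user-space sketch of the upward reporting loop above (plain C; the tnode structure and its fields are simplified stand-ins, not the kernel's rcu_node): a finished subtree clears its bit in the parent's ->expmask and keeps walking up only while the level it reaches is itself finished.

#include <stdio.h>

struct tnode {
        struct tnode *parent;
        unsigned long expmask;   /* one bit per child still pending */
        unsigned long grpmask;   /* this node's bit in parent->expmask */
        int blocked_tasks;       /* tasks still in read-side sections */
};

static int exp_done(struct tnode *n)
{
        return n->blocked_tasks == 0 && n->expmask == 0;
}

static void report_exp(struct tnode *n)
{
        while (exp_done(n)) {
                if (!n->parent) {
                        printf("root done: wake up the waiter\n");
                        return;
                }
                n->parent->expmask &= ~n->grpmask;  /* clear our bit */
                n = n->parent;                      /* then recheck parent */
        }
}

int main(void)
{
        struct tnode root = { .expmask = 0x3 };
        struct tnode leaf0 = { .parent = &root, .grpmask = 0x1 };
        struct tnode leaf1 = { .parent = &root, .grpmask = 0x2 };

        report_exp(&leaf0);   /* clears bit 0; root still waits on leaf1 */
        report_exp(&leaf1);   /* clears bit 1; root done, waiter is woken */
        return 0;
}
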
-
-/*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure.  If there are no such
- * tasks, report it up the rcu_node hierarchy.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
- * CPU hotplug operations.
- */
-static void
-sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-       unsigned long flags;
-       int must_wait = 0;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       if (list_empty(&rnp->blkd_tasks)) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       } else {
-               rnp->exp_tasks = rnp->blkd_tasks.next;
-               rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
-               must_wait = 1;
-       }
-       if (!must_wait)
-               rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
-}
-
-/**
- * synchronize_rcu_expedited - Brute-force RCU grace period
- *
- * Wait for an RCU-preempt grace period, but expedite it.  The basic
- * idea is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blkd_tasks lists and wait for these lists to drain.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.
- * In fact, if you are using synchronize_rcu_expedited() in a loop,
- * please restructure your code to batch your updates, and then use a
- * single synchronize_rcu() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restrictions will result in deadlock.
- */
-void synchronize_rcu_expedited(void)
-{
-       unsigned long flags;
-       struct rcu_node *rnp;
-       struct rcu_state *rsp = &rcu_preempt_state;
-       unsigned long snap;
-       int trycount = 0;
-
-       smp_mb(); /* Caller's modifications seen first by other CPUs. */
-       snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
-       smp_mb(); /* Above access cannot bleed into critical section. */
-
-       /*
-        * Block CPU-hotplug operations.  This means that any CPU-hotplug
-        * operation that finds an rcu_node structure with tasks in the
-        * process of being boosted will know that all tasks blocking
-        * this expedited grace period will already be in the process of
-        * being boosted.  This simplifies the process of moving tasks
-        * from leaf to root rcu_node structures.
-        */
-       get_online_cpus();
-
-       /*
-        * Acquire lock, falling back to synchronize_rcu() if too many
-        * lock-acquisition failures.  Of course, if someone does the
-        * expedited grace period for us, just leave.
-        */
-       while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
-               if (ULONG_CMP_LT(snap,
-                   ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
-                       put_online_cpus();
-                       goto mb_ret; /* Others did our work for us. */
-               }
-               if (trycount++ < 10) {
-                       udelay(trycount * num_online_cpus());
-               } else {
-                       put_online_cpus();
-                       wait_rcu_gp(call_rcu);
-                       return;
-               }
-       }
-       if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
-               put_online_cpus();
-               goto unlock_mb_ret; /* Others did our work for us. */
-       }
-
-       /* force all RCU readers onto ->blkd_tasks lists. */
-       synchronize_sched_expedited();
-
-       /* Initialize ->expmask for all non-leaf rcu_node structures. */
-       rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               rnp->expmask = rnp->qsmaskinit;
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
-
-       /* Snapshot current state of ->blkd_tasks lists. */
-       rcu_for_each_leaf_node(rsp, rnp)
-               sync_rcu_preempt_exp_init(rsp, rnp);
-       if (NUM_RCU_NODES > 1)
-               sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
-
-       put_online_cpus();
-
-       /* Wait for snapshotted ->blkd_tasks lists to drain. */
-       rnp = rcu_get_root(rsp);
-       wait_event(sync_rcu_preempt_exp_wq,
-                  sync_rcu_preempt_exp_done(rnp));
-
-       /* Clean up and exit. */
-       smp_mb(); /* ensure expedited GP seen before counter increment. */
-       ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
-unlock_mb_ret:
-       mutex_unlock(&sync_rcu_preempt_exp_mutex);
-mb_ret:
-       smp_mb(); /* ensure subsequent action seen after grace period. */
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
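
A minimal user-space sketch of the counter-snapshot shortcut above (plain C, single-threaded for clarity; the kernel's real check uses the wrap-safe ULONG_CMP_LT() and a more conservative threshold): a caller snapshots the completed-grace-period count plus one before contending for the mutex, and bails out if the counter has already caught up, because someone else's expedited grace period covered it.

#include <stdio.h>

static unsigned long exp_count;   /* completed expedited grace periods */

/* Returns 1 if this caller drove a grace period, 0 if it piggybacked.
 * Simplified comparison; the kernel uses the wrap-safe ULONG_CMP_LT(). */
static int expedite(unsigned long snap)
{
        if (exp_count >= snap)
                return 0;         /* others did our work for us */
        exp_count++;              /* drive the grace period ourselves */
        return 1;
}

int main(void)
{
        /* Both callers take their snapshots before either has run. */
        unsigned long snap_a = exp_count + 1;
        unsigned long snap_b = exp_count + 1;

        printf("A drove a GP: %d\n", expedite(snap_a));  /* 1 */
        printf("B drove a GP: %d\n", expedite(snap_b));  /* 0: rides on A */
        return 0;
}
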
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- *
- * Note that this primitive does not necessarily wait for an RCU grace period
- * to complete.  For example, if there are no RCU callbacks queued anywhere
- * in the system, then rcu_barrier() is within its rights to return
- * immediately, without waiting for anything, much less an RCU grace period.
- */
-void rcu_barrier(void)
-{
-       _rcu_barrier(&rcu_preempt_state);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Initialize preemptible RCU's state structures.
- */
-static void __init __rcu_init_preempt(void)
-{
-       rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
-}
-
-/*
- * Check for a task exiting while in a preemptible-RCU read-side
- * critical section, clean up if so.  No need to issue warnings,
- * as debug_check_no_locks_held() already does this if lockdep
- * is enabled.
- */
-void exit_rcu(void)
-{
-       struct task_struct *t = current;
-
-       if (likely(list_empty(&current->rcu_node_entry)))
-               return;
-       t->rcu_read_lock_nesting = 1;
-       barrier();
-       t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
-       __rcu_read_unlock();
-}
-
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-static struct rcu_state *rcu_state = &rcu_sched_state;
-
-/*
- * Tell them what RCU they are running.
- */
-static void __init rcu_bootup_announce(void)
-{
-       pr_info("Hierarchical RCU implementation.\n");
-       rcu_bootup_announce_oddness();
-}
-
-/*
- * Return the number of RCU batches processed thus far for debug & stats.
- */
-long rcu_batches_completed(void)
-{
-       return rcu_batches_completed_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
- * Force a quiescent state for RCU, which, because there is no preemptible
- * RCU, becomes the same as rcu-sched.
- */
-void rcu_force_quiescent_state(void)
-{
-       rcu_sched_force_quiescent_state();
-}
-EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * CPUs being in quiescent states.
- */
-static void rcu_preempt_note_context_switch(int cpu)
-{
-}
-
-/*
- * Because preemptible RCU does not exist, there are never any preempted
- * RCU readers.
- */
-static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
-{
-       return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Because preemptible RCU does not exist, no quieting of tasks. */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-{
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * tasks blocked within RCU read-side critical sections.
- */
-static void rcu_print_detail_task_stall(struct rcu_state *rsp)
-{
-}
-
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * tasks blocked within RCU read-side critical sections.
- */
-static int rcu_print_task_stall(struct rcu_node *rnp)
-{
-       return 0;
-}
-
-/*
- * Because there is no preemptible RCU, there can be no readers blocked,
- * so there is no need to check for blocked tasks.  So check only for
- * bogus qsmask values.
- */
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
-{
-       WARN_ON_ONCE(rnp->qsmask);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, it never needs to migrate
- * tasks that were blocked within RCU read-side critical sections, and
- * such non-existent tasks cannot possibly have been blocking the current
- * grace period.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                    struct rcu_node *rnp,
-                                    struct rcu_data *rdp)
-{
-       return 0;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-/*
- * Because preemptible RCU does not exist, it never has any callbacks
- * to check.
- */
-static void rcu_preempt_check_callbacks(int cpu)
-{
-}
-
-/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks.  Until then, this
- * function may only be called from __kfree_rcu().
- *
- * Because there is no preemptible RCU, we use RCU-sched instead.
- */
-void kfree_call_rcu(struct rcu_head *head,
-                   void (*func)(struct rcu_head *rcu))
-{
-       __call_rcu(head, func, &rcu_sched_state, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
-
-/*
- * Wait for an rcu-preempt grace period, but make it happen quickly.
- * But because preemptible RCU does not exist, map to rcu-sched.
- */
-void synchronize_rcu_expedited(void)
-{
-       synchronize_sched_expedited();
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, there is never any need to
- * report on tasks preempted in RCU read-side critical sections during
- * expedited RCU grace periods.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake)
-{
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-/*
- * Because preemptible RCU does not exist, rcu_barrier() is just
- * another name for rcu_barrier_sched().
- */
-void rcu_barrier(void)
-{
-       rcu_barrier_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Because preemptible RCU does not exist, it need not be initialized.
- */
-static void __init __rcu_init_preempt(void)
-{
-}
-
-/*
- * Because preemptible RCU does not exist, tasks cannot possibly exit
- * while in preemptible RCU read-side critical sections.
- */
-void exit_rcu(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-#ifdef CONFIG_RCU_BOOST
-
-#include "rtmutex_common.h"
-
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-       if (list_empty(&rnp->blkd_tasks))
-               rnp->n_balk_blkd_tasks++;
-       else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
-               rnp->n_balk_exp_gp_tasks++;
-       else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
-               rnp->n_balk_boost_tasks++;
-       else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
-               rnp->n_balk_notblocked++;
-       else if (rnp->gp_tasks != NULL &&
-                ULONG_CMP_LT(jiffies, rnp->boost_time))
-               rnp->n_balk_notyet++;
-       else
-               rnp->n_balk_nos++;
-}
-
-#else /* #ifdef CONFIG_RCU_TRACE */
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
-       /*
-        * If the thread is yielding, only wake it when this
-        * is invoked from idle
-        */
-       if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-               wake_up_process(t);
-}
-
-/*
- * Carry out RCU priority boosting on the task indicated by ->exp_tasks
- * or ->boost_tasks, advancing the pointer to the next task in the
- * ->blkd_tasks list.
- *
- * Note that irqs must be enabled: boosting the task can block.
- * Returns 1 if there are more tasks needing to be boosted.
- */
-static int rcu_boost(struct rcu_node *rnp)
-{
-       unsigned long flags;
-       struct rt_mutex mtx;
-       struct task_struct *t;
-       struct list_head *tb;
-
-       if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
-               return 0;  /* Nothing left to boost. */
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-
-       /*
-        * Recheck under the lock: all tasks in need of boosting
-        * might exit their RCU read-side critical sections on their own.
-        */
-       if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return 0;
-       }
-
-       /*
-        * Preferentially boost tasks blocking expedited grace periods.
-        * This cannot starve the normal grace periods because a second
-        * expedited grace period must boost all blocked tasks, including
-        * those blocking the pre-existing normal grace period.
-        */
-       if (rnp->exp_tasks != NULL) {
-               tb = rnp->exp_tasks;
-               rnp->n_exp_boosts++;
-       } else {
-               tb = rnp->boost_tasks;
-               rnp->n_normal_boosts++;
-       }
-       rnp->n_tasks_boosted++;
-
-       /*
-        * We boost task t by manufacturing an rt_mutex that appears to
-        * be held by task t.  We leave a pointer to that rt_mutex where
-        * task t can find it, and task t will release the mutex when it
-        * exits its outermost RCU read-side critical section.  Then
-        * simply acquiring this artificial rt_mutex will boost task
-        * t's priority.  (Thanks to tglx for suggesting this approach!)
-        *
-        * Note that task t must acquire rnp->lock to remove itself from
-        * the ->blkd_tasks list, which it will do from exit() if from
-        * nowhere else.  We therefore are guaranteed that task t will
-        * stay around at least until we drop rnp->lock.  Note that
-        * rnp->lock also resolves races between our priority boosting
-        * and task t's exiting its outermost RCU read-side critical
-        * section.
-        */
-       t = container_of(tb, struct task_struct, rcu_node_entry);
-       rt_mutex_init_proxy_locked(&mtx, t);
-       t->rcu_boost_mutex = &mtx;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
-       rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
-
-       return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
-              ACCESS_ONCE(rnp->boost_tasks) != NULL;
-}
-
-/*
- * Priority-boosting kthread.  One per leaf rcu_node and one for the
- * root rcu_node.
- */
-static int rcu_boost_kthread(void *arg)
-{
-       struct rcu_node *rnp = (struct rcu_node *)arg;
-       int spincnt = 0;
-       int more2boost;
-
-       trace_rcu_utilization(TPS("Start boost kthread@init"));
-       for (;;) {
-               rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-               trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
-               rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
-               trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
-               rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
-               more2boost = rcu_boost(rnp);
-               if (more2boost)
-                       spincnt++;
-               else
-                       spincnt = 0;
-               if (spincnt > 10) {
-                       rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
-                       trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
-                       schedule_timeout_interruptible(2);
-                       trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
-                       spincnt = 0;
-               }
-       }
-       /* NOTREACHED */
-       trace_rcu_utilization(TPS("End boost kthread@notreached"));
-       return 0;
-}
-
-/*
- * Check to see if it is time to start boosting RCU readers that are
- * blocking the current grace period, and, if so, tell the per-rcu_node
- * kthread to start boosting them.  If there is an expedited grace
- * period in progress, it is always time to boost.
- *
- * The caller must hold rnp->lock, which this function releases.
- * The ->boost_kthread_task is immortal, so we don't need to worry
- * about it going away.
- */
-static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
-{
-       struct task_struct *t;
-
-       if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
-               rnp->n_balk_exp_gp_tasks++;
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-       if (rnp->exp_tasks != NULL ||
-           (rnp->gp_tasks != NULL &&
-            rnp->boost_tasks == NULL &&
-            rnp->qsmask == 0 &&
-            ULONG_CMP_GE(jiffies, rnp->boost_time))) {
-               if (rnp->exp_tasks == NULL)
-                       rnp->boost_tasks = rnp->gp_tasks;
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               t = rnp->boost_kthread_task;
-               if (t)
-                       rcu_wake_cond(t, rnp->boost_kthread_status);
-       } else {
-               rcu_initiate_boost_trace(rnp);
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
-}
-
-/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __this_cpu_write(rcu_cpu_has_work, 1);
-       if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
-           current != __this_cpu_read(rcu_cpu_kthread_task)) {
-               rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
-                             __this_cpu_read(rcu_cpu_kthread_status));
-       }
-       local_irq_restore(flags);
-}
-
-/*
- * Is the current CPU running the RCU-callbacks kthread?
- * Caller must have preemption disabled.
- */
-static bool rcu_is_callbacks_kthread(void)
-{
-       return __get_cpu_var(rcu_cpu_kthread_task) == current;
-}
-
-#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
-
-/*
- * Do priority-boost accounting for the start of a new grace period.
- */
-static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
-{
-       rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
-}
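
A minimal sketch of the delay conversion behind RCU_BOOST_DELAY_JIFFIES above (plain C; HZ and the 500 ms delay are example values, not taken from any particular configuration): DIV_ROUND_UP() converts milliseconds to jiffies while rounding up, so integer division never shortens the boost delay.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

int main(void)
{
        int HZ = 250;              /* example tick rate */
        int boost_delay_ms = 500;  /* example boost delay in milliseconds */

        printf("boost delay = %d jiffies\n",
               DIV_ROUND_UP(boost_delay_ms * HZ, 1000));  /* 125 */
        return 0;
}
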
-
-/*
- * Create an RCU-boost kthread for the specified node if one does not
- * already exist.  We only create this kthread for preemptible RCU.
- * Returns zero if all is well, a negated errno otherwise.
- */
-static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
-                                                struct rcu_node *rnp)
-{
-       int rnp_index = rnp - &rsp->node[0];
-       unsigned long flags;
-       struct sched_param sp;
-       struct task_struct *t;
-
-       if (&rcu_preempt_state != rsp)
-               return 0;
-
-       if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
-               return 0;
-
-       rsp->boost = 1;
-       if (rnp->boost_kthread_task != NULL)
-               return 0;
-       t = kthread_create(rcu_boost_kthread, (void *)rnp,
-                          "rcub/%d", rnp_index);
-       if (IS_ERR(t))
-               return PTR_ERR(t);
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       rnp->boost_kthread_task = t;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       sp.sched_priority = RCU_BOOST_PRIO;
-       sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-       wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
-       return 0;
-}
-
-static void rcu_kthread_do_work(void)
-{
-       rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
-       rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
-       rcu_preempt_do_callbacks();
-}
-
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-       struct sched_param sp;
-
-       sp.sched_priority = RCU_KTHREAD_PRIO;
-       sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-       per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-       return __get_cpu_var(rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
- * RCU softirq used in flavors and configurations of RCU that do not
- * support RCU priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-       unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
-       char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
-       int spincnt;
-
-       for (spincnt = 0; spincnt < 10; spincnt++) {
-               trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-               local_bh_disable();
-               *statusp = RCU_KTHREAD_RUNNING;
-               this_cpu_inc(rcu_cpu_kthread_loops);
-               local_irq_disable();
-               work = *workp;
-               *workp = 0;
-               local_irq_enable();
-               if (work)
-                       rcu_kthread_do_work();
-               local_bh_enable();
-               if (*workp == 0) {
-                       trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-                       *statusp = RCU_KTHREAD_WAITING;
-                       return;
-               }
-       }
-       *statusp = RCU_KTHREAD_YIELDING;
-       trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-       schedule_timeout_interruptible(2);
-       trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-       *statusp = RCU_KTHREAD_WAITING;
-}
-
-/*
- * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question.  The CPU hotplug lock is still
- * held, so the value of rnp->qsmaskinit will be stable.
- *
- * We don't include outgoingcpu in the affinity set; use -1 if there is
- * no outgoing CPU.  If there are no CPUs left in the affinity set,
- * this function allows the kthread to execute on any CPU.
- */
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
-{
-       struct task_struct *t = rnp->boost_kthread_task;
-       unsigned long mask = rnp->qsmaskinit;
-       cpumask_var_t cm;
-       int cpu;
-
-       if (!t)
-               return;
-       if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
-               return;
-       for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
-               if ((mask & 0x1) && cpu != outgoingcpu)
-                       cpumask_set_cpu(cpu, cm);
-       if (cpumask_weight(cm) == 0) {
-               cpumask_setall(cm);
-               for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
-                       cpumask_clear_cpu(cpu, cm);
-               WARN_ON_ONCE(cpumask_weight(cm) == 0);
-       }
-       set_cpus_allowed_ptr(t, cm);
-       free_cpumask_var(cm);
-}
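
A minimal user-space sketch of the mask construction above (plain C, with an unsigned long standing in for cpumask_var_t): walk the CPUs covered by the node, keep those whose ->qsmaskinit bit is set, and skip the outgoing CPU.

#include <stdio.h>

static unsigned long node_affinity(unsigned long qsmaskinit,
                                   int grplo, int grphi, int outgoingcpu)
{
        unsigned long cm = 0, mask = qsmaskinit;
        int cpu;

        for (cpu = grplo; cpu <= grphi; cpu++, mask >>= 1)
                if ((mask & 0x1) && cpu != outgoingcpu)
                        cm |= 1UL << cpu;
        return cm;
}

int main(void)
{
        /* Node covers CPUs 4-7, all present; CPU 5 is going offline. */
        printf("affinity = %#lx\n", node_affinity(0xf, 4, 7, 5));  /* 0xd0 */
        return 0;
}
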
-
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-       .store                  = &rcu_cpu_kthread_task,
-       .thread_should_run      = rcu_cpu_kthread_should_run,
-       .thread_fn              = rcu_cpu_kthread,
-       .thread_comm            = "rcuc/%u",
-       .setup                  = rcu_cpu_kthread_setup,
-       .park                   = rcu_cpu_kthread_park,
-};
-
-/*
- * Spawn all kthreads -- called as soon as the scheduler is running.
- */
-static int __init rcu_spawn_kthreads(void)
-{
-       struct rcu_node *rnp;
-       int cpu;
-
-       rcu_scheduler_fully_active = 1;
-       for_each_possible_cpu(cpu)
-               per_cpu(rcu_cpu_has_work, cpu) = 0;
-       BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
-       rnp = rcu_get_root(rcu_state);
-       (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
-       if (NUM_RCU_NODES > 1) {
-               rcu_for_each_leaf_node(rcu_state, rnp)
-                       (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
-       }
-       return 0;
-}
-early_initcall(rcu_spawn_kthreads);
-
-static void rcu_prepare_kthreads(int cpu)
-{
-       struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
-       struct rcu_node *rnp = rdp->mynode;
-
-       /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
-       if (rcu_scheduler_fully_active)
-               (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
-}
-
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
-{
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-static void invoke_rcu_callbacks_kthread(void)
-{
-       WARN_ON_ONCE(1);
-}
-
-static bool rcu_is_callbacks_kthread(void)
-{
-       return false;
-}
-
-static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
-{
-}
-
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
-{
-}
-
-static int __init rcu_scheduler_really_started(void)
-{
-       rcu_scheduler_fully_active = 1;
-       return 0;
-}
-early_initcall(rcu_scheduler_really_started);
-
-static void rcu_prepare_kthreads(int cpu)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
-
-#if !defined(CONFIG_RCU_FAST_NO_HZ)
-
-/*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.  This function is part of the RCU implementation; it is -not-
- * an exported member of the RCU API.
- *
- * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
- * any flavor of RCU.
- */
-int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
-{
-       *delta_jiffies = ULONG_MAX;
-       return rcu_cpu_has_callbacks(cpu, NULL);
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
- * after it.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-}
-
-/*
- * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
- * is nothing.
- */
-static void rcu_prepare_for_idle(int cpu)
-{
-}
-
-/*
- * Don't bother keeping a running count of the number of RCU callbacks
- * posted because CONFIG_RCU_FAST_NO_HZ=n.
- */
-static void rcu_idle_count_callbacks_posted(void)
-{
-}
-
-#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-
-/*
- * This code is invoked when a CPU goes idle, at which point we want
- * to have the CPU do everything required for RCU so that it can enter
- * the energy-efficient dyntick-idle mode.  This is handled by a
- * state machine implemented by rcu_prepare_for_idle() below.
- *
- * The following two preprocessor symbols control this state machine:
- *
- * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
- *     to sleep in dyntick-idle mode with RCU callbacks pending.  This
- *     is sized to be roughly one RCU grace period.  Those energy-efficiency
- *     benchmarkers who might otherwise be tempted to set this to a large
- *     number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
- *     system.  And if you are -that- concerned about energy efficiency,
- *     just power the system down and be done with it!
- * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
- *     permitted to sleep in dyntick-idle mode with only lazy RCU
- *     callbacks pending.  Setting this too high can OOM your system.
- *
- * The values below work well in practice.  If future workloads require
- * adjustment, they can be converted into kernel config parameters, though
- * making the state machine smarter might be a better option.
- */
-#define RCU_IDLE_GP_DELAY 4            /* Roughly one grace period. */
-#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)        /* Roughly six seconds. */
-
-static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
-module_param(rcu_idle_gp_delay, int, 0644);
-static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
-module_param(rcu_idle_lazy_gp_delay, int, 0644);
-
-extern int tick_nohz_enabled;
-
-/*
- * Try to advance callbacks for all flavors of RCU on the current CPU.
- * Afterwards, if there are any callbacks ready for immediate invocation,
- * return true.
- */
-static bool rcu_try_advance_all_cbs(void)
-{
-       bool cbs_ready = false;
-       struct rcu_data *rdp;
-       struct rcu_node *rnp;
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp) {
-               rdp = this_cpu_ptr(rsp->rda);
-               rnp = rdp->mynode;
-
-               /*
-                * Don't bother checking unless a grace period has
-                * completed since we last checked and there are
-                * callbacks not yet ready to invoke.
-                */
-               if (rdp->completed != rnp->completed &&
-                   rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
-                       note_gp_changes(rsp, rdp);
-
-               if (cpu_has_callbacks_ready_to_invoke(rdp))
-                       cbs_ready = true;
-       }
-       return cbs_ready;
-}
-
-/*
- * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
- * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
- * caller to set the timeout based on whether or not there are non-lazy
- * callbacks.
- *
- * The caller must have disabled interrupts.
- */
-int rcu_needs_cpu(int cpu, unsigned long *dj)
-{
-       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-       /* Snapshot to detect later posting of non-lazy callback. */
-       rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
-
-       /* If no callbacks, RCU doesn't need the CPU. */
-       if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
-               *dj = ULONG_MAX;
-               return 0;
-       }
-
-       /* Attempt to advance callbacks. */
-       if (rcu_try_advance_all_cbs()) {
-               /* Some ready to invoke, so initiate later invocation. */
-               invoke_rcu_core();
-               return 1;
-       }
-       rdtp->last_accelerate = jiffies;
-
-       /* Request timer delay depending on laziness, and round. */
-       if (!rdtp->all_lazy) {
-               *dj = round_up(rcu_idle_gp_delay + jiffies,
-                              rcu_idle_gp_delay) - jiffies;
-       } else {
-               *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
-       }
-       return 0;
-}
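
A minimal user-space sketch of the timer-delay rounding above (plain C; round_up() here mirrors the kernel's power-of-two helper, and the jiffies value is arbitrary): the wakeup is aligned to a multiple of the grace-period delay so that idle CPUs tend to wake together, then converted back into a relative delay.

#include <stdio.h>

#define round_up(x, y)  ((((x) - 1) | ((y) - 1)) + 1)   /* y must be a power of 2 */

int main(void)
{
        unsigned long jiffies = 1000003;     /* pretend current time */
        unsigned long gp_delay = 4;          /* roughly one grace period */

        unsigned long dj = round_up(jiffies + gp_delay, gp_delay) - jiffies;
        printf("sleep for %lu jiffies (wake at %lu)\n", dj, jiffies + dj);
        return 0;
}

Here the wakeup lands at jiffy 1000008, a multiple of the 4-jiffy delay, so the relative delay is 5 rather than exactly 4.
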
-
-/*
- * Prepare a CPU for idle from an RCU perspective.  The first major task
- * is to sense whether nohz mode has been enabled or disabled via sysfs.
- * The second major task is to check to see if a non-lazy callback has
- * arrived at a CPU that previously had only lazy callbacks.  The third
- * major task is to accelerate (that is, assign grace-period numbers to)
- * any recently arrived callbacks.
- *
- * The caller must have disabled interrupts.
- */
-static void rcu_prepare_for_idle(int cpu)
-{
-       struct rcu_data *rdp;
-       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-       struct rcu_node *rnp;
-       struct rcu_state *rsp;
-       int tne;
-
-       /* Handle nohz enablement switches conservatively. */
-       tne = ACCESS_ONCE(tick_nohz_enabled);
-       if (tne != rdtp->tick_nohz_enabled_snap) {
-               if (rcu_cpu_has_callbacks(cpu, NULL))
-                       invoke_rcu_core(); /* force nohz to see update. */
-               rdtp->tick_nohz_enabled_snap = tne;
-               return;
-       }
-       if (!tne)
-               return;
-
-       /* If this is a no-CBs CPU, no callbacks, just return. */
-       if (rcu_is_nocb_cpu(cpu))
-               return;
-
-       /*
-        * If a non-lazy callback arrived at a CPU having only lazy
-        * callbacks, invoke RCU core for the side-effect of recalculating
-        * idle duration on re-entry to idle.
-        */
-       if (rdtp->all_lazy &&
-           rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
-               invoke_rcu_core();
-               return;
-       }
-
-       /*
-        * If we have not yet accelerated this jiffy, accelerate all
-        * callbacks on this CPU.
-        */
-       if (rdtp->last_accelerate == jiffies)
-               return;
-       rdtp->last_accelerate = jiffies;
-       for_each_rcu_flavor(rsp) {
-               rdp = per_cpu_ptr(rsp->rda, cpu);
-               if (!*rdp->nxttail[RCU_DONE_TAIL])
-                       continue;
-               rnp = rdp->mynode;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-               rcu_accelerate_cbs(rsp, rnp, rdp);
-               raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-       }
-}
-
-/*
- * Clean up for exit from idle.  Attempt to advance callbacks based on
- * any grace periods that elapsed while the CPU was idle, and if any
- * callbacks are now ready to invoke, initiate invocation.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-       struct rcu_data *rdp;
-       struct rcu_state *rsp;
-
-       if (rcu_is_nocb_cpu(cpu))
-               return;
-       rcu_try_advance_all_cbs();
-       for_each_rcu_flavor(rsp) {
-               rdp = per_cpu_ptr(rsp->rda, cpu);
-               if (cpu_has_callbacks_ready_to_invoke(rdp))
-                       invoke_rcu_core();
-       }
-}
-
-/*
- * Keep a running count of the number of non-lazy callbacks posted
- * on this CPU.  This running counter (which is never decremented) allows
- * rcu_prepare_for_idle() to detect when something out of the idle loop
- * posts a callback, even if an equal number of callbacks are invoked.
- * Of course, callbacks should only be posted from within a trace event
- * designed to be called from idle or from within RCU_NONIDLE().
- */
-static void rcu_idle_count_callbacks_posted(void)
-{
-       __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
-}
-
-/*
- * Data for flushing lazy RCU callbacks at OOM time.
- */
-static atomic_t oom_callback_count;
-static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
-
-/*
- * RCU OOM callback -- decrement the outstanding count and deliver the
- * wake-up if we are the last one.
- */
-static void rcu_oom_callback(struct rcu_head *rhp)
-{
-       if (atomic_dec_and_test(&oom_callback_count))
-               wake_up(&oom_callback_wq);
-}
-
-/*
- * Post an rcu_oom_notify callback on the current CPU if it has at
- * least one lazy callback.  This will unnecessarily post callbacks
- * to CPUs that already have a non-lazy callback at the end of their
- * callback list, but this is an infrequent operation, so accept some
- * extra overhead to keep things simple.
- */
-static void rcu_oom_notify_cpu(void *unused)
-{
-       struct rcu_state *rsp;
-       struct rcu_data *rdp;
-
-       for_each_rcu_flavor(rsp) {
-               rdp = __this_cpu_ptr(rsp->rda);
-               if (rdp->qlen_lazy != 0) {
-                       atomic_inc(&oom_callback_count);
-                       rsp->call(&rdp->oom_head, rcu_oom_callback);
-               }
-       }
-}
-
-/*
- * If low on memory, ensure that each CPU has a non-lazy callback.
- * This will wake up CPUs that have only lazy callbacks, in turn
- * ensuring that they free up the corresponding memory in a timely manner.
- * Because an uncertain amount of memory will be freed in some uncertain
- * timeframe, we do not claim to have freed anything.
- */
-static int rcu_oom_notify(struct notifier_block *self,
-                         unsigned long notused, void *nfreed)
-{
-       int cpu;
-
-       /* Wait for callbacks from earlier instance to complete. */
-       wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
-
-       /*
-        * Prevent premature wakeup: ensure that all increments happen
-        * before there is a chance of the counter reaching zero.
-        */
-       atomic_set(&oom_callback_count, 1);
-
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-               cond_resched();
-       }
-       put_online_cpus();
-
-       /* Unconditionally decrement: no need to wake ourselves up. */
-       atomic_dec(&oom_callback_count);
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block rcu_oom_nb = {
-       .notifier_call = rcu_oom_notify
-};
-
-static int __init rcu_register_oom_notifier(void)
-{
-       register_oom_notifier(&rcu_oom_nb);
-       return 0;
-}
-early_initcall(rcu_register_oom_notifier);
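
A minimal user-space sketch of the completion counting used by the OOM notifier above (plain C, single-threaded; the kernel uses atomics and a wait queue): the counter starts at 1 so it cannot reach zero while callbacks are still being posted, and the initial bias is dropped only after every CPU has been visited.

#include <stdio.h>

static int callback_count;

static void callback_done(void)            /* rcu_oom_callback() analogue */
{
        if (--callback_count == 0)
                printf("last callback: wake the waiter\n");
}

int main(void)
{
        int cpus_with_lazy_cbs = 3;

        callback_count = 1;                /* bias against an early zero */
        for (int i = 0; i < cpus_with_lazy_cbs; i++)
                callback_count++;          /* one callback posted per CPU */
        callback_count--;                  /* drop the bias; no self-wake */

        for (int i = 0; i < cpus_with_lazy_cbs; i++)
                callback_done();           /* callbacks complete later */
        return 0;
}
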
-
-#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-
-#ifdef CONFIG_RCU_FAST_NO_HZ
-
-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
-{
-       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-       unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
-
-       sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
-               rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
-               ulong2long(nlpd),
-               rdtp->all_lazy ? 'L' : '.',
-               rdtp->tick_nohz_enabled_snap ? '.' : 'D');
-}
-
-#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-
-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
-{
-       *cp = '\0';
-}
-
-#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
-
-/* Initiate the stall-info list. */
-static void print_cpu_stall_info_begin(void)
-{
-       pr_cont("\n");
-}
-
-/*
- * Print out diagnostic information for the specified stalled CPU.
- *
- * If the specified CPU is aware of the current RCU grace period
- * (flavor specified by rsp), then print the number of scheduling
- * clock interrupts the CPU has taken during the time that it has
- * been aware.  Otherwise, print the number of RCU grace periods
- * that this CPU is ignorant of, for example, "1" if the CPU was
- * aware of the previous grace period.
- *
- * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
- */
-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
-{
-       char fast_no_hz[72];
-       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_dynticks *rdtp = rdp->dynticks;
-       char *ticks_title;
-       unsigned long ticks_value;
-
-       if (rsp->gpnum == rdp->gpnum) {
-               ticks_title = "ticks this GP";
-               ticks_value = rdp->ticks_this_gp;
-       } else {
-               ticks_title = "GPs behind";
-               ticks_value = rsp->gpnum - rdp->gpnum;
-       }
-       print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-       pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
-              cpu, ticks_value, ticks_title,
-              atomic_read(&rdtp->dynticks) & 0xfff,
-              rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
-              rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
-              fast_no_hz);
-}
-
-/* Terminate the stall-info list. */
-static void print_cpu_stall_info_end(void)
-{
-       pr_err("\t");
-}
-
-/* Zero ->ticks_this_gp for all flavors of RCU. */
-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
-{
-       rdp->ticks_this_gp = 0;
-       rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
-}
-
-/* Increment ->ticks_this_gp for all flavors of RCU. */
-static void increment_cpu_stall_ticks(void)
-{
-       struct rcu_state *rsp;
-
-       for_each_rcu_flavor(rsp)
-               __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
-}
-
-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-static void print_cpu_stall_info_begin(void)
-{
-       pr_cont(" {");
-}
-
-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
-{
-       pr_cont(" %d", cpu);
-}
-
-static void print_cpu_stall_info_end(void)
-{
-       pr_cont("} ");
-}
-
-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
-{
-}
-
-static void increment_cpu_stall_ticks(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-#ifdef CONFIG_RCU_NOCB_CPU
-
-/*
- * Offload callback processing from the boot-time-specified set of CPUs
- * specified by rcu_nocb_mask.  For each CPU in the set, there is a
- * kthread created that pulls the callbacks from the corresponding CPU,
- * waits for a grace period to elapse, and invokes the callbacks.
- * The no-CBs CPUs do a wake_up() on their kthread when they insert
- * a callback into any empty list, unless the rcu_nocb_poll boot parameter
- * has been specified, in which case each kthread actively polls its
- * CPU.  (Which isn't so great for energy efficiency, but which does
- * reduce RCU's overhead on that CPU.)
- *
- * This is intended to be used in conjunction with Frederic Weisbecker's
- * adaptive-idle work, which would seriously reduce OS jitter on CPUs
- * running CPU-bound user-mode computations.
- *
- * Offloading of callback processing could also in theory be used as
- * an energy-efficiency measure because CPUs with no RCU callbacks
- * queued are more aggressive about entering dyntick-idle mode.
- */
-
-
-/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
-static int __init rcu_nocb_setup(char *str)
-{
-       alloc_bootmem_cpumask_var(&rcu_nocb_mask);
-       have_rcu_nocb_mask = true;
-       cpulist_parse(str, rcu_nocb_mask);
-       return 1;
-}
-__setup("rcu_nocbs=", rcu_nocb_setup);
-
-static int __init parse_rcu_nocb_poll(char *arg)
-{
-       rcu_nocb_poll = 1;
-       return 0;
-}
-early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
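
For reference, these two handlers correspond to boot-time options on the kernel
command line.  A plausible example (the CPU list is purely illustrative) would be:

	rcu_nocbs=1-7 rcu_nocb_poll

which offloads callback invocation for CPUs 1-7 to the per-CPU "rcuo" kthreads and
makes those kthreads poll rather than wait to be awakened.
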
-
-/*
- * Do any no-CBs CPUs need another grace period?
- *
- * Interrupts must be disabled.  If the caller does not hold the root
- * rcu_node structure's ->lock, the results are advisory only.
- */
-static int rcu_nocb_needs_gp(struct rcu_state *rsp)
-{
-       struct rcu_node *rnp = rcu_get_root(rsp);
-
-       return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
-}
-
-/*
- * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
- * grace period.
- */
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-       wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
-}
-
-/*
- * Set the root rcu_node structure's ->need_future_gp field
- * based on the sum of those of all rcu_node structures.  This does
- * double-count the root rcu_node structure's requests, but this
- * is necessary to handle the possibility of a rcu_nocb_kthread()
- * having awakened during the time that the rcu_node structures
- * were being updated for the end of the previous grace period.
- */
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
-       rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
-}
-
-static void rcu_init_one_nocb(struct rcu_node *rnp)
-{
-       init_waitqueue_head(&rnp->nocb_gp_wq[0]);
-       init_waitqueue_head(&rnp->nocb_gp_wq[1]);
-}
-
-/* Is the specified CPU a no-CBs CPU? */
-bool rcu_is_nocb_cpu(int cpu)
-{
-       if (have_rcu_nocb_mask)
-               return cpumask_test_cpu(cpu, rcu_nocb_mask);
-       return false;
-}
-
-/*
- * Enqueue the specified string of rcu_head structures onto the specified
- * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
- * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
- * counts are supplied by rhcount and rhcount_lazy.
- *
- * If warranted, also wake up the kthread servicing this CPU's queues.
- */
-static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
-                                   struct rcu_head *rhp,
-                                   struct rcu_head **rhtp,
-                                   int rhcount, int rhcount_lazy)
-{
-       int len;
-       struct rcu_head **old_rhpp;
-       struct task_struct *t;
-
-       /* Enqueue the callback on the nocb list and update counts. */
-       old_rhpp = xchg(&rdp->nocb_tail, rhtp);
-       ACCESS_ONCE(*old_rhpp) = rhp;
-       atomic_long_add(rhcount, &rdp->nocb_q_count);
-       atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
-
-       /* If we are not being polled and there is a kthread, awaken it ... */
-       t = ACCESS_ONCE(rdp->nocb_kthread);
-       if (rcu_nocb_poll | !t)
-               return;
-       len = atomic_long_read(&rdp->nocb_q_count);
-       if (old_rhpp == &rdp->nocb_head) {
-               wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
-               rdp->qlen_last_fqs_check = 0;
-       } else if (len > rdp->qlen_last_fqs_check + qhimark) {
-               wake_up_process(t); /* ... or if many callbacks queued. */
-               rdp->qlen_last_fqs_check = LONG_MAX / 2;
-       }
-       return;
-}
-
-/*
- * This is a helper for __call_rcu(), which invokes this when the normal
- * callback queue is inoperable.  If this is not a no-CBs CPU, this
- * function returns failure back to __call_rcu(), which can complain
- * appropriately.
- *
- * Otherwise, this function queues the callback where the corresponding
- * "rcuo" kthread can find it.
- */
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-                           bool lazy)
-{
-
-       if (!rcu_is_nocb_cpu(rdp->cpu))
-               return 0;
-       __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
-       if (__is_kfree_rcu_offset((unsigned long)rhp->func))
-               trace_rcu_kfree_callback(rdp->rsp->name, rhp,
-                                        (unsigned long)rhp->func,
-                                        rdp->qlen_lazy, rdp->qlen);
-       else
-               trace_rcu_callback(rdp->rsp->name, rhp,
-                                  rdp->qlen_lazy, rdp->qlen);
-       return 1;
-}
-
-/*
- * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
- * not a no-CBs CPU.
- */
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-                                                    struct rcu_data *rdp)
-{
-       long ql = rsp->qlen;
-       long qll = rsp->qlen_lazy;
-
-       /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
-       if (!rcu_is_nocb_cpu(smp_processor_id()))
-               return 0;
-       rsp->qlen = 0;
-       rsp->qlen_lazy = 0;
-
-       /* First, enqueue the donelist, if any.  This preserves CB ordering. */
-       if (rsp->orphan_donelist != NULL) {
-               __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-                                       rsp->orphan_donetail, ql, qll);
-               ql = qll = 0;
-               rsp->orphan_donelist = NULL;
-               rsp->orphan_donetail = &rsp->orphan_donelist;
-       }
-       if (rsp->orphan_nxtlist != NULL) {
-               __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-                                       rsp->orphan_nxttail, ql, qll);
-               ql = qll = 0;
-               rsp->orphan_nxtlist = NULL;
-               rsp->orphan_nxttail = &rsp->orphan_nxtlist;
-       }
-       return 1;
-}
-
-/*
- * If necessary, kick off a new grace period, and either way wait
- * for a subsequent grace period to complete.
- */
-static void rcu_nocb_wait_gp(struct rcu_data *rdp)
-{
-       unsigned long c;
-       bool d;
-       unsigned long flags;
-       struct rcu_node *rnp = rdp->mynode;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       c = rcu_start_future_gp(rnp, rdp);
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
-       /*
-        * Wait for the grace period.  Do so interruptibly to avoid messing
-        * up the load average.
-        */
-       trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
-       for (;;) {
-               wait_event_interruptible(
-                       rnp->nocb_gp_wq[c & 0x1],
-                       (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
-               if (likely(d))
-                       break;
-               flush_signals(current);
-               trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
-       }
-       trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
-       smp_mb(); /* Ensure that CB invocation happens after GP end. */
-}
-
-/*
- * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
- * callbacks queued by the corresponding no-CBs CPU.
- */
-static int rcu_nocb_kthread(void *arg)
-{
-       int c, cl;
-       struct rcu_head *list;
-       struct rcu_head *next;
-       struct rcu_head **tail;
-       struct rcu_data *rdp = arg;
-
-       /* Each pass through this loop invokes one batch of callbacks */
-       for (;;) {
-               /* If not polling, wait for next batch of callbacks. */
-               if (!rcu_nocb_poll)
-                       wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
-               list = ACCESS_ONCE(rdp->nocb_head);
-               if (!list) {
-                       schedule_timeout_interruptible(1);
-                       flush_signals(current);
-                       continue;
-               }
-
-               /*
-                * Extract queued callbacks, update counts, and wait
-                * for a grace period to elapse.
-                */
-               ACCESS_ONCE(rdp->nocb_head) = NULL;
-               tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
-               c = atomic_long_xchg(&rdp->nocb_q_count, 0);
-               cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
-               ACCESS_ONCE(rdp->nocb_p_count) += c;
-               ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
-               rcu_nocb_wait_gp(rdp);
-
-               /* Each pass through the following loop invokes a callback. */
-               trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
-               c = cl = 0;
-               while (list) {
-                       next = list->next;
-                       /* Wait for enqueuing to complete, if needed. */
-                       while (next == NULL && &list->next != tail) {
-                               schedule_timeout_interruptible(1);
-                               next = list->next;
-                       }
-                       debug_rcu_head_unqueue(list);
-                       local_bh_disable();
-                       if (__rcu_reclaim(rdp->rsp->name, list))
-                               cl++;
-                       c++;
-                       local_bh_enable();
-                       list = next;
-               }
-               trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
-               ACCESS_ONCE(rdp->nocb_p_count) -= c;
-               ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
-               rdp->n_nocbs_invoked += c;
-       }
-       return 0;
-}
-
-/* Initialize per-rcu_data variables for no-CBs CPUs. */
-static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
-{
-       rdp->nocb_tail = &rdp->nocb_head;
-       init_waitqueue_head(&rdp->nocb_wq);
-}
-
-/* Create a kthread for each RCU flavor for each no-CBs CPU. */
-static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
-{
-       int cpu;
-       struct rcu_data *rdp;
-       struct task_struct *t;
-
-       if (rcu_nocb_mask == NULL)
-               return;
-       for_each_cpu(cpu, rcu_nocb_mask) {
-               rdp = per_cpu_ptr(rsp->rda, cpu);
-               t = kthread_run(rcu_nocb_kthread, rdp,
-                               "rcuo%c/%d", rsp->abbr, cpu);
-               BUG_ON(IS_ERR(t));
-               ACCESS_ONCE(rdp->nocb_kthread) = t;
-       }
-}
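
For orientation, the kthreads created here show up in the task list under names built
from the flavor abbreviation and the CPU number; assuming the usual 'b', 'p' and 's'
abbreviations for rcu_bh, rcu_preempt and rcu_sched, CPU 3 would be served by
rcuob/3, rcuop/3 and rcuos/3 (an assumption based on the "rcuo%c/%d" format above,
not something this hunk spells out).
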
-
-/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
-static bool init_nocb_callback_list(struct rcu_data *rdp)
-{
-       if (rcu_nocb_mask == NULL ||
-           !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
-               return false;
-       rdp->nxttail[RCU_NEXT_TAIL] = NULL;
-       return true;
-}
-
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-
-static int rcu_nocb_needs_gp(struct rcu_state *rsp)
-{
-       return 0;
-}
-
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-}
-
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
-}
-
-static void rcu_init_one_nocb(struct rcu_node *rnp)
-{
-}
-
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-                           bool lazy)
-{
-       return 0;
-}
-
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-                                                    struct rcu_data *rdp)
-{
-       return 0;
-}
-
-static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
-{
-}
-
-static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
-{
-}
-
-static bool init_nocb_callback_list(struct rcu_data *rdp)
-{
-       return false;
-}
-
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-
-/*
- * An adaptive-ticks CPU can potentially execute in kernel mode for an
- * arbitrarily long period of time with the scheduling-clock tick turned
- * off.  RCU will be paying attention to this CPU because it is in the
- * kernel, but the CPU cannot be guaranteed to be executing the RCU state
- * machine because the scheduling-clock tick has been disabled.  Therefore,
- * if an adaptive-ticks CPU is failing to respond to the current grace
- * period and has not been idle from an RCU perspective, kick it.
- */
-static void rcu_kick_nohz_cpu(int cpu)
-{
-#ifdef CONFIG_NO_HZ_FULL
-       if (tick_nohz_full_cpu(cpu))
-               smp_send_reschedule(cpu);
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
-}
-
-
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-
-/*
- * Define RCU flavor that holds sysidle state.  This needs to be the
- * most active flavor of RCU.
- */
-#ifdef CONFIG_PREEMPT_RCU
-static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
-#else /* #ifdef CONFIG_PREEMPT_RCU */
-static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-
-static int full_sysidle_state;         /* Current system-idle state. */
-#define RCU_SYSIDLE_NOT                0       /* Some CPU is not idle. */
-#define RCU_SYSIDLE_SHORT      1       /* All CPUs idle for brief period. */
-#define RCU_SYSIDLE_LONG       2       /* All CPUs idle for long enough. */
-#define RCU_SYSIDLE_FULL       3       /* All CPUs idle, ready for sysidle. */
-#define RCU_SYSIDLE_FULL_NOTED 4       /* Actually entered sysidle state. */
-
-/*
- * Invoked to note exit from irq or task transition to idle.  Note that
- * usermode execution does -not- count as idle here!  After all, we want
- * to detect full-system idle states, not RCU quiescent states and grace
- * periods.  The caller must have disabled interrupts.
- */
-static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
-{
-       unsigned long j;
-
-       /* Adjust nesting, check for fully idle. */
-       if (irq) {
-               rdtp->dynticks_idle_nesting--;
-               WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-               if (rdtp->dynticks_idle_nesting != 0)
-                       return;  /* Still not fully idle. */
-       } else {
-               if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
-                   DYNTICK_TASK_NEST_VALUE) {
-                       rdtp->dynticks_idle_nesting = 0;
-               } else {
-                       rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
-                       WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-                       return;  /* Still not fully idle. */
-               }
-       }
-
-       /* Record start of fully idle period. */
-       j = jiffies;
-       ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
-       smp_mb__before_atomic_inc();
-       atomic_inc(&rdtp->dynticks_idle);
-       smp_mb__after_atomic_inc();
-       WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
-}
-
-/*
- * Unconditionally force exit from full system-idle state.  This is
- * invoked when a normal CPU exits idle, but must be called separately
- * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
- * is that the timekeeping CPU is permitted to take scheduling-clock
- * interrupts while the system is in system-idle state, and of course
- * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
- * interrupt from any other type of interrupt.
- */
-void rcu_sysidle_force_exit(void)
-{
-       int oldstate = ACCESS_ONCE(full_sysidle_state);
-       int newoldstate;
-
-       /*
-        * Each pass through the following loop attempts to exit full
-        * system-idle state.  If contention proves to be a problem,
-        * a trylock-based contention tree could be used here.
-        */
-       while (oldstate > RCU_SYSIDLE_SHORT) {
-               newoldstate = cmpxchg(&full_sysidle_state,
-                                     oldstate, RCU_SYSIDLE_NOT);
-               if (oldstate == newoldstate &&
-                   oldstate == RCU_SYSIDLE_FULL_NOTED) {
-                       rcu_kick_nohz_cpu(tick_do_timer_cpu);
-                       return; /* We cleared it, done! */
-               }
-               oldstate = newoldstate;
-       }
-       smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
-}
-
-/*
- * Invoked to note entry to irq or task transition from idle.  Note that
- * usermode execution does -not- count as idle here!  The caller must
- * have disabled interrupts.
- */
-static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
-{
-       /* Adjust nesting, check for already non-idle. */
-       if (irq) {
-               rdtp->dynticks_idle_nesting++;
-               WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-               if (rdtp->dynticks_idle_nesting != 1)
-                       return; /* Already non-idle. */
-       } else {
-               /*
-                * Allow for irq misnesting.  Yes, it really is possible
-                * to enter an irq handler then never leave it, and maybe
-                * also vice versa.  Handle both possibilities.
-                */
-               if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
-                       rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
-                       WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-                       return; /* Already non-idle. */
-               } else {
-                       rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
-               }
-       }
-
-       /* Record end of idle period. */
-       smp_mb__before_atomic_inc();
-       atomic_inc(&rdtp->dynticks_idle);
-       smp_mb__after_atomic_inc();
-       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
-
-       /*
-        * If we are the timekeeping CPU, we are permitted to be non-idle
-        * during a system-idle state.  This must be the case, because
-        * the timekeeping CPU has to take scheduling-clock interrupts
-        * during the time that the system is transitioning to full
-        * system-idle state.  This means that the timekeeping CPU must
-        * invoke rcu_sysidle_force_exit() directly if it does anything
-        * more than take a scheduling-clock interrupt.
-        */
-       if (smp_processor_id() == tick_do_timer_cpu)
-               return;
-
-       /* Update system-idle state: We are clearly no longer fully idle! */
-       rcu_sysidle_force_exit();
-}
-
-/*
- * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts.
- */
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-                                 unsigned long *maxj)
-{
-       int cur;
-       unsigned long j;
-       struct rcu_dynticks *rdtp = rdp->dynticks;
-
-       /*
-        * If some other CPU has already reported non-idle, if this is
-        * not the flavor of RCU that tracks sysidle state, or if this
-        * is an offline or the timekeeping CPU, nothing to do.
-        */
-       if (!*isidle || rdp->rsp != rcu_sysidle_state ||
-           cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
-               return;
-       if (rcu_gp_in_progress(rdp->rsp))
-               WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
-
-       /* Pick up current idle and NMI-nesting counter and check. */
-       cur = atomic_read(&rdtp->dynticks_idle);
-       if (cur & 0x1) {
-               *isidle = false; /* We are not idle! */
-               return;
-       }
-       smp_mb(); /* Read counters before timestamps. */
-
-       /* Pick up timestamps. */
-       j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
-       /* If this CPU entered idle more recently, update maxj timestamp. */
-       if (ULONG_CMP_LT(*maxj, j))
-               *maxj = j;
-}
-
-/*
- * Is this the flavor of RCU that is handling full-system idle?
- */
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-       return rsp == rcu_sysidle_state;
-}
-
-/*
- * Bind the grace-period kthread for the sysidle flavor of RCU to the
- * timekeeping CPU.
- */
-static void rcu_bind_gp_kthread(void)
-{
-       int cpu = ACCESS_ONCE(tick_do_timer_cpu);
-
-       if (cpu < 0 || cpu >= nr_cpu_ids)
-               return;
-       if (raw_smp_processor_id() != cpu)
-               set_cpus_allowed_ptr(current, cpumask_of(cpu));
-}
-
-/*
- * Return a delay in jiffies based on the number of CPUs, rcu_node
- * leaf fanout, and jiffies tick rate.  The idea is to allow larger
- * systems more time to transition to full-idle state in order to
- * avoid the cache thrashing that would otherwise occur on the state
- * variable.  Really small systems (fewer than a couple of tens of CPUs) should
- * instead use a single global atomically incremented counter, and later
- * versions of this will automatically reconfigure themselves accordingly.
- */
-static unsigned long rcu_sysidle_delay(void)
-{
-       if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-               return 0;
-       return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
-}
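
A worked example of the formula above (numbers purely illustrative): with
nr_cpu_ids = 4096, rcu_fanout_leaf = 16 and HZ = 1000, the delay is
DIV_ROUND_UP(4096 * 1000, 16 * 1000) = 256 jiffies, i.e. roughly one additional
millisecond of settling time per 16 CPUs; systems with nr_cpu_ids at or below
CONFIG_NO_HZ_FULL_SYSIDLE_SMALL skip the delay entirely.
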
-
-/*
- * Advance the full-system-idle state.  This is invoked when all of
- * the non-timekeeping CPUs are idle.
- */
-static void rcu_sysidle(unsigned long j)
-{
-       /* Check the current state. */
-       switch (ACCESS_ONCE(full_sysidle_state)) {
-       case RCU_SYSIDLE_NOT:
-
-               /* First time all are idle, so note a short idle period. */
-               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
-               break;
-
-       case RCU_SYSIDLE_SHORT:
-
-               /*
-                * Idle for a bit, time to advance to next state?
-                * cmpxchg failure means race with non-idle, let them win.
-                */
-               if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-                       (void)cmpxchg(&full_sysidle_state,
-                                     RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
-               break;
-
-       case RCU_SYSIDLE_LONG:
-
-               /*
-                * Do an additional check pass before advancing to full.
-                * cmpxchg failure means race with non-idle, let them win.
-                */
-               if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-                       (void)cmpxchg(&full_sysidle_state,
-                                     RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
-               break;
-
-       default:
-               break;
-       }
-}
-
-/*
- * Found a non-idle non-timekeeping CPU, so kick the system-idle state
- * back to the beginning.
- */
-static void rcu_sysidle_cancel(void)
-{
-       smp_mb();
-       ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
-}
-
-/*
- * Update the sysidle state based on the results of a force-quiescent-state
- * scan of the CPUs' dyntick-idle state.
- */
-static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
-                              unsigned long maxj, bool gpkt)
-{
-       if (rsp != rcu_sysidle_state)
-               return;  /* Wrong flavor, ignore. */
-       if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-               return;  /* Running state machine from timekeeping CPU. */
-       if (isidle)
-               rcu_sysidle(maxj);    /* More idle! */
-       else
-               rcu_sysidle_cancel(); /* Idle is over. */
-}
-
-/*
- * Wrapper for rcu_sysidle_report() when called from the grace-period
- * kthread's context.
- */
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-                                 unsigned long maxj)
-{
-       rcu_sysidle_report(rsp, isidle, maxj, true);
-}
-
-/* Callback and function for forcing an RCU grace period. */
-struct rcu_sysidle_head {
-       struct rcu_head rh;
-       int inuse;
-};
-
-static void rcu_sysidle_cb(struct rcu_head *rhp)
-{
-       struct rcu_sysidle_head *rshp;
-
-       /*
-        * The following memory barrier is needed to replace the
-        * memory barriers that would normally be in the memory
-        * allocator.
-        */
-       smp_mb();  /* grace period precedes setting inuse. */
-
-       rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-       ACCESS_ONCE(rshp->inuse) = 0;
-}
-
-/*
- * Check to see if the system is fully idle, other than the timekeeping CPU.
- * The caller must have disabled interrupts.
- */
-bool rcu_sys_is_idle(void)
-{
-       static struct rcu_sysidle_head rsh;
-       int rss = ACCESS_ONCE(full_sysidle_state);
-
-       if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
-               return false;
-
-       /* Handle small-system case by doing a full scan of CPUs. */
-       if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
-               int oldrss = rss - 1;
-
-               /*
-                * One pass to advance to each state up to _FULL.
-                * Give up if any pass fails to advance the state.
-                */
-               while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
-                       int cpu;
-                       bool isidle = true;
-                       unsigned long maxj = jiffies - ULONG_MAX / 4;
-                       struct rcu_data *rdp;
-
-                       /* Scan all the CPUs looking for nonidle CPUs. */
-                       for_each_possible_cpu(cpu) {
-                               rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
-                               rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
-                               if (!isidle)
-                                       break;
-                       }
-                       rcu_sysidle_report(rcu_sysidle_state,
-                                          isidle, maxj, false);
-                       oldrss = rss;
-                       rss = ACCESS_ONCE(full_sysidle_state);
-               }
-       }
-
-       /* If this is the first observation of an idle period, record it. */
-       if (rss == RCU_SYSIDLE_FULL) {
-               rss = cmpxchg(&full_sysidle_state,
-                             RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
-               return rss == RCU_SYSIDLE_FULL;
-       }
-
-       smp_mb(); /* ensure rss load happens before later caller actions. */
-
-       /* If already fully idle, tell the caller (in case of races). */
-       if (rss == RCU_SYSIDLE_FULL_NOTED)
-               return true;
-
-       /*
-        * If we aren't there yet, and a grace period is not in flight,
-        * initiate a grace period.  Either way, tell the caller that
-        * we are not there yet.  We use an xchg() rather than an assignment
-        * to make up for the memory barriers that would otherwise be
-        * provided by the memory allocator.
-        */
-       if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
-           !rcu_gp_in_progress(rcu_sysidle_state) &&
-           !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
-               call_rcu(&rsh.rh, rcu_sysidle_cb);
-       return false;
-}
-
-/*
- * Initialize dynticks sysidle state for CPUs coming online.
- */
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-       rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
-}
-
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
-{
-}
-
-static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
-{
-}
-
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-                                 unsigned long *maxj)
-{
-}
-
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-       return false;
-}
-
-static void rcu_bind_gp_kthread(void)
-{
-}
-
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-                                 unsigned long maxj)
-{
-}
-
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
deleted file mode 100644 (file)
index cf6c174..0000000
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Read-Copy Update tracing for classic implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2008
- *
- * Papers:  http://www.rdrop.com/users/paulmck/RCU
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *             Documentation/RCU
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <linux/completion.h>
-#include <linux/moduleparam.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#define RCU_TREE_NONCORE
-#include "rcutree.h"
-
-static int r_open(struct inode *inode, struct file *file,
-                                       const struct seq_operations *op)
-{
-       int ret = seq_open(file, op);
-       if (!ret) {
-               struct seq_file *m = (struct seq_file *)file->private_data;
-               m->private = inode->i_private;
-       }
-       return ret;
-}
-
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-       struct rcu_state *rsp = (struct rcu_state *)m->private;
-       *pos = cpumask_next(*pos - 1, cpu_possible_mask);
-       if ((*pos) < nr_cpu_ids)
-               return per_cpu_ptr(rsp->rda, *pos);
-       return NULL;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       (*pos)++;
-       return r_start(m, pos);
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-}
-
-static int show_rcubarrier(struct seq_file *m, void *v)
-{
-       struct rcu_state *rsp = (struct rcu_state *)m->private;
-       seq_printf(m, "bcc: %d nbd: %lu\n",
-                  atomic_read(&rsp->barrier_cpu_count),
-                  rsp->n_barrier_done);
-       return 0;
-}
-
-static int rcubarrier_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcubarrier, inode->i_private);
-}
-
-static const struct file_operations rcubarrier_fops = {
-       .owner = THIS_MODULE,
-       .open = rcubarrier_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static char convert_kthread_status(unsigned int kthread_status)
-{
-       if (kthread_status > RCU_KTHREAD_MAX)
-               return '?';
-       return "SRWOY"[kthread_status];
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
-{
-       long ql, qll;
-
-       if (!rdp->beenonline)
-               return;
-       seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
-                  rdp->cpu,
-                  cpu_is_offline(rdp->cpu) ? '!' : ' ',
-                  ulong2long(rdp->completed), ulong2long(rdp->gpnum),
-                  rdp->passed_quiesce, rdp->qs_pending);
-       seq_printf(m, " dt=%d/%llx/%d df=%lu",
-                  atomic_read(&rdp->dynticks->dynticks),
-                  rdp->dynticks->dynticks_nesting,
-                  rdp->dynticks->dynticks_nmi_nesting,
-                  rdp->dynticks_fqs);
-       seq_printf(m, " of=%lu", rdp->offline_fqs);
-       rcu_nocb_q_lengths(rdp, &ql, &qll);
-       qll += rdp->qlen_lazy;
-       ql += rdp->qlen;
-       seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
-                  qll, ql,
-                  ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
-                       rdp->nxttail[RCU_NEXT_TAIL]],
-                  ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
-                       rdp->nxttail[RCU_NEXT_READY_TAIL]],
-                  ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
-                       rdp->nxttail[RCU_WAIT_TAIL]],
-                  ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
-#ifdef CONFIG_RCU_BOOST
-       seq_printf(m, " kt=%d/%c ktl=%x",
-                  per_cpu(rcu_cpu_has_work, rdp->cpu),
-                  convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
-                                         rdp->cpu)),
-                  per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-       seq_printf(m, " b=%ld", rdp->blimit);
-       seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
-                  rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
-                  rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
-}
-
-static int show_rcudata(struct seq_file *m, void *v)
-{
-       print_one_rcu_data(m, (struct rcu_data *)v);
-       return 0;
-}
-
-static const struct seq_operations rcudate_op = {
-       .start = r_start,
-       .next  = r_next,
-       .stop  = r_stop,
-       .show  = show_rcudata,
-};
-
-static int rcudata_open(struct inode *inode, struct file *file)
-{
-       return r_open(inode, file, &rcudate_op);
-}
-
-static const struct file_operations rcudata_fops = {
-       .owner = THIS_MODULE,
-       .open = rcudata_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = seq_release,
-};
-
-static int show_rcuexp(struct seq_file *m, void *v)
-{
-       struct rcu_state *rsp = (struct rcu_state *)m->private;
-
-       seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
-                  atomic_long_read(&rsp->expedited_start),
-                  atomic_long_read(&rsp->expedited_done),
-                  atomic_long_read(&rsp->expedited_wrap),
-                  atomic_long_read(&rsp->expedited_tryfail),
-                  atomic_long_read(&rsp->expedited_workdone1),
-                  atomic_long_read(&rsp->expedited_workdone2),
-                  atomic_long_read(&rsp->expedited_normal),
-                  atomic_long_read(&rsp->expedited_stoppedcpus),
-                  atomic_long_read(&rsp->expedited_done_tries),
-                  atomic_long_read(&rsp->expedited_done_lost),
-                  atomic_long_read(&rsp->expedited_done_exit));
-       return 0;
-}
-
-static int rcuexp_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcuexp, inode->i_private);
-}
-
-static const struct file_operations rcuexp_fops = {
-       .owner = THIS_MODULE,
-       .open = rcuexp_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
-{
-       seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
-                  rnp->grplo, rnp->grphi,
-                  "T."[list_empty(&rnp->blkd_tasks)],
-                  "N."[!rnp->gp_tasks],
-                  "E."[!rnp->exp_tasks],
-                  "B."[!rnp->boost_tasks],
-                  convert_kthread_status(rnp->boost_kthread_status),
-                  rnp->n_tasks_boosted, rnp->n_exp_boosts,
-                  rnp->n_normal_boosts);
-       seq_printf(m, "j=%04x bt=%04x\n",
-                  (int)(jiffies & 0xffff),
-                  (int)(rnp->boost_time & 0xffff));
-       seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
-                  rnp->n_balk_blkd_tasks,
-                  rnp->n_balk_exp_gp_tasks,
-                  rnp->n_balk_boost_tasks,
-                  rnp->n_balk_notblocked,
-                  rnp->n_balk_notyet,
-                  rnp->n_balk_nos);
-}
-
-static int show_rcu_node_boost(struct seq_file *m, void *unused)
-{
-       struct rcu_node *rnp;
-
-       rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
-               print_one_rcu_node_boost(m, rnp);
-       return 0;
-}
-
-static int rcu_node_boost_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcu_node_boost, NULL);
-}
-
-static const struct file_operations rcu_node_boost_fops = {
-       .owner = THIS_MODULE,
-       .open = rcu_node_boost_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = single_release,
-};
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
-{
-       unsigned long gpnum;
-       int level = 0;
-       struct rcu_node *rnp;
-
-       gpnum = rsp->gpnum;
-       seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
-                  ulong2long(rsp->completed), ulong2long(gpnum),
-                  rsp->fqs_state,
-                  (long)(rsp->jiffies_force_qs - jiffies),
-                  (int)(jiffies & 0xffff));
-       seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
-                  rsp->n_force_qs, rsp->n_force_qs_ngp,
-                  rsp->n_force_qs - rsp->n_force_qs_ngp,
-                  rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
-       for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
-               if (rnp->level != level) {
-                       seq_puts(m, "\n");
-                       level = rnp->level;
-               }
-               seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d    ",
-                          rnp->qsmask, rnp->qsmaskinit,
-                          ".G"[rnp->gp_tasks != NULL],
-                          ".E"[rnp->exp_tasks != NULL],
-                          ".T"[!list_empty(&rnp->blkd_tasks)],
-                          rnp->grplo, rnp->grphi, rnp->grpnum);
-       }
-       seq_puts(m, "\n");
-}
-
-static int show_rcuhier(struct seq_file *m, void *v)
-{
-       struct rcu_state *rsp = (struct rcu_state *)m->private;
-       print_one_rcu_state(m, rsp);
-       return 0;
-}
-
-static int rcuhier_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcuhier, inode->i_private);
-}
-
-static const struct file_operations rcuhier_fops = {
-       .owner = THIS_MODULE,
-       .open = rcuhier_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = single_release,
-};
-
-static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
-{
-       unsigned long flags;
-       unsigned long completed;
-       unsigned long gpnum;
-       unsigned long gpage;
-       unsigned long gpmax;
-       struct rcu_node *rnp = &rsp->node[0];
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       completed = ACCESS_ONCE(rsp->completed);
-       gpnum = ACCESS_ONCE(rsp->gpnum);
-       if (completed == gpnum)
-               gpage = 0;
-       else
-               gpage = jiffies - rsp->gp_start;
-       gpmax = rsp->gp_max;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       seq_printf(m, "completed=%ld  gpnum=%ld  age=%ld  max=%ld\n",
-                  ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
-}
-
-static int show_rcugp(struct seq_file *m, void *v)
-{
-       struct rcu_state *rsp = (struct rcu_state *)m->private;
-       show_one_rcugp(m, rsp);
-       return 0;
-}
-
-static int rcugp_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcugp, inode->i_private);
-}
-
-static const struct file_operations rcugp_fops = {
-       .owner = THIS_MODULE,
-       .open = rcugp_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = single_release,
-};
-
-static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
-{
-       if (!rdp->beenonline)
-               return;
-       seq_printf(m, "%3d%cnp=%ld ",
-                  rdp->cpu,
-                  cpu_is_offline(rdp->cpu) ? '!' : ' ',
-                  rdp->n_rcu_pending);
-       seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
-                  rdp->n_rp_qs_pending,
-                  rdp->n_rp_report_qs,
-                  rdp->n_rp_cb_ready,
-                  rdp->n_rp_cpu_needs_gp);
-       seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
-                  rdp->n_rp_gp_completed,
-                  rdp->n_rp_gp_started,
-                  rdp->n_rp_need_nothing);
-}
-
-static int show_rcu_pending(struct seq_file *m, void *v)
-{
-       print_one_rcu_pending(m, (struct rcu_data *)v);
-       return 0;
-}
-
-static const struct seq_operations rcu_pending_op = {
-       .start = r_start,
-       .next  = r_next,
-       .stop  = r_stop,
-       .show  = show_rcu_pending,
-};
-
-static int rcu_pending_open(struct inode *inode, struct file *file)
-{
-       return r_open(inode, file, &rcu_pending_op);
-}
-
-static const struct file_operations rcu_pending_fops = {
-       .owner = THIS_MODULE,
-       .open = rcu_pending_open,
-       .read = seq_read,
-       .llseek = no_llseek,
-       .release = seq_release,
-};
-
-static int show_rcutorture(struct seq_file *m, void *unused)
-{
-       seq_printf(m, "rcutorture test sequence: %lu %s\n",
-                  rcutorture_testseq >> 1,
-                  (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
-       seq_printf(m, "rcutorture update version number: %lu\n",
-                  rcutorture_vernum);
-       return 0;
-}
-
-static int rcutorture_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, show_rcutorture, NULL);
-}
-
-static const struct file_operations rcutorture_fops = {
-       .owner = THIS_MODULE,
-       .open = rcutorture_open,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutree_trace_init(void)
-{
-       struct rcu_state *rsp;
-       struct dentry *retval;
-       struct dentry *rspdir;
-
-       rcudir = debugfs_create_dir("rcu", NULL);
-       if (!rcudir)
-               goto free_out;
-
-       for_each_rcu_flavor(rsp) {
-               rspdir = debugfs_create_dir(rsp->name, rcudir);
-               if (!rspdir)
-                       goto free_out;
-
-               retval = debugfs_create_file("rcudata", 0444,
-                               rspdir, rsp, &rcudata_fops);
-               if (!retval)
-                       goto free_out;
-
-               retval = debugfs_create_file("rcuexp", 0444,
-                               rspdir, rsp, &rcuexp_fops);
-               if (!retval)
-                       goto free_out;
-
-               retval = debugfs_create_file("rcu_pending", 0444,
-                               rspdir, rsp, &rcu_pending_fops);
-               if (!retval)
-                       goto free_out;
-
-               retval = debugfs_create_file("rcubarrier", 0444,
-                               rspdir, rsp, &rcubarrier_fops);
-               if (!retval)
-                       goto free_out;
-
-#ifdef CONFIG_RCU_BOOST
-               if (rsp == &rcu_preempt_state) {
-                       retval = debugfs_create_file("rcuboost", 0444,
-                               rspdir, NULL, &rcu_node_boost_fops);
-                       if (!retval)
-                               goto free_out;
-               }
-#endif
-
-               retval = debugfs_create_file("rcugp", 0444,
-                               rspdir, rsp, &rcugp_fops);
-               if (!retval)
-                       goto free_out;
-
-               retval = debugfs_create_file("rcuhier", 0444,
-                               rspdir, rsp, &rcuhier_fops);
-               if (!retval)
-                       goto free_out;
-       }
-
-       retval = debugfs_create_file("rcutorture", 0444, rcudir,
-                                               NULL, &rcutorture_fops);
-       if (!retval)
-               goto free_out;
-       return 0;
-free_out:
-       debugfs_remove_recursive(rcudir);
-       return 1;
-}
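
For orientation, the debugfs layout created above (typically mounted at
/sys/kernel/debug) looks roughly like the sketch below; the flavor directory names
depend on the configuration, and rcuboost appears only for rcu_preempt when
CONFIG_RCU_BOOST=y:

	rcu/
		rcutorture
		rcu_sched/    rcudata  rcuexp  rcu_pending  rcubarrier  rcugp  rcuhier
		rcu_bh/       (same set of files)
		rcu_preempt/  (same set of files, plus rcuboost)
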
-
-static void __exit rcutree_trace_cleanup(void)
-{
-       debugfs_remove_recursive(rcudir);
-}
-
-
-module_init(rcutree_trace_init);
-module_exit(rcutree_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
-MODULE_LICENSE("GPL");
diff --git a/kernel/srcu.c b/kernel/srcu.c
deleted file mode 100644 (file)
index 01d5ccb..0000000
+++ /dev/null
@@ -1,651 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2006
- * Copyright (C) Fujitsu, 2012
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- *        Lai Jiangshan <laijs@cn.fujitsu.com>
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *             Documentation/RCU/ *.txt
- *
- */
-
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/srcu.h>
-
-#include <trace/events/rcu.h>
-
-#include "rcu.h"
-
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-       b->head = NULL;
-       b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-       *b->tail = head;
-       b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-       return b->tail == &b->head;
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-       struct rcu_head *head;
-
-       if (rcu_batch_empty(b))
-               return NULL;
-
-       head = b->head;
-       b->head = head->next;
-       if (b->tail == &head->next)
-               rcu_batch_init(b);
-
-       return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-       if (!rcu_batch_empty(from)) {
-               *to->tail = from->head;
-               to->tail = from->tail;
-               rcu_batch_init(from);
-       }
-}
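
The helpers above implement a simple singly linked FIFO.  A minimal
illustrative-only sketch of the discipline, using hypothetical local names that are
not part of this file:

	static void rcu_batch_example(struct rcu_head *a, struct rcu_head *b)
	{
		struct rcu_batch pending, done;
		struct rcu_head *rhp;

		rcu_batch_init(&pending);
		rcu_batch_init(&done);
		rcu_batch_queue(&pending, a);		/* FIFO order: a first, then b */
		rcu_batch_queue(&pending, b);
		rcu_batch_move(&done, &pending);	/* "pending" is now empty again */
		while ((rhp = rcu_batch_dequeue(&done)) != NULL)
			rhp->func(rhp);			/* invoke callbacks in order */
	}
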
-
-static int init_srcu_struct_fields(struct srcu_struct *sp)
-{
-       sp->completed = 0;
-       spin_lock_init(&sp->queue_lock);
-       sp->running = false;
-       rcu_batch_init(&sp->batch_queue);
-       rcu_batch_init(&sp->batch_check0);
-       rcu_batch_init(&sp->batch_check1);
-       rcu_batch_init(&sp->batch_done);
-       INIT_DELAYED_WORK(&sp->work, process_srcu);
-       sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
-       return sp->per_cpu_ref ? 0 : -ENOMEM;
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-int __init_srcu_struct(struct srcu_struct *sp, const char *name,
-                      struct lock_class_key *key)
-{
-       /* Don't re-initialize a lock while it is held. */
-       debug_check_no_locks_freed((void *)sp, sizeof(*sp));
-       lockdep_init_map(&sp->dep_map, name, key, 0);
-       return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(__init_srcu_struct);
-
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/**
- * init_srcu_struct - initialize a sleep-RCU structure
- * @sp: structure to initialize.
- *
- * Must invoke this on a given srcu_struct before passing that srcu_struct
- * to any other function.  Each srcu_struct represents a separate domain
- * of SRCU protection.
- */
-int init_srcu_struct(struct srcu_struct *sp)
-{
-       return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/*
- * Returns approximate total of the readers' ->seq[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
-{
-       int cpu;
-       unsigned long sum = 0;
-       unsigned long t;
-
-       for_each_possible_cpu(cpu) {
-               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
-               sum += t;
-       }
-       return sum;
-}
-
-/*
- * Returns approximate number of readers active on the specified rank
- * of the per-CPU ->c[] counters.
- */
-static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
-{
-       int cpu;
-       unsigned long sum = 0;
-       unsigned long t;
-
-       for_each_possible_cpu(cpu) {
-               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
-               sum += t;
-       }
-       return sum;
-}
-
-/*
- * Return true if the number of pre-existing readers is determined to
- * be stably zero.  An example unstable zero can occur if the call
- * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
- * but due to task migration, sees the corresponding __srcu_read_unlock()
- * decrement.  This can happen because srcu_readers_active_idx() takes
- * time to sum the array, and might in fact be interrupted or preempted
- * partway through the summation.
- */
-static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
-{
-       unsigned long seq;
-
-       seq = srcu_readers_seq_idx(sp, idx);
-
-       /*
-        * The following smp_mb() A pairs with the smp_mb() B located in
-        * __srcu_read_lock().  This pairing ensures that if an
-        * __srcu_read_lock() increments its counter after the summation
-        * in srcu_readers_active_idx(), then the corresponding SRCU read-side
-        * critical section will see any changes made prior to the start
-        * of the current SRCU grace period.
-        *
-        * Also, if the above call to srcu_readers_seq_idx() saw the
-        * increment of ->seq[], then the call to srcu_readers_active_idx()
-        * must see the increment of ->c[].
-        */
-       smp_mb(); /* A */
-
-       /*
-        * Note that srcu_readers_active_idx() can incorrectly return
-        * zero even though there is a pre-existing reader throughout.
-        * To see this, suppose that task A is in a very long SRCU
-        * read-side critical section that started on CPU 0, and that
-        * no other reader exists, so that the sum of the counters
-        * is equal to one.  Then suppose that task B starts executing
-        * srcu_readers_active_idx(), summing up to CPU 1, and then that
-        * task C starts reading on CPU 0, so that its increment is not
-        * summed, but finishes reading on CPU 2, so that its decrement
-        * -is- summed.  Then when task B completes its sum, it will
-        * incorrectly get zero, despite the fact that task A has been
-        * in its SRCU read-side critical section the whole time.
-        *
-        * We therefore do a validation step should srcu_readers_active_idx()
-        * return zero.
-        */
-       if (srcu_readers_active_idx(sp, idx) != 0)
-               return false;
-
-       /*
-        * The remainder of this function is the validation step.
-        * The following smp_mb() D pairs with the smp_mb() C in
-        * __srcu_read_unlock().  If the __srcu_read_unlock() was seen
-        * by srcu_readers_active_idx() above, then any destructive
-        * operation performed after the grace period will happen after
-        * the corresponding SRCU read-side critical section.
-        *
-        * Note that there can be at most NR_CPUS worth of readers using
-        * the old index, which is not enough to overflow even a 32-bit
-        * integer.  (Yes, this does mean that systems having more than
-        * a billion or so CPUs need to be 64-bit systems.)  Therefore,
-        * the sum of the ->seq[] counters cannot possibly overflow.
-        * Therefore, the only way that the return values of the two
-        * calls to srcu_readers_seq_idx() can be equal is if there were
-        * no increments of the corresponding rank of ->seq[] counts
-        * in the interim.  But the missed-increment scenario laid out
-        * above includes an increment of the ->seq[] counter by
-        * the corresponding __srcu_read_lock().  Therefore, if this
-        * scenario occurs, the return values from the two calls to
-        * srcu_readers_seq_idx() will differ, and thus the validation
-        * step below suffices.
-        */
-       smp_mb(); /* D */
-
-       return srcu_readers_seq_idx(sp, idx) == seq;
-}
-
-/**
- * srcu_readers_active - returns approximate number of readers.
- * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
- *
- * Note that this is not an atomic primitive, and can therefore suffer
- * severe errors when invoked on an active srcu_struct.  That said, it
- * can be useful as an error check at cleanup time.
- */
-static int srcu_readers_active(struct srcu_struct *sp)
-{
-       int cpu;
-       unsigned long sum = 0;
-
-       for_each_possible_cpu(cpu) {
-               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
-               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
-       }
-       return sum;
-}
-
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
-{
-       if (WARN_ON(srcu_readers_active(sp)))
-               return; /* Leakage unless caller handles error. */
-       free_percpu(sp->per_cpu_ref);
-       sp->per_cpu_ref = NULL;
-}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-
-/*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-       int idx;
-
-       idx = ACCESS_ONCE(sp->completed) & 0x1;
-       preempt_disable();
-       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
-       smp_mb(); /* B */  /* Avoid leaking the critical section. */
-       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
-       preempt_enable();
-       return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
-/*
- * Removes the count for the old reader from the appropriate per-CPU
- * element of the srcu_struct.  Note that this may well be a different
- * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
- */
-void __srcu_read_unlock(struct srcu_struct *sp, int idx)
-{
-       smp_mb(); /* C */  /* Avoid leaking the critical section. */
-       this_cpu_dec(sp->per_cpu_ref->c[idx]);
-}
-EXPORT_SYMBOL_GPL(__srcu_read_unlock);
-
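For reference, readers normally reach these two functions through the srcu_read_lock()/srcu_read_unlock() wrappers and the srcu_dereference() accessor from include/linux/srcu.h. A minimal reader-side sketch of that usage (the my_srcu domain and my_data structure are hypothetical, purely for illustration):

#include <linux/srcu.h>

struct my_data {
	int value;
};

static struct my_data __rcu *my_ptr;	/* published by an updater elsewhere */
static struct srcu_struct my_srcu;	/* set up via init_srcu_struct(&my_srcu) */

static int my_reader(void)
{
	struct my_data *p;
	int idx, val = -1;

	idx = srcu_read_lock(&my_srcu);		/* ends up in __srcu_read_lock() */
	p = srcu_dereference(my_ptr, &my_srcu);
	if (p)
		val = p->value;
	srcu_read_unlock(&my_srcu, idx);	/* ends up in __srcu_read_unlock() */
	return val;
}

The index returned by srcu_read_lock() must be passed unchanged to the matching srcu_read_unlock(), since, as noted above, the unlock may run on a different CPU than the lock.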
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after 10 microseconds,
- * we repeatedly block for 1-millisecond time periods.  This approach
- * has done well in testing, so there is no need for a config parameter.
- */
-#define SRCU_RETRY_CHECK_DELAY         5
-#define SYNCHRONIZE_SRCU_TRYCOUNT      2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT  12
-
-/*
- * @@@ Wait until all pre-existing readers complete.  Such readers
- * will have used the index specified by "idx".
- * The caller should ensure that ->completed is not changed while
- * checking, and that idx = (->completed & 1) ^ 1.
- */
-static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
-{
-       for (;;) {
-               if (srcu_readers_active_idx_check(sp, idx))
-                       return true;
-               if (--trycount <= 0)
-                       return false;
-               udelay(SRCU_RETRY_CHECK_DELAY);
-       }
-}
-
-/*
- * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->c[] and ->seq[] arrays.  This allows
- * us to wait for pre-existing readers in a starvation-free manner.
- */
-static void srcu_flip(struct srcu_struct *sp)
-{
-       sp->completed++;
-}
-
-/*
- * Enqueue an SRCU callback on the specified srcu_struct structure,
- * initiating grace-period processing if it is not already running.
- */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
-               void (*func)(struct rcu_head *head))
-{
-       unsigned long flags;
-
-       head->next = NULL;
-       head->func = func;
-       spin_lock_irqsave(&sp->queue_lock, flags);
-       rcu_batch_queue(&sp->batch_queue, head);
-       if (!sp->running) {
-               sp->running = true;
-               schedule_delayed_work(&sp->work, 0);
-       }
-       spin_unlock_irqrestore(&sp->queue_lock, flags);
-}
-EXPORT_SYMBOL_GPL(call_srcu);
-
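A minimal sketch of how a caller might queue a callback through call_srcu(); the object and callback names below are hypothetical:

#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>

struct my_obj {
	int payload;
	struct rcu_head rh;
};

static void my_obj_free_cb(struct rcu_head *head)
{
	kfree(container_of(head, struct my_obj, rh));
}

/* Called after the object has been unpublished from all SRCU-visible paths. */
static void my_obj_retire(struct srcu_struct *sp, struct my_obj *obj)
{
	call_srcu(sp, &obj->rh, my_obj_free_cb);
}

The callback is invoked only after a full SRCU grace period for the given srcu_struct, i.e. after every reader that might still hold a reference has finished.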
-struct rcu_synchronize {
-       struct rcu_head head;
-       struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_srcu() instance now that a
- * grace period has elapsed.
- */
-static void wakeme_after_rcu(struct rcu_head *head)
-{
-       struct rcu_synchronize *rcu;
-
-       rcu = container_of(head, struct rcu_synchronize, head);
-       complete(&rcu->completion);
-}
-
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
-static void srcu_reschedule(struct srcu_struct *sp);
-
-/*
- * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
- */
-static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
-{
-       struct rcu_synchronize rcu;
-       struct rcu_head *head = &rcu.head;
-       bool done = false;
-
-       rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
-                          !lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
-
-       might_sleep();
-       init_completion(&rcu.completion);
-
-       head->next = NULL;
-       head->func = wakeme_after_rcu;
-       spin_lock_irq(&sp->queue_lock);
-       if (!sp->running) {
-               /* steal processing ownership */
-               sp->running = true;
-               rcu_batch_queue(&sp->batch_check0, head);
-               spin_unlock_irq(&sp->queue_lock);
-
-               srcu_advance_batches(sp, trycount);
-               if (!rcu_batch_empty(&sp->batch_done)) {
-                       BUG_ON(sp->batch_done.head != head);
-                       rcu_batch_dequeue(&sp->batch_done);
-                       done = true;
-               }
-               /* hand processing ownership back to the work_struct */
-               srcu_reschedule(sp);
-       } else {
-               rcu_batch_queue(&sp->batch_queue, head);
-               spin_unlock_irq(&sp->queue_lock);
-       }
-
-       if (!done)
-               wait_for_completion(&rcu.completion);
-}
-
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for the counts of both indexes to drain to zero.  To avoid
- * possible starvation of synchronize_srcu(), it first waits for the
- * count of index ((->completed & 1) ^ 1) to drain to zero, then flips
- * ->completed and waits for the count of the other index to drain.
- *
- * Can block; must be called from process context.
- *
- * Note that it is illegal to call synchronize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section.
- */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-       __synchronize_srcu(sp, rcu_expedited
-                          ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
-                          : SYNCHRONIZE_SRCU_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu);
-
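A minimal sketch of the classic updater pattern built on synchronize_srcu(); the pointer, mutex, and structure names are hypothetical:

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_data {
	int value;
};

static struct my_data __rcu *my_ptr;
static struct srcu_struct my_srcu;	/* set up via init_srcu_struct() */
static DEFINE_MUTEX(my_update_lock);

static void my_update(struct my_data *newp)
{
	struct my_data *oldp;

	mutex_lock(&my_update_lock);
	oldp = rcu_dereference_protected(my_ptr,
					 lockdep_is_held(&my_update_lock));
	rcu_assign_pointer(my_ptr, newp);
	mutex_unlock(&my_update_lock);

	synchronize_srcu(&my_srcu);	/* wait out pre-existing readers */
	kfree(oldp);			/* no reader can still see oldp */
}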
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that it is also illegal to call synchronize_srcu_expedited()
- * from the corresponding SRCU read-side critical section;
- * doing so will result in deadlock.  However, it is perfectly legal
- * to call synchronize_srcu_expedited() on one srcu_struct from some
- * other srcu_struct's read-side critical section, as long as
- * the resulting graph of srcu_structs is acyclic.
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-       __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
-
-/**
- * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
- */
-void srcu_barrier(struct srcu_struct *sp)
-{
-       synchronize_srcu(sp);
-}
-EXPORT_SYMBOL_GPL(srcu_barrier);
-
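Given that mapping, a minimal teardown sketch for a hypothetical user flushes callbacks before destroying the domain:

#include <linux/srcu.h>

static void my_srcu_teardown(struct srcu_struct *sp)
{
	srcu_barrier(sp);		/* wait for all pending call_srcu() callbacks */
	cleanup_srcu_struct(sp);	/* srcu_readers_active() must be zero by now */
}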
-/**
- * srcu_batches_completed - return batches completed.
- * @sp: srcu_struct on which to report batch completion.
- *
- * Report the number of batches, correlated with, but not necessarily
- * precisely the same as, the number of grace periods that have elapsed.
- */
-long srcu_batches_completed(struct srcu_struct *sp)
-{
-       return sp->completed;
-}
-EXPORT_SYMBOL_GPL(srcu_batches_completed);
-
-#define SRCU_CALLBACK_BATCH    10
-#define SRCU_INTERVAL          1
-
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-       if (!rcu_batch_empty(&sp->batch_queue)) {
-               spin_lock_irq(&sp->queue_lock);
-               rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-               spin_unlock_irq(&sp->queue_lock);
-       }
-}
-
-/*
- * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
- * ->batch_check1 and then to ->batch_done as readers drain.
- */
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
-{
-       int idx = 1 ^ (sp->completed & 1);
-
-       /*
-        * Because readers might be delayed for an extended period after
-        * fetching ->completed for their index, at any point in time there
-        * might well be readers using both idx=0 and idx=1.  We therefore
-        * need to wait for readers to clear from both index values before
-        * invoking a callback.
-        */
-
-       if (rcu_batch_empty(&sp->batch_check0) &&
-           rcu_batch_empty(&sp->batch_check1))
-               return; /* no callbacks need to be advanced */
-
-       if (!try_check_zero(sp, idx, trycount))
-               return; /* failed to advance, will try after SRCU_INTERVAL */
-
-       /*
-        * The callbacks in ->batch_check1 already went through their first
-        * zero check and flip back when they were enqueued on
-        * ->batch_check0 in a previous invocation of srcu_advance_batches().
-        * (Presumably try_check_zero() returned false during that
-        * invocation, leaving the callbacks stranded on ->batch_check1.)
-        * They are therefore ready to invoke, so move them to ->batch_done.
-        */
-       rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
-       if (rcu_batch_empty(&sp->batch_check0))
-               return; /* no callbacks need to be advanced */
-       srcu_flip(sp);
-
-       /*
-        * The callbacks in ->batch_check0 just finished their first
-        * zero check and flip, so move them to ->batch_check1 for
-        * future checking on the other idx.
-        */
-       rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
-       /*
-        * SRCU read-side critical sections are normally short, so check
-        * at least twice in quick succession after a flip.
-        */
-       trycount = trycount < 2 ? 2 : trycount;
-       if (!try_check_zero(sp, idx^1, trycount))
-               return; /* failed to advance, will try after SRCU_INTERVAL */
-
-       /*
-        * The callbacks in ->batch_check1 have now waited for all
-        * pre-existing readers using both idx values.  They are therefore
-        * ready to invoke, so move them to ->batch_done.
-        */
-       rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-}
-
-/*
- * Invoke a limited number of SRCU callbacks that have passed through
- * their grace period.  If there are more to do, SRCU will reschedule
- * the workqueue.
- */
-static void srcu_invoke_callbacks(struct srcu_struct *sp)
-{
-       int i;
-       struct rcu_head *head;
-
-       for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-               head = rcu_batch_dequeue(&sp->batch_done);
-               if (!head)
-                       break;
-               local_bh_disable();
-               head->func(head);
-               local_bh_enable();
-       }
-}
-
-/*
- * Finished one round of SRCU grace period.  Start another if there are
- * more SRCU callbacks queued, otherwise put SRCU into not-running state.
- */
-static void srcu_reschedule(struct srcu_struct *sp)
-{
-       bool pending = true;
-
-       if (rcu_batch_empty(&sp->batch_done) &&
-           rcu_batch_empty(&sp->batch_check1) &&
-           rcu_batch_empty(&sp->batch_check0) &&
-           rcu_batch_empty(&sp->batch_queue)) {
-               spin_lock_irq(&sp->queue_lock);
-               if (rcu_batch_empty(&sp->batch_done) &&
-                   rcu_batch_empty(&sp->batch_check1) &&
-                   rcu_batch_empty(&sp->batch_check0) &&
-                   rcu_batch_empty(&sp->batch_queue)) {
-                       sp->running = false;
-                       pending = false;
-               }
-               spin_unlock_irq(&sp->queue_lock);
-       }
-
-       if (pending)
-               schedule_delayed_work(&sp->work, SRCU_INTERVAL);
-}
-
-/*
- * This is the work-queue function that handles SRCU grace periods.
- */
-void process_srcu(struct work_struct *work)
-{
-       struct srcu_struct *sp;
-
-       sp = container_of(work, struct srcu_struct, work.work);
-
-       srcu_collect_new(sp);
-       srcu_advance_batches(sp, 1);
-       srcu_invoke_callbacks(sp);
-       srcu_reschedule(sp);
-}
-EXPORT_SYMBOL_GPL(process_srcu);
index a159e1fd2013fecd2c8c16f50e520278a587c1b8..339c003314f4a05daf1e8993fbfd76ba1f11d4ab 100644 (file)
@@ -969,9 +969,10 @@ static struct ctl_table kern_table[] = {
        {
                .procname       = "hung_task_check_count",
                .data           = &sysctl_hung_task_check_count,
-               .maxlen         = sizeof(unsigned long),
+               .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_doulongvec_minmax,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
        },
        {
                .procname       = "hung_task_timeout_secs",
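The hunk above switches hung_task_check_count from an unsigned long handled by proc_doulongvec_minmax() to a plain int handled by proc_dointvec_minmax() with a zero lower bound. As a hedged illustration of that table pattern for a hypothetical tunable (registered elsewhere via register_sysctl_table(); all names below are made up):

#include <linux/sysctl.h>

static int my_check_count = 16;		/* hypothetical tunable */
static int zero;			/* lower bound, as in kern_table */

static struct ctl_table my_table[] = {
	{
		.procname	= "my_check_count",
		.data		= &my_check_count,
		.maxlen		= sizeof(int),		/* matches the int handler */
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,		/* reject negative writes */
	},
	{ }
};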
index a685c8a79578b274cb361d442b953507e58d7d63..d16fa295ae1dbd0d43f1303b7796086cd968344c 100644 (file)
@@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
                miter->__offset += miter->consumed;
                miter->__remaining -= miter->consumed;
 
-               if (miter->__flags & SG_MITER_TO_SG)
+               if ((miter->__flags & SG_MITER_TO_SG) &&
+                   !PageSlab(miter->page))
                        flush_kernel_dcache_page(miter->page);
 
                if (miter->__flags & SG_MITER_ATOMIC) {
index 497ec33ff22d6de772e761c6d13394480b266dc6..13b9d0f221b8460ae3c361dd51d22ae969e78325 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/cpu.h>
 #include <linux/oom.h>
+#include <linux/lockdep.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2046,6 +2047,12 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
        return total;
 }
 
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map memcg_oom_lock_dep_map = {
+       .name = "memcg_oom_lock",
+};
+#endif
+
 static DEFINE_SPINLOCK(memcg_oom_lock);
 
 /*
@@ -2083,7 +2090,8 @@ static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
                        }
                        iter->oom_lock = false;
                }
-       }
+       } else
+               mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_);
 
        spin_unlock(&memcg_oom_lock);
 
@@ -2095,6 +2103,7 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
        struct mem_cgroup *iter;
 
        spin_lock(&memcg_oom_lock);
+       mutex_release(&memcg_oom_lock_dep_map, 1, _RET_IP_);
        for_each_mem_cgroup_tree(iter, memcg)
                iter->oom_lock = false;
        spin_unlock(&memcg_oom_lock);
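The memcg_oom_lock hunks above teach lockdep about an open-coded lock by pairing a static struct lockdep_map with mutex_acquire()/mutex_release() annotations at the acquire and release points. A minimal sketch of the same pattern for a hypothetical per-subsystem lock (all names below are illustrative, not taken from the patch):

#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/spinlock.h>

#ifdef CONFIG_LOCKDEP
static struct lockdep_map my_oom_lock_dep_map = {
	.name = "my_oom_lock",
};
#endif

static DEFINE_SPINLOCK(my_state_lock);	/* protects my_oom_locked */
static bool my_oom_locked;

static bool my_oom_trylock(void)
{
	bool locked = false;

	spin_lock(&my_state_lock);
	if (!my_oom_locked) {
		my_oom_locked = true;
		locked = true;
		/* trylock-style acquire: lockdep now tracks the pseudo-lock */
		mutex_acquire(&my_oom_lock_dep_map, 0, 1, _RET_IP_);
	}
	spin_unlock(&my_state_lock);
	return locked;
}

static void my_oom_unlock(void)
{
	spin_lock(&my_state_lock);
	mutex_release(&my_oom_lock_dep_map, 1, _RET_IP_);
	my_oom_locked = false;
	spin_unlock(&my_state_lock);
}

As in the patch, the acquire annotation fires only on a successful trylock, so lockdep's held-lock state matches the real ownership of the pseudo-lock.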
@@ -2765,10 +2774,10 @@ done:
        *ptr = memcg;
        return 0;
 nomem:
-       *ptr = NULL;
-       if (gfp_mask & __GFP_NOFAIL)
-               return 0;
-       return -ENOMEM;
+       if (!(gfp_mask & __GFP_NOFAIL)) {
+               *ptr = NULL;
+               return -ENOMEM;
+       }
 bypass:
        *ptr = root_mem_cgroup;
        return -EINTR;
@@ -3773,7 +3782,6 @@ void mem_cgroup_move_account_page_stat(struct mem_cgroup *from,
 {
        /* Update stat data for mem_cgroup */
        preempt_disable();
-       WARN_ON_ONCE(from->stat->count[idx] < nr_pages);
        __this_cpu_sub(from->stat->count[idx], nr_pages);
        __this_cpu_add(to->stat->count[idx], nr_pages);
        preempt_enable();
@@ -4950,31 +4958,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
        } while (usage > 0);
 }
 
-/*
- * This mainly exists for tests during the setting of use_hierarchy.
- * Since this is the very setting we are changing, the current hierarchy
- * value is meaningless.
- */
-static inline bool __memcg_has_children(struct mem_cgroup *memcg)
-{
-       struct cgroup_subsys_state *pos;
-
-       /* bounce at first found */
-       css_for_each_child(pos, &memcg->css)
-               return true;
-       return false;
-}
-
-/*
- * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed
- * to be already dead (as in mem_cgroup_force_empty, for instance).  This is
- * different from mem_cgroup_count_children(), in the sense that we don't
- * really care how many children we have; we only need to know if we have
- * any.  It also counts any memcg without hierarchy as infertile.
- */
 static inline bool memcg_has_children(struct mem_cgroup *memcg)
 {
-       return memcg->use_hierarchy && __memcg_has_children(memcg);
+       lockdep_assert_held(&memcg_create_mutex);
+       /*
+        * The lock does not prevent addition to or deletion from the list
+        * of children, but it prevents a new child from being
+        * initialized based on this parent in css_online(), so it's
+        * enough to decide whether hierarchically inherited
+        * attributes can still be changed or not.
+        */
+       return memcg->use_hierarchy &&
+               !list_empty(&memcg->css.cgroup->children);
 }
 
 /*
@@ -5054,7 +5049,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
         */
        if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
                                (val == 1 || val == 0)) {
-               if (!__memcg_has_children(memcg))
+               if (list_empty(&memcg->css.cgroup->children))
                        memcg->use_hierarchy = val;
                else
                        retval = -EBUSY;
index 487ac6f37ca23ce2d1e832fa177d74d4544386bf..9a11f9f799f499f2d34d1dea3dec48feb36db00f 100644 (file)
@@ -55,6 +55,7 @@ static struct sym_entry *table;
 static unsigned int table_size, table_cnt;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
+static unsigned long long kernel_start_addr = 0;
 
 int token_profit[0x10000];
 
@@ -65,7 +66,10 @@ unsigned char best_table_len[256];
 
 static void usage(void)
 {
-       fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
+       fprintf(stderr, "Usage: kallsyms [--all-symbols] "
+                       "[--symbol-prefix=<prefix char>] "
+                       "[--page-offset=<CONFIG_PAGE_OFFSET>] "
+                       "< in.map > out.S\n");
        exit(1);
 }
 
@@ -194,6 +198,9 @@ static int symbol_valid(struct sym_entry *s)
        int i;
        int offset = 1;
 
+       if (s->addr < kernel_start_addr)
+               return 0;
+
        /* skip prefix char */
        if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
                offset++;
@@ -646,6 +653,9 @@ int main(int argc, char **argv)
                                if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
                                        p++;
                                symbol_prefix_char = *p;
+                       } else if (strncmp(argv[i], "--page-offset=", 14) == 0) {
+                               const char *p = &argv[i][14];
+                               kernel_start_addr = strtoull(p, NULL, 16);
                        } else
                                usage();
                }
index 014994936b1c2152c82f793ce646342159d78eb5..32b10f53d0b4cbad76b13ef86d547ad864ae19c2 100644 (file)
@@ -82,6 +82,8 @@ kallsyms()
                kallsymopt="${kallsymopt} --all-symbols"
        fi
 
+       kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET"
+
        local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL}               \
                      ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}"
 
index 17f45e8aa89cd49561cce6c40cbc174f416d9c49..e1e9e0c999fefa854a507e623b821371ebf00b78 100644 (file)
@@ -49,6 +49,8 @@ static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device)
        struct snd_pcm *pcm;
 
        list_for_each_entry(pcm, &snd_pcm_devices, list) {
+               if (pcm->internal)
+                       continue;
                if (pcm->card == card && pcm->device == device)
                        return pcm;
        }
@@ -60,6 +62,8 @@ static int snd_pcm_next(struct snd_card *card, int device)
        struct snd_pcm *pcm;
 
        list_for_each_entry(pcm, &snd_pcm_devices, list) {
+               if (pcm->internal)
+                       continue;
                if (pcm->card == card && pcm->device > device)
                        return pcm->device;
                else if (pcm->card->number > card->number)
index bf313bea70858f8a4abd18ef6c60beca6668fea1..8ad554312b69196de4d9cf056024c2e8f07907bd 100644 (file)
@@ -4623,6 +4623,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
        SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4),
+       SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_ASUS_MODE4),
        SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
        SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2),
        SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
index 8b50e5958de5a43030ac854d18c046cf903e5841..01daf655e20b1663fbb92179283dea25ac209ac9 100644 (file)
@@ -530,6 +530,7 @@ static int hp_supply_event(struct snd_soc_dapm_widget *w,
                                hubs->hp_startup_mode);
                        break;
                }
+               break;
 
        case SND_SOC_DAPM_PRE_PMD:
                snd_soc_update_bits(codec, WM8993_CHARGE_PUMP_1,
index c17c14c394df88bb0442ccaf51b5735f05455ab5..b2949aed1ac2e9bfd374dff8b0a2483ecf19ddf4 100644 (file)
@@ -1949,7 +1949,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file,
                                w->active ? "active" : "inactive");
 
        list_for_each_entry(p, &w->sources, list_sink) {
-               if (p->connected && !p->connected(w, p->sink))
+               if (p->connected && !p->connected(w, p->source))
                        continue;
 
                if (p->connect)
@@ -3495,6 +3495,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm,
                if (!w) {
                        dev_err(dapm->dev, "ASoC: Failed to create %s widget\n",
                                dai->driver->playback.stream_name);
+                       return -ENOMEM;
                }
 
                w->priv = dai;
@@ -3513,6 +3514,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm,
                if (!w) {
                        dev_err(dapm->dev, "ASoC: Failed to create %s widget\n",
                                dai->driver->capture.stream_name);
+                       return -ENOMEM;
                }
 
                w->priv = dai;