Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt

new file mode 100644 (file)

index 0000000..f4faec0
--- /dev/null
+++ b/Documentation/assoc_array.txt
@@ -0,0 +1,574 @@
+                  ========================================
+                  GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+                  ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+   - Edit script.
+   - Operations table.
+   - Manipulation functions.
+   - Access functions.
+   - Index key form.
+
+ - Internal workings.
+   - Basic internal tree layout.
+   - Shortcuts.
+   - Splitting and collapsing nodes.
+   - Non-recursive iteration.
+   - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers.  The implementation does not care where they
+     point (if anywhere) or what they point to (if anything).
+
+     [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array.  This
+     permits an object to be located in multiple arrays simultaneously.
+     Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique.  Inserting an object with the same key as one
+     already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+     length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can iterated over.  The objects will not necessarily come out in
+     key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+     RCU readlock is being held by the iterator.  Note, however, under these
+     circumstances, some objects may be seen more than once.  If this is a
+     problem, the iterator should lock against modification.  Objects will not
+     be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+     RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree.  To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes.  Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until as such time an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>.  The associative array is
+rooted on the following structure:
+
+       struct assoc_array {
+               ...
+       };
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM.  This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later.  The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears as outside of the API as a pointer of the type:
+
+       struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+       void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+     This will perform the edit functions, interpolating various write barriers
+     to permit accesses under the RCU read lock to continue.  The edit script
+     will then be passed to call_rcu() to free it and any dead stuff it points
+     to.
+
+ (2) Cancel an edit script.
+
+       void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+     This frees the edit script and all preallocated memory immediately.  If
+     this was for insertion, the new object is _not_ released by this function,
+     but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+       struct assoc_array_ops {
+               ...
+       };
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+     This should return a chunk of caller-supplied index key starting at the
+     *bit* position given by the level argument.  The level argument will be a
+     multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+     ASSOC_ARRAY_KEY_CHUNK_SIZE bits.  No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+     As the previous function, but gets its data from an object in the array
+     rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+       bool (*compare_object)(const void *object, const void *index_key);
+
+     Compare the object against an index key and return true if it matches and
+     false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+       int (*diff_objects)(const void *a, const void *b);
+
+     Return the bit position at which the index keys of two objects differ or
+     -1 if they are the same.
+
+
+ (5) Free an object.
+
+       void (*free_object)(void *object);
+
+     Free the specified object.  Note that this may be called an RCU grace
+     period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+     be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+       void assoc_array_init(struct assoc_array *array);
+
+     This initialises the base structure for an associative array.  It can't
+     fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_insert(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key,
+                          void *object);
+
+     This inserts the given object into the array.  Note that the least
+     significant bit of the pointer must be zero as it's used to type-mark
+     pointers internally.
+
+     If an object already exists for that key then it will be replaced with the
+     new object and the old one will be freed automatically.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_delete(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key);
+
+     This deletes an object that matches the specified data from the array.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.  NULL will be returned if the specified object is
+     not found within the array.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_clear(struct assoc_array *array,
+                         const struct assoc_array_ops *ops);
+
+     This deletes all the objects from an associative array and leaves it
+     completely empty.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+       void assoc_array_destroy(struct assoc_array *array,
+                                const struct assoc_array_ops *ops);
+
+     This destroys the contents of the associative array and leaves it
+     completely empty.  It is not permitted for another thread to be traversing
+     the array under the RCU read lock at the same time as this function is
+     destroying it as no RCU deferral is performed on memory release -
+     something that would require memory to be allocated.
+
+     The caller should lock exclusively against other modifiers and accessors
+     of the array.
+
+
+ (6) Garbage collect an associative array.
+
+       int assoc_array_gc(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          bool (*iterator)(void *object, void *iterator_data),
+                          void *iterator_data);
+
+     This iterates over the objects in an associative array and passes each one
+     to iterator().  If iterator() returns true, the object is kept.  If it
+     returns false, the object will be freed.  If the iterator() function
+     returns true, it must perform any appropriate refcount incrementing on the
+     object before returning.
+
+     The internal tree will be packed down if possible as part of the iteration
+     to reduce the number of nodes in it.
+
+     The iterator_data is passed directly to iterator() and is otherwise
+     ignored by the function.
+
+     The function will return 0 if successful and -ENOMEM if there wasn't
+     enough memory.
+
+     It is possible for other threads to iterate over or search the array under
+     the RCU read lock whilst this function is in progress.  The caller should
+     lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+       int assoc_array_iterate(const struct assoc_array *array,
+                               int (*iterator)(const void *object,
+                                               void *iterator_data),
+                               void *iterator_data);
+
+     This passes each object in the array to the iterator callback function.
+     iterator_data is private data for that function.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.  Under such circumstances,
+     it is possible for the iteration function to see some objects twice.  If
+     this is a problem, then modification should be locked against.  The
+     iteration algorithm should not, however, miss any objects.
+
+     The function will return 0 if no objects were in the array or else it will
+     return the result of the last iterator function called.  Iteration stops
+     immediately if any call to the iteration function results in a non-zero
+     return.
+
+
+ (2) Find an object in an associative array.
+
+       void *assoc_array_find(const struct assoc_array *array,
+                              const struct assoc_array_ops *ops,
+                              const void *index_key);
+
+     This walks through the array's internal tree directly to the object
+     specified by the index key..
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.
+
+     The function will return the object if found (and set *_type to the object
+     type) or will return NULL if the object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word.  Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels.  Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree.  This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots.  Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree.  The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels.  For example:
+
+ Level:        0               1               2               3
+       =============== =============== =============== ===============
+                                                       NODE D
+                       NODE B          NODE C  +------>+---+
+               +------>+---+   +------>+---+   |       | 0 |
+       NODE A  |       | 0 |   |       | 0 |   |       +---+
+       +---+   |       +---+   |       +---+   |       :   :
+       | 0 |   |       :   :   |       :   :   |       +---+
+       +---+   |       +---+   |       +---+   |       | f |
+       | 1 |---+       | 3 |---+       | 7 |---+       +---+
+       +---+           +---+           +---+
+       :   :           :   :           | 8 |---+
+       +---+           +---+           +---+   |       NODE E
+       | e |---+       | f |           :   :   +------>+---+
+       +---+   |       +---+           +---+           | 0 |
+       | f |   |                       | f |           +---+
+       +---+   |                       +---+           :   :
+               |       NODE F                          +---+
+               +------>+---+                           | f |
+                       | 0 |           NODE G          +---+
+                       +---+   +------>+---+
+                       :   :   |       | 0 |
+                       +---+   |       +---+
+                       | 6 |---+       :   :
+                       +---+           +---+
+                       :   :           | f |
+                       +---+           +---+
+                       | f |
+                       +---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other meta data nodes in the tree, the key space is divided thusly:
+
+       KEY PREFIX      NODE
+       ==========      ====
+       137*            D
+       138*            E
+       13[0-69-f]*     C
+       1[0-24-f]*      B
+       e6*             G
+       e[0-57-f]*      F
+       [02-df]*        A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+       INDEX KEY       PREFIX  NODE
+       =============== ======= ====
+       13694892892489  13      C
+       13795289025897  137     D
+       13889dde88793   138     E
+       138bbb89003093  138     E
+       1394879524789   12      C
+       1458952489      1       B
+       9431809de993ba  -       A
+       b4542910809cd   -       A
+       e5284310def98   e       F
+       e68428974237    e6      G
+       e7fffcbd443     e       F
+       f3842239082     -       A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space.  The leaves can be in any slot not occupied by a metadata pointer.  It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer.  If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+       SLOT    CONTENT         INDEX KEY (PREFIX)
+       ====    =============== ==================
+       1       PTR TO NODE B   1*
+       any     LEAF            9431809de993ba
+       any     LEAF            b4542910809cd
+       e       PTR TO NODE F   e*
+       any     LEAF            f3842239082
+
+and node B:
+
+       3       PTR TO NODE C   13*
+       any     LEAF            1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace.  A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels.  Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'.  The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers.  If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+slot in that parent that points to it.  None-recursive iteration uses these to
+proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace.  This involves simply replacing a NULL or old
+     matching leaf pointer with the pointer to the new leaf after a barrier.
+     The metadata blocks don't change otherwise.  An old leaf won't be freed
+     until after the RCU grace period.
+
+ (2) Simple delete.  This involves just clearing an old matching leaf.  The
+     metadata blocks don't change otherwise.  The old leaf won't be freed until
+     after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered.  This
+     may involve replacement of part of that subtree - but that won't affect
+     the iteration as we won't have reached the pointer to it yet and the
+     ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing.  This isn't a
+     problem as we've passed the anchoring pointer and won't switch onto the
+     new layout until we follow the back pointers - at which point we've
+     already examined the leaves in the replaced node (we iterate over all the
+     leaves in a node before following any of its metadata pointers).
+
+     We might, however, re-see some leaves that have been split out into a new
+     branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+     This won't affect us until we follow the back pointers.  Similar to (4).
+
+ (6) Deletion collapsing a branch under us.  This doesn't affect us because the
+     back pointers will get us back to the parent of the new node before we
+     could see the new node.  The entire collapsed subtree is thrown away
+     unchanged - and will still be rooted on the same slot, so we shouldn't
+     process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+     pointer and the parent slot pointer on a node (say, for example, we
+     inserted another node before it and moved it up a level).  We cannot do
+     this without locking against a read - so we have to replace that node too.
+
+     However, when we're changing a shortcut into a node this isn't a problem
+     as shortcuts only have one slot and so the parent slot number isn't used
+     when traversing backwards over one.  This means that it's okay to change
+     the slot number first - provided suitable barriers are used to make sure
+     the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt

index e1f343c7a34b7b10ea462a39b5e9a320ef463ba6..f69bcf5a6343bf314b5eef199999c5a676131e74 100644 (file)
--- a/Documentation/devicetree/bindings/dma/atmel-dma.txt
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -28,7 +28,7 @@ The three cells in order are:
  dependent:
    - bit 7-0: peripheral identifier for the hardware handshaking interface. The
    identifier can be different for tx and rx.
-  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP.
+  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP.
  
  Example:
  
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt

index ad6a73852f0880bf3625893d6ffc11646bdc8048..f1fb26eed0e9486f7226f58c6b14fce571c2f371 100644 (file)
--- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -15,6 +15,7 @@ adi,adt7461           +/-1C TDM Extended Temp Range I.C
  adt7461                        +/-1C TDM Extended Temp Range I.C
  at,24c08               i2c serial eeprom  (24cxx)
  atmel,24c02            i2c serial eeprom  (24cxx)
+atmel,at97sc3204t      i2c trusted platform module (TPM)
  catalyst,24c32         i2c serial eeprom
  dallas,ds1307          64 x 8, Serial, I2C Real-Time Clock
  dallas,ds1338          I2C RTC with 56-Byte NV RAM
@@ -44,6 +45,7 @@ mc,rv3029c2           Real Time Clock Module with I2C-Bus
  national,lm75          I2C TEMP SENSOR
  national,lm80          Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
  national,lm92          ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
+nuvoton,npct501                i2c trusted platform module (TPM)
  nxp,pca9556            Octal SMBus and I2C registered interface
  nxp,pca9557            8-bit I2C-bus and SMBus I/O port with reset
  nxp,pcf8563            Real-time clock/calendar
@@ -61,3 +63,4 @@ taos,tsl2550          Ambient Light Sensor with SMBUS/Two Wire Serial Interface
  ti,tsc2003             I2C Touch-Screen Controller
  ti,tmp102              Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
  ti,tmp275              Digital Temperature Sensor
+winbond,wpct301                i2c trusted platform module (TPM)
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt

index 2a4b4bce6110af59579c6f29e36fb8bcd71f7428..7fc1b010fa759a9cb69ab286b4ca032d295f0046 100644 (file)
--- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
@@ -1,33 +1,30 @@
-* Freescale 83xx DMA Controller
+* Freescale DMA Controllers
  
-Freescale PowerPC 83xx have on chip general purpose DMA controllers.
+** Freescale Elo DMA Controller
+   This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx
+   series chips such as mpc8315, mpc8349, mpc8379 etc.
  
  Required properties:
  
-- compatible        : compatible list, contains 2 entries, first is
-                "fsl,CHIP-dma", where CHIP is the processor
-                (mpc8349, mpc8360, etc.) and the second is
-                "fsl,elo-dma"
-- reg               : <registers mapping for DMA general status reg>
-- ranges               : Should be defined as specified in 1) to describe the
-                 DMA controller channels.
+- compatible        : must include "fsl,elo-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
  - cell-index        : controller index.  0 for controller @ 0x8100
-- interrupts        : <interrupt mapping for DMA IRQ>
+- interrupts        : interrupt specifier for DMA IRQ
  - interrupt-parent  : optional, if needed for interrupt mapping
  
-
  - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-                        "fsl,CHIP-dma-channel", where CHIP is the processor
-                        (mpc8349, mpc8350, etc.) and the second is
-                        "fsl,elo-dma-channel". However, see note below.
-        - reg               : <registers mapping for channel>
-        - cell-index        : dma channel index starts at 0.
+        - compatible        : must include "fsl,elo-dma-channel"
+                              However, see note below.
+        - reg               : DMA channel specific registers
+        - cell-index        : DMA channel index starts at 0.
  
  Optional properties:
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
-                         (on 83xx this is expected to be identical to
-                          the interrupts property of the parent node)
+        - interrupts        : interrupt specifier for DMA channel IRQ
+                              (on 83xx this is expected to be identical to
+                              the interrupts property of the parent node)
          - interrupt-parent  : optional, if needed for interrupt mapping
  
  Example:
@@ -70,30 +67,27 @@ Example:
                 };
         };
  
-* Freescale 85xx/86xx DMA Controller
-
-Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers.
+** Freescale EloPlus DMA Controller
+   This is a 4-channel DMA controller with extended addresses and chaining,
+   mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as
+   mpc8540, mpc8641 p4080, bsc9131 etc.
  
  Required properties:
  
-- compatible        : compatible list, contains 2 entries, first is
-                "fsl,CHIP-dma", where CHIP is the processor
-                (mpc8540, mpc8540, etc.) and the second is
-                "fsl,eloplus-dma"
-- reg               : <registers mapping for DMA general status reg>
+- compatible        : must include "fsl,eloplus-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
  - cell-index        : controller index.  0 for controller @ 0x21000,
                                           1 for controller @ 0xc000
-- ranges               : Should be defined as specified in 1) to describe the
-                 DMA controller channels.
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
  
  - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-                        "fsl,CHIP-dma-channel", where CHIP is the processor
-                        (mpc8540, mpc8560, etc.) and the second is
-                        "fsl,eloplus-dma-channel". However, see note below.
-        - cell-index        : dma channel index starts at 0.
-        - reg               : <registers mapping for channel>
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
+        - compatible        : must include "fsl,eloplus-dma-channel"
+                              However, see note below.
+        - cell-index        : DMA channel index starts at 0.
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
          - interrupt-parent  : optional, if needed for interrupt mapping
  
  Example:
@@ -134,6 +128,76 @@ Example:
                 };
         };
  
+** Freescale Elo3 DMA Controller
+   DMA controller which has same function as EloPlus except that Elo3 has 8
+   channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx
+   series chips, such as t1040, t4240, b4860.
+
+Required properties:
+
+- compatible        : must include "fsl,elo3-dma"
+- reg               : contains two entries for DMA General Status Registers,
+                      i.e. DGSR0 which includes status for channel 1~4, and
+                      DGSR1 for channel 5~8
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+        - compatible        : must include "fsl,eloplus-dma-channel"
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
+        - interrupt-parent  : optional, if needed for interrupt mapping
+
+Example:
+dma@100300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x100300 0x4>,
+             <0x100600 0x4>;
+       ranges = <0x0 0x100100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <28 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <29 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <30 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <31 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <76 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <77 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <78 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <79 2 0 0>;
+       };
+};
+
  Note on DMA channel compatible properties: The compatible property must say
  "fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
  driver (fsldma).  Any DMA channel used by fsldma cannot be used by another
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt

index a2b5663eae266d2dcae8fcf9400ef9eb16b24709..dd77a81bdb80b82b5c732ceb33ba2ef30b1d9eea 100644 (file)
--- a/Documentation/dmatest.txt
+++ b/Documentation/dmatest.txt
@@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases.
  
         Part 2 - When dmatest is built as a module...
  
-After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. There are two important files located. First
-is the 'run' node that controls run and stop phases of the test, and the second
-one, 'results', is used to get the test case results.
-
-Note that in this case test will not run on load automatically.
-
  Example of usage:
+       % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+       % modprobe dmatest
         % echo dma0chan0 > /sys/module/dmatest/parameters/channel
         % echo 2000 > /sys/module/dmatest/parameters/timeout
         % echo 1 > /sys/module/dmatest/parameters/iterations
-       % echo 1 > /sys/kernel/debug/dmatest/run
+       % echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+       dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
  
  Hint: available channel list could be extracted by running the following
  command:
         % ls -1 /sys/class/dma/
  
-After a while you will start to get messages about current status or error like
-in the original code.
+Once started a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted.  After that only test failure messages are reported until the test
+stops.
  
  Note that running a new test will not stop any in progress test.
  
-The following command should return actual state of the test.
-       % cat /sys/kernel/debug/dmatest/run
-
-To wait for test done the user may perform a busy loop that checks the state.
-
-       % while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
-       > do
-       >       echo -n "."
-       >       sleep 1
-       > done
-       > echo
+The following command returns the state of the test.
+       % cat /sys/module/dmatest/parameters/run
+
+To wait for test completion userpace can poll 'run' until it is false, or use
+the wait parameter.  Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning.  For example, the following scripts wait for 42 tests
+to complete before exiting.  Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
+
+Example:
+       % modprobe dmatest run=1 iterations=42 wait=1
+       % modprobe -r dmatest
+...or:
+       % modprobe dmatest run=1 iterations=42
+       % cat /sys/module/dmatest/parameters/wait
+       % modprobe -r dmatest
  
         Part 3 - When built-in in the kernel...
  
@@ -62,21 +71,22 @@ case. You always could check them at run-time by running
  
         Part 4 - Gathering the test results
  
-The module provides a storage for the test results in the memory. The gathered
-data could be used after test is done.
+Test results are printed to the kernel log buffer with the format:
  
-The special file 'results' in the debugfs represents gathered data of the in
-progress test. The messages collected are printed to the kernel log as well.
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
  
  Example of output:
-       % cat /sys/kernel/debug/dmatest/results
-       dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+       % dmesg | tail -n 1
+       dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
  
  The message format is unified across the different types of errors. A number in
  the parens represents additional information, e.g. error code, error counter,
-or status.
+or status.  A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
  
-Comparison between buffers is stored to the dedicated structure.
+Example:
+       % dmesg | tail -n 1
+       dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
  
-Note that the verify result is now accessible only via file 'results' in the
-debugfs.
+The details of a data miscompare error are also emitted, but do not follow the
+above format.
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt

index 9dae59407437916759a73e00166b6ba04b2c4e52..5dd282dda55c5eca0fe50b6b1cfc0116d3cc7432 100644 (file)
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off.
  
         See comments at the top of fs/btrfs/check-integrity.c for more info.
  
+  commit=<seconds>
+       Set the interval of periodic commit, 30 seconds by default. Higher
+       values defer data being synced to permanent storage with obvious
+       consequences when the system crashes. The upper bound is not forced,
+       but a warning is printed if it's more than 300 seconds (5 minutes).
+
    compress
    compress=<type>
    compress-force
@@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off.
         Currently this scans a list of several previous tree roots and tries to 
         use the first readable.
  
- skip_balance
+  rescan_uuid_tree
+       Force check and rebuild procedure of the UUID tree. This should not
+       normally be needed.
+
+  skip_balance
         Skip automatic resume of interrupted balance operation after mount.
         May be resumed with "btrfs balance resume."
  
@@ -234,24 +244,14 @@ available from the git repository at the following location:
  
  These include the following tools:
  
-mkfs.btrfs: create a filesystem
-
-btrfsctl: control program to create snapshots and subvolumes:
+* mkfs.btrfs: create a filesystem
  
-       mount /dev/sda2 /mnt
-       btrfsctl -s new_subvol_name /mnt
-       btrfsctl -s snapshot_of_default /mnt/default
-       btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
-       btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
-       ls /mnt
-       default snapshot_of_a_snapshot snapshot_of_new_subvol
-       new_subvol_name snapshot_of_default
+* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
  
-       Snapshots and subvolumes cannot be deleted right now, but you can
-       rm -rf all the files and directories inside them.
+* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
  
-btrfsck: do a limited check of the FS extent trees.
+Other tools for specific tasks:
  
-btrfs-debug-tree: print all of the FS metadata in text form.  Example:
+* btrfs-convert: in-place conversion from ext2/3/4 filesystems
  
-       btrfs-debug-tree /dev/sda2 >& big_output_file
+* btrfs-image: dump filesystem metadata for debugging
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 9ca3e74a10e128b4103d00dc8d06c12e6127d4c8..50680a59a2ff9a913e449a1d71ced9fab3004fe4 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         owned by uid=0.
  
         ima_hash=       [IMA]
-                       Format: { "sha1" | "md5" }
+                       Format: { md5 | sha1 | rmd160 | sha256 | sha384
+                                  | sha512 | ... }
                         default: "sha1"
  
+                       The list of supported hash algorithms is defined
+                       in crypto/hash_info.h.
+
         ima_tcb         [IMA]
                         Load a policy which meets the needs of the Trusted
                         Computing Base.  This means IMA will measure all
                         programs exec'd, files mmap'd for exec, and all files
                         opened for read by uid=0.
  
+       ima_template=   [IMA]
+                       Select one of defined IMA measurements template formats.
+                       Formats: { "ima" | "ima-ng" }
+                       Default: "ima-ng"
+
         init=           [KNL]
                         Format: <full_path>
                         Run specified binary instead of /sbin/init as init
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt

index 0f54333b0ff2990ce090f88087e17417a894b46c..b6ce00b2be9ae9682c6821bfb70175e42cc75e57 100644 (file)
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -547,13 +547,11 @@ helper functions described in Section 4.  In that case, pm_runtime_resume()
  should be used.  Of course, for this purpose the device's runtime PM has to be
  enabled earlier by calling pm_runtime_enable().
  
-If the device bus type's or driver's ->probe() callback runs
-pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
-they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the driver core before executing ->probe().  Still, it may be
-desirable to suspend the device as soon as ->probe() has finished, so the driver
-core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
-the device at that time.
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asyncronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time.  A driver that makes use of the runtime autosuspend feature, may want to
+update the last busy mark before returning from ->probe().
  
  Moreover, the driver core prevents runtime PM callbacks from racing with the bus
  notifier callback in __device_release_driver(), which is necessary, because the
@@ -656,7 +654,7 @@ out the following operations:
      __pm_runtime_disable() with 'false' as the second argument for every device
      right before executing the subsystem-level .suspend_late() callback for it.
  
-  * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
+  * During system resume it calls pm_runtime_enable() and pm_runtime_put()
      for every device right after executing the subsystem-level .resume_early()
      callback and right after executing the subsystem-level .resume() callback
      for it, respectively.
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX

index 414235c1fcfcdd3f4a8ca191df7edf7cf21049ad..45c82fd3e9d39bacaa1febcabba942446c2afda4 100644 (file)
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -22,3 +22,5 @@ keys.txt
         - description of the kernel key retention service.
  tomoyo.txt
         - documentation on the TOMOYO Linux Security Module.
+IMA-templates.txt
+       - documentation on the template management mechanism for IMA.
diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt

new file mode 100644 (file)

index 0000000..a777e5f
--- /dev/null
+++ b/Documentation/security/IMA-templates.txt
@@ -0,0 +1,87 @@
+                       IMA Template Management Mechanism
+
+
+==== INTRODUCTION ====
+
+The original 'ima' template is fixed length, containing the filedata hash
+and pathname. The filedata hash is limited to 20 bytes (md5/sha1).
+The pathname is a null terminated string, limited to 255 characters.
+To overcome these limitations and to add additional file metadata, it is
+necessary to extend the current version of IMA by defining additional
+templates. For example, information that could be possibly reported are
+the inode UID/GID or the LSM labels either of the inode and of the process
+that is accessing it.
+
+However, the main problem to introduce this feature is that, each time
+a new template is defined, the functions that generate and display
+the measurements list would include the code for handling a new format
+and, thus, would significantly grow over the time.
+
+The proposed solution solves this problem by separating the template
+management from the remaining IMA code. The core of this solution is the
+definition of two new data structures: a template descriptor, to determine
+which information should be included in the measurement list; a template
+field, to generate and display data of a given type.
+
+Managing templates with these structures is very simple. To support
+a new data type, developers define the field identifier and implement
+two functions, init() and show(), respectively to generate and display
+measurement entries. Defining a new template descriptor requires
+specifying the template format, a string of field identifiers separated
+by the '|' character. While in the current implementation it is possible
+to define new template descriptors only by adding their definition in the
+template specific code (ima_template.c), in a future version it will be
+possible to register a new template on a running kernel by supplying to IMA
+the desired format string. In this version, IMA initializes at boot time
+all defined template descriptors by translating the format into an array
+of template fields structures taken from the set of the supported ones.
+
+After the initialization step, IMA will call ima_alloc_init_template()
+(new function defined within the patches for the new template management
+mechanism) to generate a new measurement entry by using the template
+descriptor chosen through the kernel configuration or through the newly
+introduced 'ima_template=' kernel command line parameter. It is during this
+phase that the advantages of the new architecture are clearly shown:
+the latter function will not contain specific code to handle a given template
+but, instead, it simply calls the init() method of the template fields
+associated to the chosen template descriptor and store the result (pointer
+to allocated data and data length) in the measurement entry structure.
+
+The same mechanism is employed to display measurements entries.
+The functions ima[_ascii]_measurements_show() retrieve, for each entry,
+the template descriptor used to produce that entry and call the show()
+method for each item of the array of template fields structures.
+
+
+
+==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ====
+
+In the following, there is the list of supported template fields
+('<identifier>': description), that can be used to define new template
+descriptors by adding their identifier to the format string
+(support for more data types will be added later):
+
+ - 'd': the digest of the event (i.e. the digest of a measured file),
+        calculated with the SHA1 or MD5 hash algorithm;
+ - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
+ - 'd-ng': the digest of the event, calculated with an arbitrary hash
+           algorithm (field format: [<hash algo>:]digest, where the digest
+           prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'n-ng': the name of the event, without size limitations.
+
+
+Below, there is the list of defined template descriptors:
+ - "ima": its format is 'd|n';
+ - "ima-ng" (default): its format is 'd-ng|n-ng'.
+
+
+
+==== USE ====
+
+To specify the template descriptor to be used to generate measurement entries,
+currently the following methods are supported:
+
+ - select a template descriptor among those supported in the kernel
+   configuration ('ima-ng' is the default choice);
+ - specify a template descriptor name from the kernel command line through
+   the 'ima_template=' parameter.
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt

index 7b4145d00452f259fe79eff4f400ecfc949a6fd1..a4c33f1a7c6de5dc2207a21bab00846266668f90 100644 (file)
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -865,15 +865,14 @@ encountered:
       calling processes has a searchable link to the key from one of its
       keyrings. There are three functions for dealing with these:
  
-       key_ref_t make_key_ref(const struct key *key,
-                              unsigned long possession);
+       key_ref_t make_key_ref(const struct key *key, bool possession);
  
         struct key *key_ref_to_ptr(const key_ref_t key_ref);
  
-       unsigned long is_key_possessed(const key_ref_t key_ref);
+       bool is_key_possessed(const key_ref_t key_ref);
  
       The first function constructs a key reference from a key pointer and
-     possession information (which must be 0 or 1 and not any other value).
+     possession information (which must be true or false).
  
       The second function retrieves the key pointer from a reference and the
       third retrieves the possession flag.
@@ -961,14 +960,17 @@ payload contents" for more information.
      the argument will not be parsed.
  
  
-(*) Extra references can be made to a key by calling the following function:
+(*) Extra references can be made to a key by calling one of the following
+    functions:
  
+       struct key *__key_get(struct key *key);
         struct key *key_get(struct key *key);
  
-    These need to be disposed of by calling key_put() when they've been
-    finished with. The key pointer passed in will be returned. If the pointer
-    is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
-    no increment will take place.
+    Keys so references will need to be disposed of by calling key_put() when
+    they've been finished with.  The key pointer passed in will be returned.
+
+    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+    then the key will not be dereferenced and no increment will take place.
  
  
  (*) A key's serial number can be obtained by calling:
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock

index 7521d367f21d689a70faa048b2a57b781fbb143e..6dea4fd5c96100d75f9152ffd8ac9be5909acf5f 100644 (file)
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -63,9 +63,9 @@ levels.
  PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
  allocation and pgtable_pmd_page_dtor() on freeing.
  
-Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but
-make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE
-preallocate few PMDs on pgd_alloc().
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
+pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
+paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
  
  With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
  
diff --git a/MAINTAINERS b/MAINTAINERS

index 63f30484932b1cddbe5f9c5bd05ca6473d612e0b..8285ed4676b6388502be84ddde71d1e201827ce8 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4065,6 +4065,7 @@ F:        arch/x86/include/uapi/asm/hyperv.h
  F:     arch/x86/kernel/cpu/mshyperv.c
  F:     drivers/hid/hid-hyperv.c
  F:     drivers/hv/
+F:     drivers/input/serio/hyperv-keyboard.c
  F:     drivers/net/hyperv/
  F:     drivers/scsi/storvsc_drv.c
  F:     drivers/video/hyperv_fb.c
@@ -7515,9 +7516,10 @@ SELINUX SECURITY MODULE
  M:     Stephen Smalley <sds@tycho.nsa.gov>
  M:     James Morris <james.l.morris@oracle.com>
  M:     Eric Paris <eparis@parisplace.org>
+M:     Paul Moore <paul@paul-moore.com>
  L:     selinux@tycho.nsa.gov (subscribers-only, general discussion)
  W:     http://selinuxproject.org
-T:     git git://git.infradead.org/users/eparis/selinux.git
+T:     git git://git.infradead.org/users/pcmoore/selinux
  S:     Supported
  F:     include/linux/selinux*
  F:     security/selinux/
@@ -8664,6 +8666,7 @@ F:        drivers/media/usb/tm6000/
  TPM DEVICE DRIVER
  M:     Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com>
  M:     Ashley Lai <ashley@ashleylai.com>
+M:     Peter Huewe <peterhuewe@gmx.de>
  M:     Rajiv Andrade <mail@srajiv.net>
  W:     http://tpmdd.sourceforge.net
  M:     Marcel Selhorst <tpmdd@selhorst.net>
@@ -9522,8 +9525,8 @@ F:        drivers/xen/*swiotlb*
  
  XFS FILESYSTEM
  P:     Silicon Graphics Inc
+M:     Dave Chinner <dchinner@fromorbit.com>
  M:     Ben Myers <bpm@sgi.com>
-M:     Alex Elder <elder@kernel.org>
  M:     xfs@oss.sgi.com
  L:     xfs@oss.sgi.com
  W:     http://oss.sgi.com/projects/xfs
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig

index 135c674eaf9ec3383b309e7869ea261841a82fe4..d39dc9b95a2c6810ae920ed9d5dbdadbcbb37de5 100644 (file)
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -16,8 +16,8 @@ config ALPHA
         select ARCH_WANT_IPC_PARSE_VERSION
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
         select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+       select GENERIC_CLOCKEVENTS
         select GENERIC_SMP_IDLE_THREAD
-       select GENERIC_CMOS_UPDATE
         select GENERIC_STRNCPY_FROM_USER
         select GENERIC_STRNLEN_USER
         select HAVE_MOD_ARCH_SPECIFIC
@@ -488,6 +488,20 @@ config VGA_HOSE
           which always have multiple hoses, and whose consoles support it.
  
  
+config ALPHA_QEMU
+       bool "Run under QEMU emulation"
+       depends on !ALPHA_GENERIC
+       ---help---
+         Assume the presence of special features supported by QEMU PALcode
+         that reduce the overhead of system emulation.
+
+         Generic kernels will auto-detect QEMU.  But when building a
+         system-specific kernel, the assumption is that we want to
+         elimiate as many runtime tests as possible.
+
+         If unsure, say N.
+
+
  config ALPHA_SRM
         bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME
         depends on TTY
@@ -572,6 +586,30 @@ config NUMA
           Access).  This option is for configuring high-end multiprocessor
           server machines.  If in doubt, say N.
  
+config ALPHA_WTINT
+       bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC
+       default y if ALPHA_QEMU
+       default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA)
+       default n if !ALPHA_SRM && !ALPHA_GENERIC
+       default y if SMP
+       ---help---
+         The Wait for Interrupt (WTINT) PALcall attempts to place the CPU
+         to sleep until the next interrupt.  This may reduce the power
+         consumed, and the heat produced by the computer.  However, it has
+         the side effect of making the cycle counter unreliable as a timing
+         device across the sleep.
+
+         For emulation under QEMU, definitely say Y here, as we have other
+         mechanisms for measuring time than the cycle counter.
+
+         For EV4 (but not LCA), EV5 and EV56 systems, or for systems running
+         MILO, sleep mode is not supported so you might as well say N here.
+
+         For SMP systems we cannot use the cycle counter for timing anyway,
+         so you might as well say Y here.
+
+         If unsure, say N.
+
  config NODES_SHIFT
         int
         default "7"
@@ -613,9 +651,41 @@ config VERBOSE_MCHECK_ON
  
           Take the default (1) unless you want more control or more info.
  
+choice
+       prompt "Timer interrupt frequency (HZ)?"
+       default HZ_128 if ALPHA_QEMU
+       default HZ_1200 if ALPHA_RAWHIDE
+       default HZ_1024
+       ---help---
+         The frequency at which timer interrupts occur.  A high frequency
+         minimizes latency, whereas a low frequency minimizes overhead of
+         process accounting.  The later effect is especially significant
+         when being run under QEMU.
+
+         Note that some Alpha hardware cannot change the interrupt frequency
+         of the timer.  If unsure, say 1024 (or 1200 for Rawhide).
+
+       config HZ_32
+               bool "32 Hz"
+       config HZ_64
+               bool "64 Hz"
+       config HZ_128
+               bool "128 Hz"
+       config HZ_256
+               bool "256 Hz"
+       config HZ_1024
+               bool "1024 Hz"
+       config HZ_1200
+               bool "1200 Hz"
+endchoice
+
  config HZ
-       int
-       default 1200 if ALPHA_RAWHIDE
+       int 
+       default 32 if HZ_32
+       default 64 if HZ_64
+       default 128 if HZ_128
+       default 256 if HZ_256
+       default 1200 if HZ_1200
         default 1024
  
  source "drivers/pci/Kconfig"
diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h

index 72dbf235927054145d8c18c14968d452efd2c494..75cb3641ed2f0b507c1bcd6a86cb2d8da2966044 100644 (file)
--- a/arch/alpha/include/asm/machvec.h
+++ b/arch/alpha/include/asm/machvec.h
@@ -33,6 +33,7 @@ struct alpha_machine_vector
  
         int nr_irqs;
         int rtc_port;
+       int rtc_boot_cpu_only;
         unsigned int max_asn;
         unsigned long max_isa_dma_address;
         unsigned long irq_probe_mask;
@@ -95,9 +96,6 @@ struct alpha_machine_vector
  
         struct _alpha_agp_info *(*agp_info)(void);
  
-       unsigned int (*rtc_get_time)(struct rtc_time *);
-       int (*rtc_set_time)(struct rtc_time *);
-
         const char *vector_name;
  
         /* NUMA information */
@@ -126,13 +124,19 @@ extern struct alpha_machine_vector alpha_mv;
  
  #ifdef CONFIG_ALPHA_GENERIC
  extern int alpha_using_srm;
+extern int alpha_using_qemu;
  #else
-#ifdef CONFIG_ALPHA_SRM
-#define alpha_using_srm 1
-#else
-#define alpha_using_srm 0
-#endif
+# ifdef CONFIG_ALPHA_SRM
+#  define alpha_using_srm 1
+# else
+#  define alpha_using_srm 0
+# endif
+# ifdef CONFIG_ALPHA_QEMU
+#  define alpha_using_qemu 1
+# else
+#  define alpha_using_qemu 0
+# endif
  #endif /* GENERIC */
  
-#endif
+#endif /* __KERNEL__ */
  #endif /* __ALPHA_MACHVEC_H */
diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h

index 6fcd2b5b08f0d5ac4462ba6616a66519c2819d7d..5422a47646fc20add2047b57d41a91593905e242 100644 (file)
--- a/arch/alpha/include/asm/pal.h
+++ b/arch/alpha/include/asm/pal.h
@@ -89,6 +89,7 @@ __CALL_PAL_W1(wrmces, unsigned long);
  __CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long);
  __CALL_PAL_W1(wrusp, unsigned long);
  __CALL_PAL_W1(wrvptptr, unsigned long);
+__CALL_PAL_RW1(wtint, unsigned long, unsigned long);
  
  /*
   * TB routines..
@@ -111,5 +112,75 @@ __CALL_PAL_W1(wrvptptr, unsigned long);
  #define tbiap()                __tbi(-1, /* no second argument */)
  #define tbia()         __tbi(-2, /* no second argument */)
  
+/*
+ * QEMU Cserv routines..
+ */
+
+static inline unsigned long
+qemu_get_walltime(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 3;
+
+       asm("call_pal %2 # cserve get_time"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
+static inline unsigned long
+qemu_get_alarm(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 4;
+
+       asm("call_pal %2 # cserve get_alarm"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
+static inline void
+qemu_set_alarm_rel(unsigned long expire)
+{
+       register unsigned long a0 __asm__("$16") = 5;
+       register unsigned long a1 __asm__("$17") = expire;
+
+       asm volatile("call_pal %2 # cserve set_alarm_rel"
+                    : "+r"(a0), "+r"(a1)
+                    : "i"(PAL_cserve)
+                    : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline void
+qemu_set_alarm_abs(unsigned long expire)
+{
+       register unsigned long a0 __asm__("$16") = 6;
+       register unsigned long a1 __asm__("$17") = expire;
+
+       asm volatile("call_pal %2 # cserve set_alarm_abs"
+                    : "+r"(a0), "+r"(a1)
+                    : "i"(PAL_cserve)
+                    : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline unsigned long
+qemu_get_vmtime(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 7;
+
+       asm("call_pal %2 # cserve get_time"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
  #endif /* !__ASSEMBLY__ */
  #endif /* __ALPHA_PAL_H */
diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h

index d70408d36677c86d0fbd8ce90530724e54d15a57..f71c3b0ed3606c7fc96ab6ee45b66ba324dc30ee 100644 (file)
--- a/arch/alpha/include/asm/rtc.h
+++ b/arch/alpha/include/asm/rtc.h
@@ -1,12 +1 @@
-#ifndef _ALPHA_RTC_H
-#define _ALPHA_RTC_H
-
-#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \
- || defined(CONFIG_ALPHA_GENERIC)
-# define get_rtc_time          alpha_mv.rtc_get_time
-# define set_rtc_time          alpha_mv.rtc_set_time
-#endif
-
  #include <asm-generic/rtc.h>
-
-#endif
diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h

index b02b8a282940fd3e64bde0d757fdb4f6a5ba94c7..c2911f5917041abd49dea5f14855ac9691d1aec0 100644 (file)
--- a/arch/alpha/include/asm/string.h
+++ b/arch/alpha/include/asm/string.h
@@ -22,15 +22,27 @@ extern void * __memcpy(void *, const void *, size_t);
  
  #define __HAVE_ARCH_MEMSET
  extern void * __constant_c_memset(void *, unsigned long, size_t);
+extern void * ___memset(void *, int, size_t);
  extern void * __memset(void *, int, size_t);
  extern void * memset(void *, int, size_t);
  
-#define memset(s, c, n)                                                            \
-(__builtin_constant_p(c)                                                   \
- ? (__builtin_constant_p(n) && (c) == 0                                            \
-    ? __builtin_memset((s),0,(n))                                          \
-    : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \
- : __memset((s),(c),(n)))
+/* For gcc 3.x, we cannot have the inline function named "memset" because
+   the __builtin_memset will attempt to resolve to the inline as well,
+   leading to a "sorry" about unimplemented recursive inlining.  */
+extern inline void *__memset(void *s, int c, size_t n)
+{
+       if (__builtin_constant_p(c)) {
+               if (__builtin_constant_p(n)) {
+                       return __builtin_memset(s, c, n);
+               } else {
+                       unsigned long c8 = (c & 0xff) * 0x0101010101010101UL;
+                       return __constant_c_memset(s, c8, n);
+               }
+       }
+       return ___memset(s, c, n);
+}
+
+#define memset __memset
  
  #define __HAVE_ARCH_STRCPY
  extern char * strcpy(char *,const char *);
diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h

index 3c0ce08e5f592d9b779c10a441527cd9061e9c07..dfc8140b908821d46a3d82f30baaa121c1bf64ef 100644 (file)
--- a/arch/alpha/include/uapi/asm/pal.h
+++ b/arch/alpha/include/uapi/asm/pal.h
@@ -46,6 +46,7 @@
  #define PAL_rdusp      58
  #define PAL_whami      60
  #define PAL_retsys     61
+#define PAL_wtint      62
  #define PAL_rti                63
  
  
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile

index 84ec46b38f7dc1c39043f8671bc60b3db1a62b9d..0d54650e78fc6b622272d88ac6406273f8e9a282 100644 (file)
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_PCI)     += pci.o pci_iommu.o pci-sysfs.o
  obj-$(CONFIG_SRM_ENV)  += srm_env.o
  obj-$(CONFIG_MODULES)  += module.o
  obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o
  
  ifdef CONFIG_ALPHA_GENERIC
  
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c

index 89566b346c0f802bd5bfe0c817bd2fee9bd39d3e..f4c7ab6f43b0dc167bce50125c812e79ebb2d067 100644 (file)
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -40,6 +40,7 @@ EXPORT_SYMBOL(strrchr);
  EXPORT_SYMBOL(memmove);
  EXPORT_SYMBOL(__memcpy);
  EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(___memset);
  EXPORT_SYMBOL(__memsetw);
  EXPORT_SYMBOL(__constant_c_memset);
  EXPORT_SYMBOL(copy_page);
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c

index 28e4429596f3f208c1b868887112bad4faa33987..1c8625cb0e253fbc57e1b341fe48867bd7369e96 100644 (file)
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -66,21 +66,7 @@ do_entInt(unsigned long type, unsigned long vector,
                 break;
         case 1:
                 old_regs = set_irq_regs(regs);
-#ifdef CONFIG_SMP
-         {
-               long cpu;
-
-               smp_percpu_timer_interrupt(regs);
-               cpu = smp_processor_id();
-               if (cpu != boot_cpuid) {
-                       kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ));
-               } else {
-                       handle_irq(RTC_IRQ);
-               }
-         }
-#else
                 handle_irq(RTC_IRQ);
-#endif
                 set_irq_regs(old_regs);
                 return;
         case 2:
@@ -228,7 +214,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr,
   */
  
  struct irqaction timer_irqaction = {
-       .handler        = timer_interrupt,
+       .handler        = rtc_timer_interrupt,
         .name           = "timer",
  };
  
diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h

index 7fa62488bd16791f77b0218451629d749c2e5351..f54bdf658cd0b9ff6b72f71fc40504bd8dff252e 100644 (file)
--- a/arch/alpha/kernel/machvec_impl.h
+++ b/arch/alpha/kernel/machvec_impl.h
@@ -43,10 +43,7 @@
  #define CAT1(x,y)  x##y
  #define CAT(x,y)   CAT1(x,y)
  
-#define DO_DEFAULT_RTC \
-       .rtc_port = 0x70, \
-       .rtc_get_time = common_get_rtc_time, \
-       .rtc_set_time = common_set_rtc_time
+#define DO_DEFAULT_RTC                 .rtc_port = 0x70
  
  #define DO_EV4_MMU                                                     \
         .max_asn =                      EV4_MAX_ASN,                    \
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c

index d821b17047e0abbe54dd82871d3cd9e324d28df8..c52e7f0ee5f6084bd2c8068a552659f97edd9a8d 100644 (file)
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -83,6 +83,8 @@ struct alpha_pmu_t {
         long pmc_left[3];
          /* Subroutine for allocation of PMCs.  Enforces constraints. */
         int (*check_constraints)(struct perf_event **, unsigned long *, int);
+       /* Subroutine for checking validity of a raw event for this PMU. */
+       int (*raw_event_valid)(u64 config);
  };
  
  /*
@@ -203,6 +205,12 @@ success:
  }
  
  
+static int ev67_raw_event_valid(u64 config)
+{
+       return config >= EV67_CYCLES && config < EV67_LAST_ET;
+};
+
+
  static const struct alpha_pmu_t ev67_pmu = {
         .event_map = ev67_perfmon_event_map,
         .max_events = ARRAY_SIZE(ev67_perfmon_event_map),
@@ -211,7 +219,8 @@ static const struct alpha_pmu_t ev67_pmu = {
         .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK,  EV67_PCTR_1_COUNT_MASK,  0},
         .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0},
         .pmc_left = {16, 4, 0},
-       .check_constraints = ev67_check_constraints
+       .check_constraints = ev67_check_constraints,
+       .raw_event_valid = ev67_raw_event_valid,
  };
  
  
@@ -609,7 +618,9 @@ static int __hw_perf_event_init(struct perf_event *event)
         } else if (attr->type == PERF_TYPE_HW_CACHE) {
                 return -EOPNOTSUPP;
         } else if (attr->type == PERF_TYPE_RAW) {
-               ev = attr->config & 0xff;
+               if (!alpha_pmu->raw_event_valid(attr->config))
+                       return -EINVAL;
+               ev = attr->config;
         } else {
                 return -EOPNOTSUPP;
         }
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c

index f2360a74e5d5544983160d951c46bddb98819e0e..1941a07b5811f925aed82e853aab4efb081f74ca 100644 (file)
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -46,6 +46,23 @@
  void (*pm_power_off)(void) = machine_power_off;
  EXPORT_SYMBOL(pm_power_off);
  
+#ifdef CONFIG_ALPHA_WTINT
+/*
+ * Sleep the CPU.
+ * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts.
+ */
+void arch_cpu_idle(void)
+{
+       wtint(0);
+       local_irq_enable();
+}
+
+void arch_cpu_idle_dead(void)
+{
+       wtint(INT_MAX);
+}
+#endif /* ALPHA_WTINT */
+
  struct halt_info {
         int mode;
         char *restart_cmd;
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h

index d3e52d3fd59299771ed3e90d0fc13f0aca9faa6f..da2d6ec9c37065ca48265597cc6cd8b91f6e2796 100644 (file)
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -135,17 +135,15 @@ extern void unregister_srm_console(void);
  /* smp.c */
  extern void setup_smp(void);
  extern void handle_ipi(struct pt_regs *);
-extern void smp_percpu_timer_interrupt(struct pt_regs *);
  
  /* bios32.c */
  /* extern void reset_for_srm(void); */
  
  /* time.c */
-extern irqreturn_t timer_interrupt(int irq, void *dev);
+extern irqreturn_t rtc_timer_interrupt(int irq, void *dev);
+extern void init_clockevent(void);
  extern void common_init_rtc(void);
  extern unsigned long est_cycle_freq;
-extern unsigned int common_get_rtc_time(struct rtc_time *time);
-extern int common_set_rtc_time(struct rtc_time *time);
  
  /* smc37c93x.c */
  extern void SMC93x_Init(void);
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c

new file mode 100644 (file)

index 0000000..c8d284d
--- /dev/null
+++ b/arch/alpha/kernel/rtc.c
@@ -0,0 +1,323 @@
+/*
+ *  linux/arch/alpha/kernel/rtc.c
+ *
+ *  Copyright (C) 1991, 1992, 1995, 1999, 2000  Linus Torvalds
+ *
+ * This file contains date handling.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+#include "proto.h"
+
+
+/*
+ * Support for the RTC device.
+ *
+ * We don't want to use the rtc-cmos driver, because we don't want to support
+ * alarms, as that would be indistinguishable from timer interrupts.
+ *
+ * Further, generic code is really, really tied to a 1900 epoch.  This is
+ * true in __get_rtc_time as well as the users of struct rtc_time e.g.
+ * rtc_tm_to_time.  Thankfully all of the other epochs in use are later
+ * than 1900, and so it's easy to adjust.
+ */
+
+static unsigned long rtc_epoch;
+
+static int __init
+specifiy_epoch(char *str)
+{
+       unsigned long epoch = simple_strtoul(str, NULL, 0);
+       if (epoch < 1900)
+               printk("Ignoring invalid user specified epoch %lu\n", epoch);
+       else
+               rtc_epoch = epoch;
+       return 1;
+}
+__setup("epoch=", specifiy_epoch);
+
+static void __init
+init_rtc_epoch(void)
+{
+       int epoch, year, ctrl;
+
+       if (rtc_epoch != 0) {
+               /* The epoch was specified on the command-line.  */
+               return;
+       }
+
+       /* Detect the epoch in use on this computer.  */
+       ctrl = CMOS_READ(RTC_CONTROL);
+       year = CMOS_READ(RTC_YEAR);
+       if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+               year = bcd2bin(year);
+
+       /* PC-like is standard; used for year >= 70 */
+       epoch = 1900;
+       if (year < 20) {
+               epoch = 2000;
+       } else if (year >= 20 && year < 48) {
+               /* NT epoch */
+               epoch = 1980;
+       } else if (year >= 48 && year < 70) {
+               /* Digital UNIX epoch */
+               epoch = 1952;
+       }
+       rtc_epoch = epoch;
+
+       printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year);
+}
+
+static int
+alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       __get_rtc_time(tm);
+
+       /* Adjust for non-default epochs.  It's easier to depend on the
+          generic __get_rtc_time and adjust the epoch here than create
+          a copy of __get_rtc_time with the edits we need.  */
+       if (rtc_epoch != 1900) {
+               int year = tm->tm_year;
+               /* Undo the century adjustment made in __get_rtc_time.  */
+               if (year >= 100)
+                       year -= 100;
+               year += rtc_epoch - 1900;
+               /* Redo the century adjustment with the epoch in place.  */
+               if (year <= 69)
+                       year += 100;
+               tm->tm_year = year;
+       }
+
+       return rtc_valid_tm(tm);
+}
+
+static int
+alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct rtc_time xtm;
+
+       if (rtc_epoch != 1900) {
+               xtm = *tm;
+               xtm.tm_year -= rtc_epoch - 1900;
+               tm = &xtm;
+       }
+
+       return __set_rtc_time(tm);
+}
+
+static int
+alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+{
+       int retval = 0;
+       int real_seconds, real_minutes, cmos_minutes;
+       unsigned char save_control, save_freq_select;
+
+       /* Note: This code only updates minutes and seconds.  Comments
+          indicate this was to avoid messing with unknown time zones,
+          and with the epoch nonsense described above.  In order for
+          this to work, the existing clock cannot be off by more than
+          15 minutes.
+
+          ??? This choice is may be out of date.  The x86 port does
+          not have problems with timezones, and the epoch processing has
+          now been fixed in alpha_set_rtc_time.
+
+          In either case, one can always force a full rtc update with
+          the userland hwclock program, so surely 15 minute accuracy
+          is no real burden.  */
+
+       /* In order to set the CMOS clock precisely, we have to be called
+          500 ms after the second nowtime has started, because when
+          nowtime is written into the registers of the CMOS clock, it will
+          jump to the next second precisely 500 ms later. Check the Motorola
+          MC146818A or Dallas DS12887 data sheet for details.  */
+
+       /* irq are locally disabled here */
+       spin_lock(&rtc_lock);
+       /* Tell the clock it's being set */
+       save_control = CMOS_READ(RTC_CONTROL);
+       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+       /* Stop and reset prescaler */
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+       cmos_minutes = CMOS_READ(RTC_MINUTES);
+       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+               cmos_minutes = bcd2bin(cmos_minutes);
+
+       real_seconds = nowtime % 60;
+       real_minutes = nowtime / 60;
+       if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) {
+               /* correct for half hour time zone */
+               real_minutes += 30;
+       }
+       real_minutes %= 60;
+
+       if (abs(real_minutes - cmos_minutes) < 30) {
+               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
+               }
+               CMOS_WRITE(real_seconds,RTC_SECONDS);
+               CMOS_WRITE(real_minutes,RTC_MINUTES);
+       } else {
+               printk_once(KERN_NOTICE
+                           "set_rtc_mmss: can't update from %d to %d\n",
+                           cmos_minutes, real_minutes);
+               retval = -1;
+       }
+
+       /* The following flags have to be released exactly in this order,
+        * otherwise the DS12887 (popular MC146818A clone with integrated
+        * battery and quartz) will not reset the oscillator and will not
+        * update precisely 500 ms later. You won't find this mentioned in
+        * the Dallas Semiconductor data sheets, but who believes data
+        * sheets anyway ...                           -- Markus Kuhn
+        */
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       spin_unlock(&rtc_lock);
+
+       return retval;
+}
+
+static int
+alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case RTC_EPOCH_READ:
+               return put_user(rtc_epoch, (unsigned long __user *)arg);
+       case RTC_EPOCH_SET:
+               if (arg < 1900)
+                       return -EINVAL;
+               rtc_epoch = arg;
+               return 0;
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
+static const struct rtc_class_ops alpha_rtc_ops = {
+       .read_time = alpha_rtc_read_time,
+       .set_time = alpha_rtc_set_time,
+       .set_mmss = alpha_rtc_set_mmss,
+       .ioctl = alpha_rtc_ioctl,
+};
+
+/*
+ * Similarly, except do the actual CMOS access on the boot cpu only.
+ * This requires marshalling the data across an interprocessor call.
+ */
+
+#if defined(CONFIG_SMP) && \
+    (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL))
+# define HAVE_REMOTE_RTC 1
+
+union remote_data {
+       struct rtc_time *tm;
+       unsigned long now;
+       long retval;
+};
+
+static void
+do_remote_read(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_read_time(NULL, x->tm);
+}
+
+static int
+remote_read_time(struct device *dev, struct rtc_time *tm)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.tm = tm;
+               smp_call_function_single(boot_cpuid, do_remote_read, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_read_time(NULL, tm);
+}
+
+static void
+do_remote_set(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_set_time(NULL, x->tm);
+}
+
+static int
+remote_set_time(struct device *dev, struct rtc_time *tm)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.tm = tm;
+               smp_call_function_single(boot_cpuid, do_remote_set, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_set_time(NULL, tm);
+}
+
+static void
+do_remote_mmss(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_set_mmss(NULL, x->now);
+}
+
+static int
+remote_set_mmss(struct device *dev, unsigned long now)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.now = now;
+               smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_set_mmss(NULL, now);
+}
+
+static const struct rtc_class_ops remote_rtc_ops = {
+       .read_time = remote_read_time,
+       .set_time = remote_set_time,
+       .set_mmss = remote_set_mmss,
+       .ioctl = alpha_rtc_ioctl,
+};
+#endif
+
+static int __init
+alpha_rtc_init(void)
+{
+       const struct rtc_class_ops *ops;
+       struct platform_device *pdev;
+       struct rtc_device *rtc;
+       const char *name;
+
+       init_rtc_epoch();
+       name = "rtc-alpha";
+       ops = &alpha_rtc_ops;
+
+#ifdef HAVE_REMOTE_RTC
+       if (alpha_mv.rtc_boot_cpu_only)
+               ops = &remote_rtc_ops;
+#endif
+
+       pdev = platform_device_register_simple(name, -1, NULL, 0);
+       rtc = devm_rtc_device_register(&pdev->dev, name, ops, THIS_MODULE);
+       if (IS_ERR(rtc))
+               return PTR_ERR(rtc);
+
+       platform_set_drvdata(pdev, rtc);
+       return 0;
+}
+device_initcall(alpha_rtc_init);
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c

index 9e3107cc5ebb45d7dccc2889462f9f10c4b575ec..b20af76f12c1dbf548a27210fdbb196a2c77496d 100644 (file)
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -115,10 +115,17 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE;
  
  #ifdef CONFIG_ALPHA_GENERIC
  struct alpha_machine_vector alpha_mv;
+#endif
+
+#ifndef alpha_using_srm
  int alpha_using_srm;
  EXPORT_SYMBOL(alpha_using_srm);
  #endif
  
+#ifndef alpha_using_qemu
+int alpha_using_qemu;
+#endif
+
  static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long,
                                                unsigned long);
  static struct alpha_machine_vector *get_sysvec_byname(const char *);
@@ -529,11 +536,15 @@ setup_arch(char **cmdline_p)
         atomic_notifier_chain_register(&panic_notifier_list,
                         &alpha_panic_block);
  
-#ifdef CONFIG_ALPHA_GENERIC
+#ifndef alpha_using_srm
         /* Assume that we've booted from SRM if we haven't booted from MILO.
            Detect the later by looking for "MILO" in the system serial nr.  */
         alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0;
  #endif
+#ifndef alpha_using_qemu
+       /* Similarly, look for QEMU.  */
+       alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0;
+#endif
  
         /* If we are using SRM, we want to allow callbacks
            as early as possible, so do this NOW, and then
@@ -1207,6 +1218,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
         char *systype_name;
         char *sysvariation_name;
         int nr_processors;
+       unsigned long timer_freq;
  
         cpu_index = (unsigned) (cpu->type - 1);
         cpu_name = "Unknown";
@@ -1218,6 +1230,12 @@ show_cpuinfo(struct seq_file *f, void *slot)
  
         nr_processors = get_nr_processors(cpu, hwrpb->nr_processors);
  
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+       timer_freq = (100UL * hwrpb->intr_freq) / 4096;
+#else
+       timer_freq = 100UL * CONFIG_HZ;
+#endif
+
         seq_printf(f, "cpu\t\t\t: Alpha\n"
                       "cpu model\t\t: %s\n"
                       "cpu variation\t\t: %ld\n"
@@ -1243,8 +1261,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
                        (char*)hwrpb->ssn,
                        est_cycle_freq ? : hwrpb->cycle_freq,
                        est_cycle_freq ? "est." : "",
-                      hwrpb->intr_freq / 4096,
-                      (100 * hwrpb->intr_freq / 4096) % 100,
+                      timer_freq / 100, timer_freq % 100,
                        hwrpb->pagesize,
                        hwrpb->pa_bits,
                        hwrpb->max_asn,
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c

index 9dbbcb3b914675f80e3e0f097ced7e4545b533f9..99ac36d5de4efd10832804e82509e062606720e2 100644 (file)
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -138,9 +138,11 @@ smp_callin(void)
  
         /* Get our local ticker going. */
         smp_setup_percpu_timer(cpuid);
+       init_clockevent();
  
         /* Call platform-specific callin, if specified */
-       if (alpha_mv.smp_callin) alpha_mv.smp_callin();
+       if (alpha_mv.smp_callin)
+               alpha_mv.smp_callin();
  
         /* All kernel threads share the same mm context.  */
         atomic_inc(&init_mm.mm_count);
@@ -498,35 +500,6 @@ smp_cpus_done(unsigned int max_cpus)
                ((bogosum + 2500) / (5000/HZ)) % 100);
  }
  
-\f
-void
-smp_percpu_timer_interrupt(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs;
-       int cpu = smp_processor_id();
-       unsigned long user = user_mode(regs);
-       struct cpuinfo_alpha *data = &cpu_data[cpu];
-
-       old_regs = set_irq_regs(regs);
-
-       /* Record kernel PC.  */
-       profile_tick(CPU_PROFILING);
-
-       if (!--data->prof_counter) {
-               /* We need to make like a normal interrupt -- otherwise
-                  timer interrupts ignore the global interrupt lock,
-                  which would be a Bad Thing.  */
-               irq_enter();
-
-               update_process_times(user);
-
-               data->prof_counter = data->prof_multiplier;
-
-               irq_exit();
-       }
-       set_irq_regs(old_regs);
-}
-
  int
  setup_profiling_timer(unsigned int multiplier)
  {
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c

index 5a0af11b3a61c1b97b5e1da19fe1fb8f2dd15f50..608f2a7fa0a30f415e2bdef4c7424957080b8641 100644 (file)
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -224,8 +224,6 @@ struct alpha_machine_vector jensen_mv __initmv = {
         .machine_check          = jensen_machine_check,
         .max_isa_dma_address    = ALPHA_MAX_ISA_DMA_ADDRESS,
         .rtc_port               = 0x170,
-       .rtc_get_time           = common_get_rtc_time,
-       .rtc_set_time           = common_set_rtc_time,
  
         .nr_irqs                = 16,
         .device_interrupt       = jensen_device_interrupt,
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c

index c92e389ff2192973f95f51073a5ce50e5f132a3b..f21d61fab6787331d21571958185b637fc601bb7 100644 (file)
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -22,7 +22,6 @@
  #include <asm/hwrpb.h>
  #include <asm/tlbflush.h>
  #include <asm/vga.h>
-#include <asm/rtc.h>
  
  #include "proto.h"
  #include "err_impl.h"
@@ -400,57 +399,6 @@ marvel_init_rtc(void)
         init_rtc_irq();
  }
  
-struct marvel_rtc_time {
-       struct rtc_time *time;
-       int retval;
-};
-
-#ifdef CONFIG_SMP
-static void
-smp_get_rtc_time(void *data)
-{
-       struct marvel_rtc_time *mrt = data;
-       mrt->retval = __get_rtc_time(mrt->time);
-}
-
-static void
-smp_set_rtc_time(void *data)
-{
-       struct marvel_rtc_time *mrt = data;
-       mrt->retval = __set_rtc_time(mrt->time);
-}
-#endif
-
-static unsigned int
-marvel_get_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
-       struct marvel_rtc_time mrt;
-
-       if (smp_processor_id() != boot_cpuid) {
-               mrt.time = time;
-               smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1);
-               return mrt.retval;
-       }
-#endif
-       return __get_rtc_time(time);
-}
-
-static int
-marvel_set_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
-       struct marvel_rtc_time mrt;
-
-       if (smp_processor_id() != boot_cpuid) {
-               mrt.time = time;
-               smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1);
-               return mrt.retval;
-       }
-#endif
-       return __set_rtc_time(time);
-}
-
  static void
  marvel_smp_callin(void)
  {
@@ -492,8 +440,7 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = {
         .vector_name            = "MARVEL/EV7",
         DO_EV7_MMU,
         .rtc_port               = 0x70,
-       .rtc_get_time           = marvel_get_rtc_time,
-       .rtc_set_time           = marvel_set_rtc_time,
+       .rtc_boot_cpu_only      = 1,
         DO_MARVEL_IO,
         .machine_check          = marvel_machine_check,
         .max_isa_dma_address    = ALPHA_MAX_ISA_DMA_ADDRESS,
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c

index ea3395036556ceea61c74c08452d5e7a7061e566..ee39cee8064caa4e930a06eb2fa35457ad6d390e 100644 (file)
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -3,13 +3,7 @@
   *
   *  Copyright (C) 1991, 1992, 1995, 1999, 2000  Linus Torvalds
   *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02    Alan Modra
- *     fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26    Markus Kuhn
- *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- *      precision CMOS clock update
+ * This file contains the clocksource time handling.
   * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
   *             "A Kernel Model for Precision Timekeeping" by Dave Mills
   * 1997-01-09    Adrian Sun
@@ -21,9 +15,6 @@
   * 1999-04-16  Thorsten Kranzkowski (dl8bcu@gmx.net)
   *     fixed algorithm in do_gettimeofday() for calculating the precise time
   *     from processor cycle counter (now taking lost_ticks into account)
- * 2000-08-13  Jan-Benedict Glaw <jbglaw@lug-owl.de>
- *     Fixed time_init to be aware of epoches != 1900. This prevents
- *     booting up in 2048 for me;) Code is stolen from rtc.c.
   * 2003-06-03  R. Scott Bailey <scott.bailey@eds.com>
   *     Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM
   */
@@ -46,40 +37,19 @@
  #include <asm/uaccess.h>
  #include <asm/io.h>
  #include <asm/hwrpb.h>
-#include <asm/rtc.h>
  
  #include <linux/mc146818rtc.h>
  #include <linux/time.h>
  #include <linux/timex.h>
  #include <linux/clocksource.h>
+#include <linux/clockchips.h>
  
  #include "proto.h"
  #include "irq_impl.h"
  
-static int set_rtc_mmss(unsigned long);
-
  DEFINE_SPINLOCK(rtc_lock);
  EXPORT_SYMBOL(rtc_lock);
  
-#define TICK_SIZE (tick_nsec / 1000)
-
-/*
- * Shift amount by which scaled_ticks_per_cycle is scaled.  Shifting
- * by 48 gives us 16 bits for HZ while keeping the accuracy good even
- * for large CPU clock rates.
- */
-#define FIX_SHIFT      48
-
-/* lump static variables together for more efficient access: */
-static struct {
-       /* cycle counter last time it got invoked */
-       __u32 last_time;
-       /* ticks/cycle * 2^48 */
-       unsigned long scaled_ticks_per_cycle;
-       /* partial unused tick */
-       unsigned long partial_tick;
-} state;
-
  unsigned long est_cycle_freq;
  
  #ifdef CONFIG_IRQ_WORK
@@ -108,109 +78,156 @@ static inline __u32 rpcc(void)
         return __builtin_alpha_rpcc();
  }
  
-int update_persistent_clock(struct timespec now)
-{
-       return set_rtc_mmss(now.tv_sec);
-}
  
-void read_persistent_clock(struct timespec *ts)
+\f
+/*
+ * The RTC as a clock_event_device primitive.
+ */
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ce);
+
+irqreturn_t
+rtc_timer_interrupt(int irq, void *dev)
  {
-       unsigned int year, mon, day, hour, min, sec, epoch;
-
-       sec = CMOS_READ(RTC_SECONDS);
-       min = CMOS_READ(RTC_MINUTES);
-       hour = CMOS_READ(RTC_HOURS);
-       day = CMOS_READ(RTC_DAY_OF_MONTH);
-       mon = CMOS_READ(RTC_MONTH);
-       year = CMOS_READ(RTC_YEAR);
-
-       if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               sec = bcd2bin(sec);
-               min = bcd2bin(min);
-               hour = bcd2bin(hour);
-               day = bcd2bin(day);
-               mon = bcd2bin(mon);
-               year = bcd2bin(year);
-       }
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
  
-       /* PC-like is standard; used for year >= 70 */
-       epoch = 1900;
-       if (year < 20)
-               epoch = 2000;
-       else if (year >= 20 && year < 48)
-               /* NT epoch */
-               epoch = 1980;
-       else if (year >= 48 && year < 70)
-               /* Digital UNIX epoch */
-               epoch = 1952;
+       /* Don't run the hook for UNUSED or SHUTDOWN.  */
+       if (likely(ce->mode == CLOCK_EVT_MODE_PERIODIC))
+               ce->event_handler(ce);
  
-       printk(KERN_INFO "Using epoch = %d\n", epoch);
+       if (test_irq_work_pending()) {
+               clear_irq_work_pending();
+               irq_work_run();
+       }
  
-       if ((year += epoch) < 1970)
-               year += 100;
+       return IRQ_HANDLED;
+}
  
-       ts->tv_sec = mktime(year, mon, day, hour, min, sec);
-       ts->tv_nsec = 0;
+static void
+rtc_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+       /* The mode member of CE is updated in generic code.
+          Since we only support periodic events, nothing to do.  */
+}
+
+static int
+rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+       /* This hook is for oneshot mode, which we don't support.  */
+       return -EINVAL;
  }
  
+static void __init
+init_rtc_clockevent(void)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       *ce = (struct clock_event_device){
+               .name = "rtc",
+               .features = CLOCK_EVT_FEAT_PERIODIC,
+               .rating = 100,
+               .cpumask = cpumask_of(cpu),
+               .set_mode = rtc_ce_set_mode,
+               .set_next_event = rtc_ce_set_next_event,
+       };
  
+       clockevents_config_and_register(ce, CONFIG_HZ, 0, 0);
+}
  
+\f
  /*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick
+ * The QEMU clock as a clocksource primitive.
   */
-irqreturn_t timer_interrupt(int irq, void *dev)
+
+static cycle_t
+qemu_cs_read(struct clocksource *cs)
  {
-       unsigned long delta;
-       __u32 now;
-       long nticks;
+       return qemu_get_vmtime();
+}
  
-#ifndef CONFIG_SMP
-       /* Not SMP, do kernel PC profiling here.  */
-       profile_tick(CPU_PROFILING);
-#endif
+static struct clocksource qemu_cs = {
+       .name                   = "qemu",
+       .rating                 = 400,
+       .read                   = qemu_cs_read,
+       .mask                   = CLOCKSOURCE_MASK(64),
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
+       .max_idle_ns            = LONG_MAX
+};
  
-       /*
-        * Calculate how many ticks have passed since the last update,
-        * including any previous partial leftover.  Save any resulting
-        * fraction for the next pass.
-        */
-       now = rpcc();
-       delta = now - state.last_time;
-       state.last_time = now;
-       delta = delta * state.scaled_ticks_per_cycle + state.partial_tick;
-       state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); 
-       nticks = delta >> FIX_SHIFT;
  
-       if (nticks)
-               xtime_update(nticks);
+/*
+ * The QEMU alarm as a clock_event_device primitive.
+ */
  
-       if (test_irq_work_pending()) {
-               clear_irq_work_pending();
-               irq_work_run();
-       }
+static void
+qemu_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+       /* The mode member of CE is updated for us in generic code.
+          Just make sure that the event is disabled.  */
+       qemu_set_alarm_abs(0);
+}
  
-#ifndef CONFIG_SMP
-       while (nticks--)
-               update_process_times(user_mode(get_irq_regs()));
-#endif
+static int
+qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+       qemu_set_alarm_rel(evt);
+       return 0;
+}
  
+static irqreturn_t
+qemu_timer_interrupt(int irq, void *dev)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       ce->event_handler(ce);
         return IRQ_HANDLED;
  }
  
+static void __init
+init_qemu_clockevent(void)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       *ce = (struct clock_event_device){
+               .name = "qemu",
+               .features = CLOCK_EVT_FEAT_ONESHOT,
+               .rating = 400,
+               .cpumask = cpumask_of(cpu),
+               .set_mode = qemu_ce_set_mode,
+               .set_next_event = qemu_ce_set_next_event,
+       };
+
+       clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX);
+}
+
+\f
  void __init
  common_init_rtc(void)
  {
-       unsigned char x;
+       unsigned char x, sel = 0;
  
         /* Reset periodic interrupt frequency.  */
-       x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
-        /* Test includes known working values on various platforms
-           where 0x26 is wrong; we refuse to change those. */
-       if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
-               printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x);
-               CMOS_WRITE(0x26, RTC_FREQ_SELECT);
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+       x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
+       /* Test includes known working values on various platforms
+          where 0x26 is wrong; we refuse to change those. */
+       if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
+               sel = RTC_REF_CLCK_32KHZ + 6;
         }
+#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32
+       sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ);
+#else
+# error "Unknown HZ from arch/alpha/Kconfig"
+#endif
+       if (sel) {
+               printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n",
+                      CONFIG_HZ, sel);
+               CMOS_WRITE(sel, RTC_FREQ_SELECT);
+       }
  
         /* Turn on periodic interrupts.  */
         x = CMOS_READ(RTC_CONTROL);
@@ -233,16 +250,37 @@ common_init_rtc(void)
         init_rtc_irq();
  }
  
-unsigned int common_get_rtc_time(struct rtc_time *time)
-{
-       return __get_rtc_time(time);
-}
+\f
+#ifndef CONFIG_ALPHA_WTINT
+/*
+ * The RPCC as a clocksource primitive.
+ *
+ * While we have free-running timecounters running on all CPUs, and we make
+ * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter
+ * with the wall clock, that initialization isn't kept up-to-date across
+ * different time counters in SMP mode.  Therefore we can only use this
+ * method when there's only one CPU enabled.
+ *
+ * When using the WTINT PALcall, the RPCC may shift to a lower frequency,
+ * or stop altogether, while waiting for the interrupt.  Therefore we cannot
+ * use this method when WTINT is in use.
+ */
  
-int common_set_rtc_time(struct rtc_time *time)
+static cycle_t read_rpcc(struct clocksource *cs)
  {
-       return __set_rtc_time(time);
+       return rpcc();
  }
  
+static struct clocksource clocksource_rpcc = {
+       .name                   = "rpcc",
+       .rating                 = 300,
+       .read                   = read_rpcc,
+       .mask                   = CLOCKSOURCE_MASK(32),
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS
+};
+#endif /* ALPHA_WTINT */
+
+\f
  /* Validate a computed cycle counter result against the known bounds for
     the given processor core.  There's too much brokenness in the way of
     timing hardware for any one method to work everywhere.  :-(
@@ -353,33 +391,6 @@ rpcc_after_update_in_progress(void)
         return rpcc();
  }
  
-#ifndef CONFIG_SMP
-/* Until and unless we figure out how to get cpu cycle counters
-   in sync and keep them there, we can't use the rpcc.  */
-static cycle_t read_rpcc(struct clocksource *cs)
-{
-       cycle_t ret = (cycle_t)rpcc();
-       return ret;
-}
-
-static struct clocksource clocksource_rpcc = {
-       .name                   = "rpcc",
-       .rating                 = 300,
-       .read                   = read_rpcc,
-       .mask                   = CLOCKSOURCE_MASK(32),
-       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS
-};
-
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
-       clocksource_register_hz(&clocksource_rpcc, cycle_freq);
-}
-#else /* !CONFIG_SMP */
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
-}
-#endif /* !CONFIG_SMP */
-
  void __init
  time_init(void)
  {
@@ -387,6 +398,15 @@ time_init(void)
         unsigned long cycle_freq, tolerance;
         long diff;
  
+       if (alpha_using_qemu) {
+               clocksource_register_hz(&qemu_cs, NSEC_PER_SEC);
+               init_qemu_clockevent();
+
+               timer_irqaction.handler = qemu_timer_interrupt;
+               init_rtc_irq();
+               return;
+       }
+
         /* Calibrate CPU clock -- attempt #1.  */
         if (!est_cycle_freq)
                 est_cycle_freq = validate_cc_value(calibrate_cc_with_pit());
@@ -421,100 +441,25 @@ time_init(void)
                        "and unable to estimate a proper value!\n");
         }
  
-       /* From John Bowman <bowman@math.ualberta.ca>: allow the values
-          to settle, as the Update-In-Progress bit going low isn't good
-          enough on some hardware.  2ms is our guess; we haven't found 
-          bogomips yet, but this is close on a 500Mhz box.  */
-       __delay(1000000);
-
-
-       if (HZ > (1<<16)) {
-               extern void __you_loose (void);
-               __you_loose();
-       }
-
-       register_rpcc_clocksource(cycle_freq);
-
-       state.last_time = cc1;
-       state.scaled_ticks_per_cycle
-               = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq;
-       state.partial_tick = 0L;
+       /* See above for restrictions on using clocksource_rpcc.  */
+#ifndef CONFIG_ALPHA_WTINT
+       if (hwrpb->nr_processors == 1)
+               clocksource_register_hz(&clocksource_rpcc, cycle_freq);
+#endif
  
         /* Startup the timer source. */
         alpha_mv.init_rtc();
+       init_rtc_clockevent();
  }
  
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be
- * called 500 ms after the second nowtime has started, because when
- * nowtime is written into the registers of the CMOS clock, it will
- * jump to the next second precisely 500 ms later. Check the Motorola
- * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- *      sets the minutes. Usually you won't notice until after reboot!
- */
-
-
-static int
-set_rtc_mmss(unsigned long nowtime)
+/* Initialize the clock_event_device for secondary cpus.  */
+#ifdef CONFIG_SMP
+void __init
+init_clockevent(void)
  {
-       int retval = 0;
-       int real_seconds, real_minutes, cmos_minutes;
-       unsigned char save_control, save_freq_select;
-
-       /* irq are locally disabled here */
-       spin_lock(&rtc_lock);
-       /* Tell the clock it's being set */
-       save_control = CMOS_READ(RTC_CONTROL);
-       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-       /* Stop and reset prescaler */
-       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-       cmos_minutes = CMOS_READ(RTC_MINUTES);
-       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               cmos_minutes = bcd2bin(cmos_minutes);
-
-       /*
-        * since we're only adjusting minutes and seconds,
-        * don't interfere with hour overflow. This avoids
-        * messing with unknown time zones but requires your
-        * RTC not to be off by more than 15 minutes
-        */
-       real_seconds = nowtime % 60;
-       real_minutes = nowtime / 60;
-       if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) {
-               /* correct for half hour time zone */
-               real_minutes += 30;
-       }
-       real_minutes %= 60;
-
-       if (abs(real_minutes - cmos_minutes) < 30) {
-               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       real_seconds = bin2bcd(real_seconds);
-                       real_minutes = bin2bcd(real_minutes);
-               }
-               CMOS_WRITE(real_seconds,RTC_SECONDS);
-               CMOS_WRITE(real_minutes,RTC_MINUTES);
-       } else {
-               printk_once(KERN_NOTICE
-                      "set_rtc_mmss: can't update from %d to %d\n",
-                      cmos_minutes, real_minutes);
-               retval = -1;
-       }
-
-       /* The following flags have to be released exactly in this order,
-        * otherwise the DS12887 (popular MC146818A clone with integrated
-        * battery and quartz) will not reset the oscillator and will not
-        * update precisely 500 ms later. You won't find this mentioned in
-        * the Dallas Semiconductor data sheets, but who believes data
-        * sheets anyway ...                           -- Markus Kuhn
-        */
-       CMOS_WRITE(save_control, RTC_CONTROL);
-       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       spin_unlock(&rtc_lock);
-
-       return retval;
+       if (alpha_using_qemu)
+               init_qemu_clockevent();
+       else
+               init_rtc_clockevent();
  }
+#endif
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c

index bd0665cdc840d3e9a79b752ec3a88279ede69344..9c4c189eb22f5a9db2d2ae678756a5241b3e1ee5 100644 (file)
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -241,6 +241,21 @@ do_entIF(unsigned long type, struct pt_regs *regs)
                                (const char *)(data[1] | (long)data[2] << 32), 
                                data[0]);
                 }
+#ifdef CONFIG_ALPHA_WTINT
+               if (type == 4) {
+                       /* If CALL_PAL WTINT is totally unsupported by the
+                          PALcode, e.g. MILO, "emulate" it by overwriting
+                          the insn.  */
+                       unsigned int *pinsn
+                         = (unsigned int *) regs->pc - 1;
+                       if (*pinsn == PAL_wtint) {
+                               *pinsn = 0x47e01400; /* mov 0,$0 */
+                               imb();
+                               regs->r0 = 0;
+                               return;
+                       }
+               }
+#endif /* ALPHA_WTINT */
                 die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
                               regs, type, NULL);
         }
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c

index ffb19b7da999c67722d5690dcfcd6ef0d74123f8..ff3c10721caf67be1873f5c5b88623fc65fd89ea 100644 (file)
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -130,7 +130,7 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
                 *dst = word | tmp;
                 checksum += carry;
         }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
         return checksum;
  }
  
@@ -185,7 +185,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src,
                 *dst = word | tmp;
                 checksum += carry;
         }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
         return checksum;
  }
  
@@ -242,7 +242,7 @@ csum_partial_cfu_src_aligned(const unsigned long __user *src,
         stq_u(partial_dest | second_dest, dst);
  out:
         checksum += carry;
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
         return checksum;
  }
  
@@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src,
                 stq_u(partial_dest | word | second_dest, dst);
                 checksum += carry;
         }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
         return checksum;
  }
  
@@ -339,7 +339,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
  
         if (len) {
                 if (!access_ok(VERIFY_READ, src, len)) {
-                       *errp = -EFAULT;
+                       if (errp) *errp = -EFAULT;
                         memset(dst, 0, len);
                         return sum;
                 }
diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S

index d8b94e1c7fcad001c32142f4e4cc51d3cc8f6a5b..356bb2fdd70567721023b8e0a3fc1d59f2f5d981 100644 (file)
--- a/arch/alpha/lib/ev6-memset.S
+++ b/arch/alpha/lib/ev6-memset.S
@@ -30,14 +30,15 @@
         .set noat
         .set noreorder
  .text
+       .globl memset
         .globl __memset
+       .globl ___memset
         .globl __memsetw
         .globl __constant_c_memset
-       .globl memset
  
-       .ent __memset
+       .ent ___memset
  .align 5
-__memset:
+___memset:
         .frame $30,0,$26,0
         .prologue 0
  
@@ -227,7 +228,7 @@ end_b:
         nop
         nop
         ret $31,($26),1         # L0 :
-       .end __memset
+       .end ___memset
  
         /*
          * This is the original body of code, prior to replication and
@@ -594,4 +595,5 @@ end_w:
  
         .end __memsetw
  
-memset = __memset
+memset = ___memset
+__memset = ___memset
diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S

index 311b8cfc691488743d178c2dd79eb9f7edafbd81..76ccc6d1f364d67ca8c03c859171f113da5e23ce 100644 (file)
--- a/arch/alpha/lib/memset.S
+++ b/arch/alpha/lib/memset.S
@@ -19,11 +19,13 @@
  .text
         .globl memset
         .globl __memset
+       .globl ___memset
         .globl __memsetw
         .globl __constant_c_memset
-       .ent __memset
+
+       .ent ___memset
  .align 5
-__memset:
+___memset:
         .frame $30,0,$26,0
         .prologue 0
  
@@ -103,7 +105,7 @@ within_one_quad:
  
  end:
         ret $31,($26),1         /* E1 */
-       .end __memset
+       .end ___memset
  
         .align 5
         .ent __memsetw
@@ -121,4 +123,5 @@ __memsetw:
  
         .end __memsetw
  
-memset = __memset
+memset = ___memset
+__memset = ___memset
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 214b698cefea895e35b345dc9e8d070d33cf6929..c1f1a7eee953de4378b1f74bd4907c969f96dceb 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,7 +25,7 @@ config ARM
         select HARDIRQS_SW_RESEND
         select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
         select HAVE_ARCH_KGDB
-       select HAVE_ARCH_SECCOMP_FILTER
+       select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
         select HAVE_ARCH_TRACEHOOK
         select HAVE_BPF_JIT
         select HAVE_CONTEXT_TRACKING
@@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER
         bool "Architected timer support"
         depends on CPU_V7
         select ARM_ARCH_TIMER
+       select GENERIC_CLOCKEVENTS
         help
           This option enables support for the ARM architected timer
  
@@ -1719,7 +1720,6 @@ config AEABI
  config OABI_COMPAT
         bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)"
         depends on AEABI && !THUMB2_KERNEL
-       default y
         help
           This option preserves the old syscall interface along with the
           new (ARM EABI) one. It also provides a compatibility layer to
@@ -1727,11 +1727,16 @@ config OABI_COMPAT
           in memory differs between the legacy ABI and the new ARM EABI
           (only for non "thumb" binaries). This option adds a tiny
           overhead to all syscalls and produces a slightly larger kernel.
+
+         The seccomp filter system will not be available when this is
+         selected, since there is no way yet to sensibly distinguish
+         between calling conventions during filtering.
+
           If you know you'll be using only pure EABI user space then you
           can say N here. If this option is not selected and you attempt
           to execute a legacy ABI binary then the result will be
           UNPREDICTABLE (in fact it can be predicted that it won't work
-         at all). If in doubt say Y.
+         at all). If in doubt say N.
  
  config ARCH_HAS_HOLES_MEMORYMODEL
         bool
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c

index 8e1a0245907f85be1a460bfa785f744daf285d6f..41bca32409fce81358c3b5c35bc081bcc28e7c76 100644 (file)
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -404,7 +404,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
                                         BIT(slot));
                         if (edma_cc[ctlr]->intr_data[channel].callback)
                                 edma_cc[ctlr]->intr_data[channel].callback(
-                                       channel, DMA_COMPLETE,
+                                       channel, EDMA_DMA_COMPLETE,
                                         edma_cc[ctlr]->intr_data[channel].data);
                 }
         } while (sh_ipr);
@@ -459,7 +459,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
                                                                 callback) {
                                                 edma_cc[ctlr]->intr_data[k].
                                                 callback(k,
-                                               DMA_CC_ERROR,
+                                               EDMA_DMA_CC_ERROR,
                                                 edma_cc[ctlr]->intr_data
                                                 [k].data);
                                         }
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h

index 9b28f1243bdc1d96c2be3dac0ad947a72a61a264..240b29ef17db9772af6abc4855b90c7c16621e81 100644 (file)
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
         return slot_cnt;
  }
  
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-       return 0;
-}
-
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-                                       struct iop_adma_chan *chan)
-{
-       union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-       switch (chan->device->id) {
-       case DMA0_ID:
-       case DMA1_ID:
-               return hw_desc.dma->dest_addr;
-       case AAU_ID:
-               return hw_desc.aau->dest_addr;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-                                         struct iop_adma_chan *chan)
-{
-       BUG();
-       return 0;
-}
-
  static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                         struct iop_adma_chan *chan)
  {
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h

index 122f86d8c991d73e587c786eba41bf85a6ec5a21..250760e081039542dbcdae191b1b5321921a1d3d 100644 (file)
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -82,8 +82,6 @@ struct iop_adma_chan {
   * @slot_cnt: total slots used in an transaction (group of operations)
   * @slots_per_op: number of slots per operation
   * @idx: pool index
- * @unmap_src_cnt: number of xor sources
- * @unmap_len: transaction bytecount
   * @tx_list: list of descriptors that are associated with one operation
   * @async_tx: support for the async_tx api
   * @group_list: list of slots that make up a multi-descriptor transaction
@@ -99,8 +97,6 @@ struct iop_adma_desc_slot {
         u16 slot_cnt;
         u16 slots_per_op;
         u16 idx;
-       u16 unmap_src_cnt;
-       size_t unmap_len;
         struct list_head tx_list;
         struct dma_async_tx_descriptor async_tx;
         union {
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h

index 4dd21457ef9d2be8b1c94cac7eea97c8ef8cc1f6..9ecccc865046a2c257277cd03a8f607ed5e0217d 100644 (file)
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x)
  static inline unsigned long __phys_to_virt(phys_addr_t x)
  {
         unsigned long t;
-       __pv_stub(x, t, "sub", __PV_BITS_31_24);
+
+       /*
+        * 'unsigned long' cast discard upper word when
+        * phys_addr_t is 64 bit, and makes sure that inline
+        * assembler expression receives 32 bit argument
+        * in place where 'r' 32 bit operand is expected.
+        */
+       __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
         return t;
  }
  
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S

index 7801866e626a2a1a4631d9e3e3fbd3c27ddda429..11d59b32fb8dca45613ed00fb225a72359c19216 100644 (file)
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -508,6 +508,7 @@ __fixup_smp:
         teq     r0, #0x0                @ '0' on actual UP A9 hardware
         beq     __fixup_smp_on_up       @ So its an A9 UP
         ldr     r0, [r0, #4]            @ read SCU Config
+ARM_BE8(rev    r0, r0)                 @ byteswap if big endian
         and     r0, r0, #0x3            @ number of CPUs
         teq     r0, #0x0                @ is 1?
         movne   pc, lr
@@ -643,8 +644,12 @@ ARM_BE8(rev16      ip, ip)
         ldrcc   r7, [r4], #4    @ use branch for delay slot
         bcc     1b
         bx      lr
+#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+       moveq   r0, #0x00004000 @ set bit 22, mov to mvn instruction
  #else
         moveq   r0, #0x400000   @ set bit 22, mov to mvn instruction
+#endif
         b       2f
  1:     ldr     ip, [r7, r3]
  #ifdef CONFIG_CPU_ENDIAN_BE8
@@ -653,7 +658,7 @@ ARM_BE8(rev16       ip, ip)
         tst     ip, #0x000f0000 @ check the rotation field
         orrne   ip, ip, r6, lsl #24 @ mask in offset bits 31-24
         biceq   ip, ip, #0x00004000 @ clear bit 22
-       orreq   ip, ip, r0, lsl #24 @ mask in offset bits 7-0
+       orreq   ip, ip, r0      @ mask in offset bits 7-0
  #else
         bic     ip, ip, #0x000000ff
         tst     ip, #0xf00      @ check the rotation field
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c

index 6125f259b7b5359072b0cd7a07e122fcd2bda4bd..dbf0923e8d76bda9392b902e0c8e500025d70402 100644 (file)
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors)
                 memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
  }
  #else
-static void __init kuser_init(void *vectors)
+static inline void __init kuser_init(void *vectors)
  {
  }
  #endif
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c

index 371958370de445fbe0cc200e772fa1e9426fb327..580906989db1091eb034ada5cc60570505de8cd1 100644 (file)
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -334,6 +334,17 @@ out:
         return err;
  }
  
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+       if (!is_vmalloc_addr(kaddr)) {
+               BUG_ON(!virt_addr_valid(kaddr));
+               return __pa(kaddr);
+       } else {
+               return page_to_phys(vmalloc_to_page(kaddr)) +
+                      offset_in_page(kaddr);
+       }
+}
+
  /**
   * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
   * @from:      The virtual kernel start address of the range
@@ -345,16 +356,27 @@ out:
   */
  int create_hyp_mappings(void *from, void *to)
  {
-       unsigned long phys_addr = virt_to_phys(from);
+       phys_addr_t phys_addr;
+       unsigned long virt_addr;
         unsigned long start = KERN_TO_HYP((unsigned long)from);
         unsigned long end = KERN_TO_HYP((unsigned long)to);
  
-       /* Check for a valid kernel memory mapping */
-       if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-               return -EINVAL;
+       start = start & PAGE_MASK;
+       end = PAGE_ALIGN(end);
  
-       return __create_hyp_mappings(hyp_pgd, start, end,
-                                    __phys_to_pfn(phys_addr), PAGE_HYP);
+       for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+               int err;
+
+               phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+               err = __create_hyp_mappings(hyp_pgd, virt_addr,
+                                           virt_addr + PAGE_SIZE,
+                                           __phys_to_pfn(phys_addr),
+                                           PAGE_HYP);
+               if (err)
+                       return err;
+       }
+
+       return 0;
  }
  
  /**
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h

index e0c68d5bb7dc25dd3fa93dc0fa1b3899f5b09019..52886b89706caf466b1cc6c6586db70a7d9d962e 100644 (file)
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -10,7 +10,7 @@ UNWIND(       .fnstart        )
         and     r3, r0, #31             @ Get bit offset
         mov     r0, r0, lsr #5
         add     r1, r1, r0, lsl #2      @ Get word offset
-#if __LINUX_ARM_ARCH__ >= 7
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
         .arch_extension mp
         ALT_SMP(W(pldw) [r1])
         ALT_UP(W(nop))
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h

index 6d3782d85a9ff6d2db65a71de2632cc0b1151b33..a86fd0ed775788012197270c96ea8190282aed47 100644 (file)
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -218,20 +218,6 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
  #define iop_chan_pq_slot_count iop_chan_xor_slot_count
  #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
  
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-                                       struct iop_adma_chan *chan)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       return hw_desc->dest_addr;
-}
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-                                         struct iop_adma_chan *chan)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       return hw_desc->q_dest_addr;
-}
-
  static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                         struct iop_adma_chan *chan)
  {
@@ -350,18 +336,6 @@ iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
         hw_desc->desc_ctrl = u_desc_ctrl.value;
  }
  
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       union {
-               u32 value;
-               struct iop13xx_adma_desc_ctrl field;
-       } u_desc_ctrl;
-
-       u_desc_ctrl.value = hw_desc->desc_ctrl;
-       return u_desc_ctrl.field.pq_xfer_en;
-}
-
  static inline void
  iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
                           unsigned long flags)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c

index 78eeeca78f5ab331707fcd73b4956c503c2d880b..580ef2de82d728f8ecfde5f5f3b208a2e5525b06 100644 (file)
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -558,8 +558,8 @@ static void __init build_mem_type_table(void)
                 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
                 break;
         }
-       printk("Memory policy: ECC %sabled, Data cache %s\n",
-               ecc_mask ? "en" : "dis", cp->policy);
+       pr_info("Memory policy: %sData cache %s\n",
+               ecc_mask ? "ECC enabled, " : "", cp->policy);
  
         for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                 struct mem_type *t = &mem_types[i];
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c

index 5c668b7a31f97e6df35dcec53b79d5a70a9d1d0b..55764a7ef1f021934ba2f0b0136fc1b1df2a2799 100644 (file)
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -18,6 +18,7 @@
  #include <asm/mach/arch.h>
  #include <asm/cputype.h>
  #include <asm/mpu.h>
+#include <asm/procinfo.h>
  
  #include "mm.h"
  
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S

index 60920f62fdf5994f04477bc94aba09dcd349385a..bd1781979a391825043d078192666e5a846cdae5 100644 (file)
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area)
  
  /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
  .globl cpu_v7_suspend_size
-.equ   cpu_v7_suspend_size, 4 * 8
+.equ   cpu_v7_suspend_size, 4 * 9
  #ifdef CONFIG_ARM_CPU_SUSPEND
  ENTRY(cpu_v7_do_suspend)
         stmfd   sp!, {r4 - r10, lr}
@@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend)
         stmia   r0!, {r4 - r5}
  #ifdef CONFIG_MMU
         mrc     p15, 0, r6, c3, c0, 0   @ Domain ID
+#ifdef CONFIG_ARM_LPAE
+       mrrc    p15, 1, r5, r7, c2      @ TTB 1
+#else
         mrc     p15, 0, r7, c2, c0, 1   @ TTB 1
+#endif
         mrc     p15, 0, r11, c2, c0, 2  @ TTB control register
  #endif
         mrc     p15, 0, r8, c1, c0, 0   @ Control register
         mrc     p15, 0, r9, c1, c0, 1   @ Auxiliary control register
         mrc     p15, 0, r10, c1, c0, 2  @ Co-processor access control
-       stmia   r0, {r6 - r11}
+       stmia   r0, {r5 - r11}
         ldmfd   sp!, {r4 - r10, pc}
  ENDPROC(cpu_v7_do_suspend)
  
@@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume)
         ldmia   r0!, {r4 - r5}
         mcr     p15, 0, r4, c13, c0, 0  @ FCSE/PID
         mcr     p15, 0, r5, c13, c0, 3  @ User r/o thread ID
-       ldmia   r0, {r6 - r11}
+       ldmia   r0, {r5 - r11}
  #ifdef CONFIG_MMU
         mcr     p15, 0, ip, c8, c7, 0   @ invalidate TLBs
         mcr     p15, 0, r6, c3, c0, 0   @ Domain ID
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+       mcrr    p15, 0, r1, ip, c2      @ TTB 0
+       mcrr    p15, 1, r5, r7, c2      @ TTB 1
+#else
         ALT_SMP(orr     r1, r1, #TTB_FLAGS_SMP)
         ALT_UP(orr      r1, r1, #TTB_FLAGS_UP)
-#endif
         mcr     p15, 0, r1, c2, c0, 0   @ TTB 0
         mcr     p15, 0, r7, c2, c0, 1   @ TTB 1
+#endif
         mcr     p15, 0, r11, c2, c0, 2  @ TTB control register
         ldr     r4, =PRRR               @ PRRR
         ldr     r5, =NMRR               @ NMRR
diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S

index 4488fa27fe948c2e73018e962628c7883327bf07..2ffc298f061b3e7704e89dd634ece905ff13dcf4 100644 (file)
--- a/arch/avr32/boot/u-boot/head.S
+++ b/arch/avr32/boot/u-boot/head.S
@@ -8,6 +8,8 @@
   * published by the Free Software Foundation.
   */
  #include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/sysreg.h>
  
         /*
          * The kernel is loaded where we want it to be and all caches
@@ -20,11 +22,6 @@
         .section .init.text,"ax"
         .global _start
  _start:
-       /* Check if the boot loader actually provided a tag table */
-       lddpc   r0, magic_number
-       cp.w    r12, r0
-       brne    no_tag_table
-
         /* Initialize .bss */
         lddpc   r2, bss_start_addr
         lddpc   r3, end_addr
@@ -34,6 +31,25 @@ _start:
         cp      r2, r3
         brlo    1b
  
+       /* Initialize status register */
+       lddpc   r0, init_sr
+       mtsr    SYSREG_SR, r0
+
+       /* Set initial stack pointer */
+       lddpc   sp, stack_addr
+       sub     sp, -THREAD_SIZE
+
+#ifdef CONFIG_FRAME_POINTER
+       /* Mark last stack frame */
+       mov     lr, 0
+       mov     r7, 0
+#endif
+
+       /* Check if the boot loader actually provided a tag table */
+       lddpc   r0, magic_number
+       cp.w    r12, r0
+       brne    no_tag_table
+
         /*
          * Save the tag table address for later use. This must be done
          * _after_ .bss has been initialized...
@@ -53,8 +69,15 @@ bss_start_addr:
         .long   __bss_start
  end_addr:
         .long   _end
+init_sr:
+       .long   0x007f0000      /* Supervisor mode, everything masked */
+stack_addr:
+       .long   init_thread_union
+panic_addr:
+       .long   panic
  
  no_tag_table:
         sub     r12, pc, (. - 2f)
-       bral    panic
+       /* branch to panic() which can be far away with that construct */
+       lddpc   pc, panic_addr
  2:     .asciz  "Boot loader didn't provide correct magic number\n"
diff --git a/arch/avr32/include/asm/kprobes.h b/arch/avr32/include/asm/kprobes.h

index 996cb656474e267920ad6856f6445156349b0579..45f563ed73fd51e6d4b0590a56d0f7fa3e5f4c48 100644 (file)
--- a/arch/avr32/include/asm/kprobes.h
+++ b/arch/avr32/include/asm/kprobes.h
@@ -16,6 +16,7 @@
  typedef u16    kprobe_opcode_t;
  #define BREAKPOINT_INSTRUCTION 0xd673  /* breakpoint */
  #define MAX_INSN_SIZE          2
+#define MAX_STACK_SIZE         64      /* 32 would probably be OK */
  
  #define kretprobe_blacklist_size 0
  
@@ -26,6 +27,19 @@ struct arch_specific_insn {
         kprobe_opcode_t insn[MAX_INSN_SIZE];
  };
  
+struct prev_kprobe {
+       struct kprobe *kp;
+       unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+       unsigned int kprobe_status;
+       struct prev_kprobe prev_kprobe;
+       struct pt_regs jprobe_saved_regs;
+       char jprobes_stack[MAX_STACK_SIZE];
+};
+
  extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
  extern int kprobe_exceptions_notify(struct notifier_block *self,
                                     unsigned long val, void *data);
diff --git a/arch/avr32/include/uapi/asm/Kbuild b/arch/avr32/include/uapi/asm/Kbuild

index 3b85eaddf525f2be65d5772be3f28051afac7b43..08d8a3d76ea8628b6d749370c4edec3efb5b578c 100644 (file)
--- a/arch/avr32/include/uapi/asm/Kbuild
+++ b/arch/avr32/include/uapi/asm/Kbuild
@@ -2,35 +2,35 @@
  include include/uapi/asm-generic/Kbuild.asm
  
  header-y += auxvec.h
-header-y += bitsperlong.h
  header-y += byteorder.h
  header-y += cachectl.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
  header-y += msgbuf.h
  header-y += param.h
-header-y += poll.h
  header-y += posix_types.h
  header-y += ptrace.h
-header-y += resource.h
  header-y += sembuf.h
  header-y += setup.h
  header-y += shmbuf.h
  header-y += sigcontext.h
-header-y += siginfo.h
  header-y += signal.h
  header-y += socket.h
  header-y += sockios.h
  header-y += stat.h
-header-y += statfs.h
  header-y += swab.h
  header-y += termbits.h
  header-y += termios.h
  header-y += types.h
  header-y += unistd.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
  generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += siginfo.h
+generic-y += statfs.h
diff --git a/arch/avr32/include/uapi/asm/auxvec.h b/arch/avr32/include/uapi/asm/auxvec.h

index d5dd435bf8f4769cc5d20184eca9db07c81f64d1..4f02da3ffefa63d61bcaa8de9a6429a819c800c2 100644 (file)
--- a/arch/avr32/include/uapi/asm/auxvec.h
+++ b/arch/avr32/include/uapi/asm/auxvec.h
@@ -1,4 +1,4 @@
-#ifndef __ASM_AVR32_AUXVEC_H
-#define __ASM_AVR32_AUXVEC_H
+#ifndef _UAPI__ASM_AVR32_AUXVEC_H
+#define _UAPI__ASM_AVR32_AUXVEC_H
  
-#endif /* __ASM_AVR32_AUXVEC_H */
+#endif /* _UAPI__ASM_AVR32_AUXVEC_H */
diff --git a/arch/avr32/include/uapi/asm/bitsperlong.h b/arch/avr32/include/uapi/asm/bitsperlong.h

deleted file mode 100644 (file)

index 6dc0bb0..0000000
--- a/arch/avr32/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/avr32/include/uapi/asm/byteorder.h b/arch/avr32/include/uapi/asm/byteorder.h

index 50abc21619a8a288b9c46a2ac0d38ca56ce344e7..71242f0d39c6d7ef2caed95524356e159cd5796c 100644 (file)
--- a/arch/avr32/include/uapi/asm/byteorder.h
+++ b/arch/avr32/include/uapi/asm/byteorder.h
@@ -1,9 +1,9 @@
  /*
   * AVR32 endian-conversion functions.
   */
-#ifndef __ASM_AVR32_BYTEORDER_H
-#define __ASM_AVR32_BYTEORDER_H
+#ifndef _UAPI__ASM_AVR32_BYTEORDER_H
+#define _UAPI__ASM_AVR32_BYTEORDER_H
  
  #include <linux/byteorder/big_endian.h>
  
-#endif /* __ASM_AVR32_BYTEORDER_H */
+#endif /* _UAPI__ASM_AVR32_BYTEORDER_H */
diff --git a/arch/avr32/include/uapi/asm/cachectl.h b/arch/avr32/include/uapi/asm/cachectl.h

index 4faf1ce600616d82108523b157d065baa6cdeee1..573a9584dd57eafeefdb18754d0e0df7a9b65f77 100644 (file)
--- a/arch/avr32/include/uapi/asm/cachectl.h
+++ b/arch/avr32/include/uapi/asm/cachectl.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_CACHECTL_H
-#define __ASM_AVR32_CACHECTL_H
+#ifndef _UAPI__ASM_AVR32_CACHECTL_H
+#define _UAPI__ASM_AVR32_CACHECTL_H
  
  /*
   * Operations that can be performed through the cacheflush system call
@@ -8,4 +8,4 @@
  /* Clean the data cache, then invalidate the icache */
  #define CACHE_IFLUSH   0
  
-#endif /* __ASM_AVR32_CACHECTL_H */
+#endif /* _UAPI__ASM_AVR32_CACHECTL_H */
diff --git a/arch/avr32/include/uapi/asm/errno.h b/arch/avr32/include/uapi/asm/errno.h

deleted file mode 100644 (file)

index 558a724..0000000
--- a/arch/avr32/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_ERRNO_H
-#define __ASM_AVR32_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/arch/avr32/include/uapi/asm/fcntl.h b/arch/avr32/include/uapi/asm/fcntl.h

deleted file mode 100644 (file)

index 14c0c44..0000000
--- a/arch/avr32/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_FCNTL_H
-#define __ASM_AVR32_FCNTL_H
-
-#include <asm-generic/fcntl.h>
-
-#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctl.h b/arch/avr32/include/uapi/asm/ioctl.h

deleted file mode 100644 (file)

index c8472c1..0000000
--- a/arch/avr32/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTL_H
-#define __ASM_AVR32_IOCTL_H
-
-#include <asm-generic/ioctl.h>
-
-#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctls.h b/arch/avr32/include/uapi/asm/ioctls.h

deleted file mode 100644 (file)

index 909cf66..0000000
--- a/arch/avr32/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTLS_H
-#define __ASM_AVR32_IOCTLS_H
-
-#include <asm-generic/ioctls.h>
-
-#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/arch/avr32/include/uapi/asm/ipcbuf.h b/arch/avr32/include/uapi/asm/ipcbuf.h

deleted file mode 100644 (file)

index 84c7e51..0000000
--- a/arch/avr32/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/avr32/include/uapi/asm/kvm_para.h b/arch/avr32/include/uapi/asm/kvm_para.h

deleted file mode 100644 (file)

index 14fab8f..0000000
--- a/arch/avr32/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/avr32/include/uapi/asm/mman.h b/arch/avr32/include/uapi/asm/mman.h

deleted file mode 100644 (file)

index 8eebf89..0000000
--- a/arch/avr32/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/avr32/include/uapi/asm/msgbuf.h b/arch/avr32/include/uapi/asm/msgbuf.h

index ac18bc4da7f7acfe154ca532093ea5712fe714ad..9eae6effad14029c1f128277eb8c0eec89fe2be5 100644 (file)
--- a/arch/avr32/include/uapi/asm/msgbuf.h
+++ b/arch/avr32/include/uapi/asm/msgbuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_MSGBUF_H
-#define __ASM_AVR32_MSGBUF_H
+#ifndef _UAPI__ASM_AVR32_MSGBUF_H
+#define _UAPI__ASM_AVR32_MSGBUF_H
  
  /*
   * The msqid64_ds structure for i386 architecture.
@@ -28,4 +28,4 @@ struct msqid64_ds {
         unsigned long  __unused5;
  };
  
-#endif /* __ASM_AVR32_MSGBUF_H */
+#endif /* _UAPI__ASM_AVR32_MSGBUF_H */
diff --git a/arch/avr32/include/uapi/asm/poll.h b/arch/avr32/include/uapi/asm/poll.h

deleted file mode 100644 (file)

index c98509d..0000000
--- a/arch/avr32/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/avr32/include/uapi/asm/posix_types.h b/arch/avr32/include/uapi/asm/posix_types.h

index 9ba9e749b3f34d7c2760d1d9784ff9ede9528ef3..5b813a8abf0946645d00979dc1bcd997f43362bd 100644 (file)
--- a/arch/avr32/include/uapi/asm/posix_types.h
+++ b/arch/avr32/include/uapi/asm/posix_types.h
@@ -5,8 +5,8 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
-#ifndef __ASM_AVR32_POSIX_TYPES_H
-#define __ASM_AVR32_POSIX_TYPES_H
+#ifndef _UAPI__ASM_AVR32_POSIX_TYPES_H
+#define _UAPI__ASM_AVR32_POSIX_TYPES_H
  
  /*
   * This file is generally used by user-level software, so you need to
@@ -34,4 +34,4 @@ typedef unsigned short  __kernel_old_dev_t;
  
  #include <asm-generic/posix_types.h>
  
-#endif /* __ASM_AVR32_POSIX_TYPES_H */
+#endif /* _UAPI__ASM_AVR32_POSIX_TYPES_H */
diff --git a/arch/avr32/include/uapi/asm/resource.h b/arch/avr32/include/uapi/asm/resource.h

deleted file mode 100644 (file)

index c6dd101..0000000
--- a/arch/avr32/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_RESOURCE_H
-#define __ASM_AVR32_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif /* __ASM_AVR32_RESOURCE_H */
diff --git a/arch/avr32/include/uapi/asm/sembuf.h b/arch/avr32/include/uapi/asm/sembuf.h

index e472216e0c9717a2bd577775f9c686251c9688b9..6c6f7cf1e75ac99ce9f849d6a2343904cc2996d5 100644 (file)
--- a/arch/avr32/include/uapi/asm/sembuf.h
+++ b/arch/avr32/include/uapi/asm/sembuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SEMBUF_H
-#define __ASM_AVR32_SEMBUF_H
+#ifndef _UAPI__ASM_AVR32_SEMBUF_H
+#define _UAPI__ASM_AVR32_SEMBUF_H
  
  /*
  * The semid64_ds structure for AVR32 architecture.
@@ -22,4 +22,4 @@ struct semid64_ds {
          unsigned long   __unused4;
  };
  
-#endif /* __ASM_AVR32_SEMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SEMBUF_H */
diff --git a/arch/avr32/include/uapi/asm/setup.h b/arch/avr32/include/uapi/asm/setup.h

index e58aa9356fafc080e311ac9a080ac05af49fb9e6..a654df7dba462c68de298a3d7b3741b0a5855cf5 100644 (file)
--- a/arch/avr32/include/uapi/asm/setup.h
+++ b/arch/avr32/include/uapi/asm/setup.h
@@ -13,5 +13,4 @@
  
  #define COMMAND_LINE_SIZE 256
  
-
  #endif /* _UAPI__ASM_AVR32_SETUP_H__ */
diff --git a/arch/avr32/include/uapi/asm/shmbuf.h b/arch/avr32/include/uapi/asm/shmbuf.h

index c62fba41739aff13c647044b8c1017a7c6307614..b94cf8b60b73df0567125f2877aed7dbd7308be9 100644 (file)
--- a/arch/avr32/include/uapi/asm/shmbuf.h
+++ b/arch/avr32/include/uapi/asm/shmbuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SHMBUF_H
-#define __ASM_AVR32_SHMBUF_H
+#ifndef _UAPI__ASM_AVR32_SHMBUF_H
+#define _UAPI__ASM_AVR32_SHMBUF_H
  
  /*
   * The shmid64_ds structure for i386 architecture.
@@ -39,4 +39,4 @@ struct shminfo64 {
         unsigned long   __unused4;
  };
  
-#endif /* __ASM_AVR32_SHMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SHMBUF_H */
diff --git a/arch/avr32/include/uapi/asm/sigcontext.h b/arch/avr32/include/uapi/asm/sigcontext.h

index e04062b5f39fe421045ae6b6a4f5bd13920ca38d..27e56bf6377f6c00647b0ae5a0ff4c2cfce20542 100644 (file)
--- a/arch/avr32/include/uapi/asm/sigcontext.h
+++ b/arch/avr32/include/uapi/asm/sigcontext.h
@@ -5,8 +5,8 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
-#ifndef __ASM_AVR32_SIGCONTEXT_H
-#define __ASM_AVR32_SIGCONTEXT_H
+#ifndef _UAPI__ASM_AVR32_SIGCONTEXT_H
+#define _UAPI__ASM_AVR32_SIGCONTEXT_H
  
  struct sigcontext {
         unsigned long   oldmask;
@@ -31,4 +31,4 @@ struct sigcontext {
         unsigned long   r0;
  };
  
-#endif /* __ASM_AVR32_SIGCONTEXT_H */
+#endif /* _UAPI__ASM_AVR32_SIGCONTEXT_H */
diff --git a/arch/avr32/include/uapi/asm/siginfo.h b/arch/avr32/include/uapi/asm/siginfo.h

deleted file mode 100644 (file)

index 5ee93f4..0000000
--- a/arch/avr32/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _AVR32_SIGINFO_H
-#define _AVR32_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h

index 1b77a93eff500c14ac9594c9f6098007353c8a78..ffe8c770cafd86efe310fe9882210691fcf44c44 100644 (file)
--- a/arch/avr32/include/uapi/asm/signal.h
+++ b/arch/avr32/include/uapi/asm/signal.h
@@ -118,5 +118,4 @@ typedef struct sigaltstack {
         size_t ss_size;
  } stack_t;
  
-
  #endif /* _UAPI__ASM_AVR32_SIGNAL_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h

index 4399364214349674999c872ca4779c7089c22160..cbf902e4cd9e9ef1528e48a24f3d56c3fc2d889d 100644 (file)
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKET_H
-#define __ASM_AVR32_SOCKET_H
+#ifndef _UAPI__ASM_AVR32_SOCKET_H
+#define _UAPI__ASM_AVR32_SOCKET_H
  
  #include <asm/sockios.h>
  
@@ -78,4 +78,4 @@
  
  #define SO_MAX_PACING_RATE     47
  
-#endif /* __ASM_AVR32_SOCKET_H */
+#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/sockios.h b/arch/avr32/include/uapi/asm/sockios.h

index 0802d742f97d79138bbc5b4400f2fe1c145dece5..d04785453532a51fcfa0d3b654e875e18f1bfb30 100644 (file)
--- a/arch/avr32/include/uapi/asm/sockios.h
+++ b/arch/avr32/include/uapi/asm/sockios.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKIOS_H
-#define __ASM_AVR32_SOCKIOS_H
+#ifndef _UAPI__ASM_AVR32_SOCKIOS_H
+#define _UAPI__ASM_AVR32_SOCKIOS_H
  
  /* Socket-level I/O control calls. */
  #define FIOSETOWN      0x8901
@@ -10,4 +10,4 @@
  #define SIOCGSTAMP     0x8906          /* Get stamp (timeval) */
  #define SIOCGSTAMPNS   0x8907          /* Get stamp (timespec) */
  
-#endif /* __ASM_AVR32_SOCKIOS_H */
+#endif /* _UAPI__ASM_AVR32_SOCKIOS_H */
diff --git a/arch/avr32/include/uapi/asm/stat.h b/arch/avr32/include/uapi/asm/stat.h

index e72881e10230506e5adba38057738680234892e1..c06acef7fce7f42d6204d5d97a1c7a42100409a6 100644 (file)
--- a/arch/avr32/include/uapi/asm/stat.h
+++ b/arch/avr32/include/uapi/asm/stat.h
@@ -5,8 +5,8 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
-#ifndef __ASM_AVR32_STAT_H
-#define __ASM_AVR32_STAT_H
+#ifndef _UAPI__ASM_AVR32_STAT_H
+#define _UAPI__ASM_AVR32_STAT_H
  
  struct __old_kernel_stat {
          unsigned short st_dev;
@@ -76,4 +76,4 @@ struct stat64 {
         unsigned long   __unused2;
  };
  
-#endif /* __ASM_AVR32_STAT_H */
+#endif /* _UAPI__ASM_AVR32_STAT_H */
diff --git a/arch/avr32/include/uapi/asm/statfs.h b/arch/avr32/include/uapi/asm/statfs.h

deleted file mode 100644 (file)

index 2961bd1..0000000
--- a/arch/avr32/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_STATFS_H
-#define __ASM_AVR32_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif /* __ASM_AVR32_STATFS_H */
diff --git a/arch/avr32/include/uapi/asm/swab.h b/arch/avr32/include/uapi/asm/swab.h

index 14cc737bbca6e909dbd7ec44d36703731f8ca519..1a03549e7dc5ea9ef3818194cd0a5a319d90eddc 100644 (file)
--- a/arch/avr32/include/uapi/asm/swab.h
+++ b/arch/avr32/include/uapi/asm/swab.h
@@ -1,8 +1,8 @@
  /*
   * AVR32 byteswapping functions.
   */
-#ifndef __ASM_AVR32_SWAB_H
-#define __ASM_AVR32_SWAB_H
+#ifndef _UAPI__ASM_AVR32_SWAB_H
+#define _UAPI__ASM_AVR32_SWAB_H
  
  #include <linux/types.h>
  #include <linux/compiler.h>
@@ -32,4 +32,4 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
  #define __arch_swab32 __arch_swab32
  #endif
  
-#endif /* __ASM_AVR32_SWAB_H */
+#endif /* _UAPI__ASM_AVR32_SWAB_H */
diff --git a/arch/avr32/include/uapi/asm/termbits.h b/arch/avr32/include/uapi/asm/termbits.h

index 366adc5ebb100db9924f584568327db7ab70672c..32789ccb38f8434cfd494dbaed6088eb304156de 100644 (file)
--- a/arch/avr32/include/uapi/asm/termbits.h
+++ b/arch/avr32/include/uapi/asm/termbits.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_TERMBITS_H
-#define __ASM_AVR32_TERMBITS_H
+#ifndef _UAPI__ASM_AVR32_TERMBITS_H
+#define _UAPI__ASM_AVR32_TERMBITS_H
  
  #include <linux/posix_types.h>
  
@@ -193,4 +193,4 @@ struct ktermios {
  #define        TCSADRAIN       1
  #define        TCSAFLUSH       2
  
-#endif /* __ASM_AVR32_TERMBITS_H */
+#endif /* _UAPI__ASM_AVR32_TERMBITS_H */
diff --git a/arch/avr32/include/uapi/asm/termios.h b/arch/avr32/include/uapi/asm/termios.h

index b8ef8ea6335284bac7281f518410b2c568c9a632..c8a0081556c4da3b8dfb4041392fb1bb95591f40 100644 (file)
--- a/arch/avr32/include/uapi/asm/termios.h
+++ b/arch/avr32/include/uapi/asm/termios.h
@@ -46,5 +46,4 @@ struct termio {
  
  /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
  
-
  #endif /* _UAPI__ASM_AVR32_TERMIOS_H */
diff --git a/arch/avr32/include/uapi/asm/types.h b/arch/avr32/include/uapi/asm/types.h

index bb34ad349dfc1a12d35d1e48066baa71f4c4bee2..7c986c4e99b55cf3727ceb68eeb78841c8d99b11 100644 (file)
--- a/arch/avr32/include/uapi/asm/types.h
+++ b/arch/avr32/include/uapi/asm/types.h
@@ -5,4 +5,9 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
+#ifndef _UAPI__ASM_AVR32_TYPES_H
+#define _UAPI__ASM_AVR32_TYPES_H
+
  #include <asm-generic/int-ll64.h>
+
+#endif /* _UAPI__ASM_AVR32_TYPES_H */
diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h

index 3eaa68753adba04cd4382424b4b8eeef9a1b440d..8822bf46ddc683758ea9c6be98d0f15d995c9394 100644 (file)
--- a/arch/avr32/include/uapi/asm/unistd.h
+++ b/arch/avr32/include/uapi/asm/unistd.h
@@ -301,5 +301,4 @@
  #define __NR_eventfd           281
  #define __NR_setns             283
  
-
  #endif /* _UAPI__ASM_AVR32_UNISTD_H */
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S

index 9899d3cc6f03b109f352faa9ac51387f96497cf7..7301f4806bbede59cf5eaa48649802ec75cddacf 100644 (file)
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -401,9 +401,10 @@ handle_critical:
         /* We should never get here... */
  bad_return:
         sub     r12, pc, (. - 1f)
-       bral    panic
+       lddpc   pc, 2f
         .align  2
  1:     .asciz  "Return from critical exception!"
+2:     .long   panic
  
         .align  1
  do_bus_error_write:
diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S

index 6163bd0acb958ae3a016ec125a574764fec48c12..59eae6dfbed2b5f1167864bf6311b0b231da9a10 100644 (file)
--- a/arch/avr32/kernel/head.S
+++ b/arch/avr32/kernel/head.S
@@ -10,33 +10,13 @@
  #include <linux/linkage.h>
  
  #include <asm/page.h>
-#include <asm/thread_info.h>
-#include <asm/sysreg.h>
  
         .section .init.text,"ax"
         .global kernel_entry
  kernel_entry:
-       /* Initialize status register */
-       lddpc   r0, init_sr
-       mtsr    SYSREG_SR, r0
-
-       /* Set initial stack pointer */
-       lddpc   sp, stack_addr
-       sub     sp, -THREAD_SIZE
-
-#ifdef CONFIG_FRAME_POINTER
-       /* Mark last stack frame */
-       mov     lr, 0
-       mov     r7, 0
-#endif
-
         /* Start the show */
         lddpc   pc, kernel_start_addr
  
         .align  2
-init_sr:
-       .long   0x007f0000      /* Supervisor mode, everything masked */
-stack_addr:
-       .long   init_thread_union
  kernel_start_addr:
         .long   start_kernel
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c

index d43daf192b21d54df034e52c809a4ac4b2178161..4c530a82fc469f976b8a38a341f077a43f9c9998 100644 (file)
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1992,7 +1992,7 @@ sba_connect_bus(struct pci_bus *bus)
         if (PCI_CONTROLLER(bus)->iommu)
                 return;
  
-       handle = PCI_CONTROLLER(bus)->acpi_handle;
+       handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
         if (!handle)
                 return;
  
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h

index 80775f55f03f9293c2d05d2598e349f08ef9b56e..71fbaaa495ccc18af5f33cdaef92c0ab228dad8e 100644 (file)
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,7 +95,7 @@ struct iospace_resource {
  };
  
  struct pci_controller {
-       void *acpi_handle;
+       struct acpi_device *companion;
         void *iommu;
         int segment;
         int node;               /* nearest node with memory or -1 for global allocation */
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c

index 5a9ff1c3c3e912c5d0435ea86900a664e525fa16..cb592773c78b1ef1f86faa4a4190c507c4e9fdbf 100644 (file)
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = {
         .flush          = pfm_flush
  };
  
-static int
-pfmfs_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
  static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
         return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
@@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
  }
  
  static const struct dentry_operations pfmfs_dentry_operations = {
-       .d_delete = pfmfs_delete_dentry,
+       .d_delete = always_delete_dentry,
         .d_dname = pfmfs_dname,
  };
  
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c

index 2326790b7d8be4f9e6cffbea4c4a22fc1ab91f3e..9e4938d8ca4d297e331131a79d8458d97a27d256 100644 (file)
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -436,9 +436,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
         if (!controller)
                 return NULL;
  
-       controller->acpi_handle = device->handle;
+       controller->companion = device;
  
-       pxm = acpi_get_pxm(controller->acpi_handle);
+       pxm = acpi_get_pxm(device->handle);
  #ifdef CONFIG_NUMA
         if (pxm >= 0)
                 controller->node = pxm_to_node(pxm);
@@ -489,7 +489,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
  {
         struct pci_controller *controller = bridge->bus->sysdata;
  
-       ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+       ACPI_COMPANION_SET(&bridge->dev, controller->companion);
         return 0;
  }
  
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c

index b1725398b5af49683622765652104a85c3d95228..0640739cc20cf2895fca507babfe7de9096d8a41 100644 (file)
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -132,7 +132,7 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
         struct acpi_resource_vendor_typed *vendor;
  
  
-       handle = PCI_CONTROLLER(bus)->acpi_handle;
+       handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
         status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
                                           &sn_uuid, &buffer);
         if (ACPI_FAILURE(status)) {
@@ -360,7 +360,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
         acpi_status status;
         struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
  
-       rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+       rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
          status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
                                         &segment);
          if (ACPI_SUCCESS(status)) {
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h

new file mode 100644 (file)

index 0000000..748016c
--- /dev/null
+++ b/arch/parisc/include/asm/socket.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <uapi/asm/socket.h>
+
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK  0x40000000
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h

index 63f4dd0b49c29c758807b68bbf9b7bf9b71911c3..4006964d8e12646761d954b9f73ff0b503e736b6 100644 (file)
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -4,14 +4,11 @@
  /*
   * User space memory access functions
   */
-#include <asm/processor.h>
  #include <asm/page.h>
  #include <asm/cache.h>
  #include <asm/errno.h>
  #include <asm-generic/uaccess-unaligned.h>
  
-#include <linux/sched.h>
-
  #define VERIFY_READ 0
  #define VERIFY_WRITE 1
  
@@ -36,43 +33,12 @@ extern int __get_user_bad(void);
  extern int __put_kernel_bad(void);
  extern int __put_user_bad(void);
  
-
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- */
-static inline int __range_not_ok(unsigned long addr, unsigned long size,
-                                unsigned long limit)
+static inline long access_ok(int type, const void __user * addr,
+               unsigned long size)
  {
-       unsigned long __newaddr = addr + size;
-       return (__newaddr < addr || __newaddr > limit || size > limit);
+       return 1;
  }
  
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
- *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
- *        to write to a block, it is always safe to read from it.
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only.  This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(type, addr, size)                                    \
-(      __chk_user_ptr(addr),                                           \
-       !__range_not_ok((unsigned long) (__force void *) (addr),        \
-                       size, user_addr_max())                          \
-)
-
  #define put_user __put_user
  #define get_user __get_user
  
@@ -253,11 +219,7 @@ extern long lstrnlen_user(const char __user *,long);
  /*
   * Complex access routines -- macros
   */
-#ifdef CONFIG_COMPAT
-#define user_addr_max() (TASK_SIZE)
-#else
-#define user_addr_max() (DEFAULT_TASK_SIZE)
-#endif
+#define user_addr_max() (~0UL)
  
  #define strnlen_user lstrnlen_user
  #define strlen_user(str) lstrnlen_user(str, 0x7fffffffL)
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h

index 7c614d01f1fa42df36a23b3633f6d6ae7d47aa6a..f33113a6141e7540da2195cc72469152edfbecf2 100644 (file)
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef _UAPI_ASM_SOCKET_H
+#define _UAPI_ASM_SOCKET_H
  
  #include <asm/sockios.h>
  
@@ -77,9 +77,4 @@
  
  #define SO_MAX_PACING_RATE     0x4048
  
-/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
- * have to define SOCK_NONBLOCK to a different value here.
- */
-#define SOCK_NONBLOCK   0x40000000
-
-#endif /* _ASM_SOCKET_H */
+#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c

index b5507ec06b846f09ed4d38c5841b4eecaffb156e..413dc1769299685f00289193ca301ba7e2c0839d 100644 (file)
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -161,7 +161,7 @@ static inline void prefetch_dst(const void *addr)
  /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
   * per loop.  This code is derived from glibc. 
   */
-static inline unsigned long copy_dstaligned(unsigned long dst,
+static noinline unsigned long copy_dstaligned(unsigned long dst,
                                         unsigned long src, unsigned long len)
  {
         /* gcc complains that a2 and a3 may be uninitialized, but actually
@@ -276,7 +276,7 @@ handle_store_error:
  /* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
   * In case of an access fault the faulty address can be read from the per_cpu
   * exception data struct. */
-static unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
+static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
                                         unsigned long len)
  {
         register unsigned long src, dst, t1, t2, t3;
@@ -529,7 +529,7 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
  {
         unsigned long addr = (unsigned long)src;
  
-       if (size < 0 || addr < PAGE_SIZE)
+       if (addr < PAGE_SIZE)
                 return -EFAULT;
  
         /* check for I/O space F_EXTEND(0xfff00000) access as well? */
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c

index 7584a5df0fa4a76f5e2815fac25a38436a587955..9d08c71a967ed2e1e86189272369f4b794b8453c 100644 (file)
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -282,16 +282,34 @@ bad_area:
  #endif
                 switch (code) {
                 case 15:        /* Data TLB miss fault/Data page fault */
+                       /* send SIGSEGV when outside of vma */
+                       if (!vma ||
+                           address < vma->vm_start || address > vma->vm_end) {
+                               si.si_signo = SIGSEGV;
+                               si.si_code = SEGV_MAPERR;
+                               break;
+                       }
+
+                       /* send SIGSEGV for wrong permissions */
+                       if ((vma->vm_flags & acc_type) != acc_type) {
+                               si.si_signo = SIGSEGV;
+                               si.si_code = SEGV_ACCERR;
+                               break;
+                       }
+
+                       /* probably address is outside of mapped file */
+                       /* fall through */
                 case 17:        /* NA data TLB miss / page fault */
                 case 18:        /* Unaligned access - PCXS only */
                         si.si_signo = SIGBUS;
-                       si.si_code = BUS_ADRERR;
+                       si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
                         break;
                 case 16:        /* Non-access instruction TLB miss fault */
                 case 26:        /* PCXL: Data memory access rights trap */
                 default:
                         si.si_signo = SIGSEGV;
-                       si.si_code = SEGV_MAPERR;
+                       si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
+                       break;
                 }
                 si.si_errno = 0;
                 si.si_addr = (void __user *) address;
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile

index 607acf54a425b2b50913ea6b4f48024a5d21aadc..8a2463670a5b8107243f313c7e272339570ce301 100644 (file)
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -111,6 +111,7 @@ endif
  endif
  
  CFLAGS-$(CONFIG_PPC64) := -mtraceback=no -mcall-aixdesc
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
  CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi

index 4c617bf8cdb24af8f3a202d40c4cc98b776768fd..4f6e48277c46170bb806bbb17e41c2887de4054f 100644 (file)
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -223,13 +223,13 @@
                 reg = <0xe2000 0x1000>;
         };
  
-/include/ "qoriq-dma-0.dtsi"
+/include/ "elo3-dma-0.dtsi"
         dma@100300 {
                 fsl,iommu-parent = <&pamu0>;
                 fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
         };
  
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-1.dtsi"
         dma@101300 {
                 fsl,iommu-parent = <&pamu0>;
                 fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi

new file mode 100644 (file)

index 0000000..3c210e0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x100300 0x4>,
+             <0x100600 0x4>;
+       ranges = <0x0 0x100100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <28 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <29 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <30 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <31 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <76 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <77 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <78 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <79 2 0 0>;
+       };
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi

new file mode 100644 (file)

index 0000000..cccf3bb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x101300 0x4>,
+             <0x101600 0x4>;
+       ranges = <0x0 0x101100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <32 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <33 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <34 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <35 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <80 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <81 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <82 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <83 2 0 0>;
+       };
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi

index 510afa362de141465b50aada0cf0e7c9ceb83536..4143a9733cd01e0ad62d9d3d98902e7f414dd8f3 100644 (file)
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -387,8 +387,8 @@
                 reg        = <0xea000 0x4000>;
         };
  
-/include/ "qoriq-dma-0.dtsi"
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
  
  /include/ "qoriq-espi-0.dtsi"
         spi@110000 {
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig

new file mode 100644 (file)

index 0000000..62771e0
--- /dev/null
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -0,0 +1,352 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_SPLPAR=y
+CONFIG_SCANLOG=m
+CONFIG_PPC_SMLPAR=y
+CONFIG_DTL=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_RTAS_FLASH=m
+CONFIG_IBMEBUS=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_CMA=y
+CONFIG_PPC_64K_PAGES=y
+CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_SCHED_SMT=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_RPA=m
+CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IBMVSCSI=y
+CONFIG_SCSI_IBMVFC=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_IBMVETH=y
+CONFIG_EHEA=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=m
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_ICOM=m
+CONFIG_SERIAL_JSM=m
+CONFIG_HVC_CONSOLE=y
+CONFIG_HVC_RTAS=y
+CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IBM_BSR=m
+CONFIG_GEN_RTC=y
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=1024
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_EHCA=m
+CONFIG_INFINIBAND_CXGB3=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h

index cc0655a702a739236d29168293478153385ecfc6..935b5e7a1436dbbead45f9e578f75a024de0366f 100644 (file)
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -31,6 +31,8 @@
  extern unsigned long randomize_et_dyn(unsigned long base);
  #define ELF_ET_DYN_BASE                (randomize_et_dyn(0x20000000))
  
+#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
  /*
   * Our registers are always unsigned longs, whether we're a 32 bit
   * process or 64 bit, on either a 64 bit or 32 bit kernel.
@@ -86,6 +88,8 @@ typedef elf_vrregset_t elf_fpxregset_t;
  #ifdef __powerpc64__
  # define SET_PERSONALITY(ex)                                   \
  do {                                                           \
+       if (((ex).e_flags & 0x3) == 2)                          \
+               set_thread_flag(TIF_ELF2ABI);                   \
         if ((ex).e_ident[EI_CLASS] == ELFCLASS32)               \
                 set_thread_flag(TIF_32BIT);                     \
         else                                                    \
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h

index 0c7f2bfcf1348100fb10c4cd6f74dcee34e42756..d8b600b3f058bda7e0358931b885b8c5d4f30215 100644 (file)
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -403,6 +403,8 @@ static inline unsigned long cmo_get_page_size(void)
  extern long pSeries_enable_reloc_on_exc(void);
  extern long pSeries_disable_reloc_on_exc(void);
  
+extern long pseries_big_endian_exceptions(void);
+
  #else
  
  #define pSeries_enable_reloc_on_exc()  do {} while (0)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h

index a63b045e707ce8a5d34c91e8fa698808a8ee4647..12c32c5f533d924428714301f7482493df00e3c8 100644 (file)
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -287,6 +287,32 @@ static inline long disable_reloc_on_exceptions(void) {
         return plpar_set_mode(0, 3, 0, 0);
  }
  
+/*
+ * Take exceptions in big endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_big_endian_exceptions(void)
+{
+       /* mflags = 0: big endian exceptions */
+       return plpar_set_mode(0, 4, 0, 0);
+}
+
+/*
+ * Take exceptions in little endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_little_endian_exceptions(void)
+{
+       /* mflags = 1: little endian exceptions */
+       return plpar_set_mode(1, 4, 0, 0);
+}
+
  static inline long plapr_set_ciabr(unsigned long ciabr)
  {
         return plpar_set_mode(0, 1, ciabr, 0);
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h

index 98da78e0c2c0eeda19f1852faa725742aa43af76..084e0807db988e2a24b836df800406739f91a7e9 100644 (file)
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -33,6 +33,7 @@ extern int boot_cpuid;
  extern int spinning_secondaries;
  
  extern void cpu_die(void);
+extern int cpu_to_chip_id(int cpu);
  
  #ifdef CONFIG_SMP
  
@@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu)
  }
  
  extern int cpu_to_core_id(int cpu);
-extern int cpu_to_chip_id(int cpu);
  
  /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
   *
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h

index 8fd6cf6dcee854c8244010ce12d107abb0fa81d7..9854c564ac525b02b9f81c363a1cd4477cd76d4b 100644 (file)
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -105,6 +105,9 @@ static inline struct thread_info *current_thread_info(void)
  #define TIF_EMULATE_STACK_STORE        16      /* Is an instruction emulation
                                                 for stack store? */
  #define TIF_MEMDIE             17      /* is terminating due to OOM killer */
+#if defined(CONFIG_PPC64)
+#define TIF_ELF2ABI            18      /* function descriptors must die! */
+#endif
  
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -183,6 +186,12 @@ static inline bool test_thread_local_flags(unsigned int flags)
  #define is_32bit_task()        (1)
  #endif
  
+#if defined(CONFIG_PPC64)
+#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
+#else
+#define is_elf2_task() (0)
+#endif
+
  #endif /* !__ASSEMBLY__ */
  
  #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index 67130206534717b285fb50c77d38e4557f053eae..4bd687d5e7aa3f80e1b33912b5451bd34a3187b1 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -686,6 +686,15 @@ void eeh_save_bars(struct eeh_dev *edev)
  
         for (i = 0; i < 16; i++)
                 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+
+       /*
+        * For PCI bridges including root port, we need enable bus
+        * master explicitly. Otherwise, it can't fetch IODA table
+        * entries correctly. So we cache the bit in advance so that
+        * we can restore it after reset, either PHB range or PE range.
+        */
+       if (edev->mode & EEH_DEV_BRIDGE)
+               edev->config_space[1] |= PCI_COMMAND_MASTER;
  }
  
  /**
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c

index d27c5afc90aecfbe41506814d3a0c3891bdacc4b..72d748b56c86b2b9ae960e49b819dfedef36f61a 100644 (file)
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy)
                 pe = event->pe;
                 if (pe) {
                         eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
-                       pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
-                                pe->phb->global_number, pe->addr);
+                       if (pe->type & EEH_PE_PHB)
+                               pr_info("EEH: Detected error on PHB#%d\n",
+                                        pe->phb->global_number);
+                       else
+                               pr_info("EEH: Detected PCI bus error on "
+                                       "PHB#%d-PE#%x\n",
+                                       pe->phb->global_number, pe->addr);
                         eeh_handle_event(pe);
                         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
                 } else {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c

index 75c2d1009985eb8dbd9f3995fc900f9f21227d01..3386d8ab7eb0607b3c9d6f03e68824d4abe4bd88 100644 (file)
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -858,17 +858,21 @@ void show_regs(struct pt_regs * regs)
         printk("MSR: "REG" ", regs->msr);
         printbits(regs->msr, msr_bits);
         printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
-#ifdef CONFIG_PPC64
-       printk("SOFTE: %ld\n", regs->softe);
-#endif
         trap = TRAP(regs);
         if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
-               printk("CFAR: "REG"\n", regs->orig_gpr3);
-       if (trap == 0x300 || trap == 0x600)
+               printk("CFAR: "REG" ", regs->orig_gpr3);
+       if (trap == 0x200 || trap == 0x300 || trap == 0x600)
  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-               printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
+               printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
  #else
-               printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr);
+               printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+#endif
+#ifdef CONFIG_PPC64
+       printk("SOFTE: %ld ", regs->softe);
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (MSR_TM_ACTIVE(regs->msr))
+               printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
  #endif
  
         for (i = 0;  i < 32;  i++) {
@@ -886,9 +890,6 @@ void show_regs(struct pt_regs * regs)
          */
         printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
         printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch);
  #endif
         show_stack(current, (unsigned long *) regs->gpr[1]);
         if (!user_mode(regs))
@@ -1086,25 +1087,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
         regs->msr = MSR_USER;
  #else
         if (!is_32bit_task()) {
-               unsigned long entry, toc;
+               unsigned long entry;
  
-               /* start is a relocated pointer to the function descriptor for
-                * the elf _start routine.  The first entry in the function
-                * descriptor is the entry address of _start and the second
-                * entry is the TOC value we need to use.
-                */
-               __get_user(entry, (unsigned long __user *)start);
-               __get_user(toc, (unsigned long __user *)start+1);
+               if (is_elf2_task()) {
+                       /* Look ma, no function descriptors! */
+                       entry = start;
  
-               /* Check whether the e_entry function descriptor entries
-                * need to be relocated before we can use them.
-                */
-               if (load_addr != 0) {
-                       entry += load_addr;
-                       toc   += load_addr;
+                       /*
+                        * Ulrich says:
+                        *   The latest iteration of the ABI requires that when
+                        *   calling a function (at its global entry point),
+                        *   the caller must ensure r12 holds the entry point
+                        *   address (so that the function can quickly
+                        *   establish addressability).
+                        */
+                       regs->gpr[12] = start;
+                       /* Make sure that's restored on entry to userspace. */
+                       set_thread_flag(TIF_RESTOREALL);
+               } else {
+                       unsigned long toc;
+
+                       /* start is a relocated pointer to the function
+                        * descriptor for the elf _start routine.  The first
+                        * entry in the function descriptor is the entry
+                        * address of _start and the second entry is the TOC
+                        * value we need to use.
+                        */
+                       __get_user(entry, (unsigned long __user *)start);
+                       __get_user(toc, (unsigned long __user *)start+1);
+
+                       /* Check whether the e_entry function descriptor entries
+                        * need to be relocated before we can use them.
+                        */
+                       if (load_addr != 0) {
+                               entry += load_addr;
+                               toc   += load_addr;
+                       }
+                       regs->gpr[2] = toc;
                 }
                 regs->nip = entry;
-               regs->gpr[2] = toc;
                 regs->msr = MSR_USER64;
         } else {
                 regs->nip = start;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c

index f3a47098fb8e90b31e98a7d2808f6e691064e363..fa0ad8aafbccf3950506a96a64f75bc2141f1bd7 100644 (file)
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np)
         return -1;
  }
  
+/**
+ * cpu_to_chip_id - Return the cpus chip-id
+ * @cpu: The logical cpu number.
+ *
+ * Return the value of the ibm,chip-id property corresponding to the given
+ * logical cpu number. If the chip-id can not be found, returns -1.
+ */
+int cpu_to_chip_id(int cpu)
+{
+       struct device_node *np;
+
+       np = of_get_cpu_node(cpu, NULL);
+       if (!np)
+               return -1;
+
+       of_node_put(np);
+       return of_get_ibm_chip_id(np);
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
  #ifdef CONFIG_PPC_PSERIES
  /*
   * Fix up the uninitialized fields in a new device node:
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c

index 749778e0a69d97dfded0e719066fb1fb4eebac71..1844298f5ea49ea913111c761fb324398c5ed530 100644 (file)
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -457,7 +457,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
                 if (copy_vsx_to_user(&frame->mc_vsregs, current))
                         return 1;
                 msr |= MSR_VSX;
-       }
+       } else if (!ctx_has_vsx_region)
+               /*
+                * With a small context structure we can't hold the VSX
+                * registers, hence clear the MSR value to indicate the state
+                * was not saved.
+                */
+               msr &= ~MSR_VSX;
+
+
  #endif /* CONFIG_VSX */
  #ifdef CONFIG_SPE
         /* save spe registers */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c

index b3c615764c9b97bcb510d017bd9c8ff33e6d69ec..e66f67b8b9e67c1ca4bddbfaa6fb9e32e86c24a9 100644 (file)
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -701,12 +701,6 @@ badframe:
  int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
                 sigset_t *set, struct pt_regs *regs)
  {
-       /* Handler is *really* a pointer to the function descriptor for
-        * the signal routine.  The first entry in the function
-        * descriptor is the entry address of signal and the second
-        * entry is the TOC value we need to use.
-        */
-       func_descr_t __user *funct_desc_ptr;
         struct rt_sigframe __user *frame;
         unsigned long newsp = 0;
         long err = 0;
@@ -766,19 +760,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
                         goto badframe;
                 regs->link = (unsigned long) &frame->tramp[0];
         }
-       funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
  
         /* Allocate a dummy caller frame for the signal handler. */
         newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
         err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
  
         /* Set up "regs" so we "return" to the signal handler. */
-       err |= get_user(regs->nip, &funct_desc_ptr->entry);
+       if (is_elf2_task()) {
+               regs->nip = (unsigned long) ka->sa.sa_handler;
+               regs->gpr[12] = regs->nip;
+       } else {
+               /* Handler is *really* a pointer to the function descriptor for
+                * the signal routine.  The first entry in the function
+                * descriptor is the entry address of signal and the second
+                * entry is the TOC value we need to use.
+                */
+               func_descr_t __user *funct_desc_ptr =
+                       (func_descr_t __user *) ka->sa.sa_handler;
+
+               err |= get_user(regs->nip, &funct_desc_ptr->entry);
+               err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+       }
+
         /* enter the signal handler in native-endian mode */
         regs->msr &= ~MSR_LE;
         regs->msr |= (MSR_KERNEL & MSR_LE);
         regs->gpr[1] = newsp;
-       err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
         regs->gpr[3] = signr;
         regs->result = 0;
         if (ka->sa.sa_flags & SA_SIGINFO) {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index 930cd8af35035441031e1abecbdde3472ab48808..a3b64f3bf9a298b057cfdc57b008ea01eebecf63 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -597,22 +597,6 @@ out:
         return id;
  }
  
-/* Return the value of the chip-id property corresponding
- * to the given logical cpu.
- */
-int cpu_to_chip_id(int cpu)
-{
-       struct device_node *np;
-
-       np = of_get_cpu_node(cpu, NULL);
-       if (!np)
-               return -1;
-
-       of_node_put(np);
-       return of_get_ibm_chip_id(np);
-}
-EXPORT_SYMBOL(cpu_to_chip_id);
-
  /* Helper routines for cpu to core mapping */
  int cpu_core_index_of_thread(int cpu)
  {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c

index 192b051df97e27e6a8a74b344151489af6f544d6..b3b144121cc99d64df0c409b0773259770ddf0af 100644 (file)
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -213,8 +213,6 @@ static u64 scan_dispatch_log(u64 stop_tb)
         if (i == be64_to_cpu(vpa->dtl_idx))
                 return 0;
         while (i < be64_to_cpu(vpa->dtl_idx)) {
-               if (dtl_consumer)
-                       dtl_consumer(dtl, i);
                 dtb = be64_to_cpu(dtl->timebase);
                 tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
                         be32_to_cpu(dtl->ready_to_enqueue_time);
@@ -227,6 +225,8 @@ static u64 scan_dispatch_log(u64 stop_tb)
                 }
                 if (dtb > stop_tb)
                         break;
+               if (dtl_consumer)
+                       dtl_consumer(dtl, i);
                 stolen += tb_delta;
                 ++i;
                 ++dtl;
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S

index 45ea281e9a21d479a36e47fbd94a3a0c65075bee..542c6f422e4d4a6288ef1cf18ac877c330ca4339 100644 (file)
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
  /* Size of CR reg in DWARF unwind info. */
  #define CRSIZE 4
  
+/* Offset of CR reg within a full word. */
+#ifdef __LITTLE_ENDIAN__
+#define CROFF 0
+#else
+#define CROFF (RSIZE - CRSIZE)
+#endif
+
  /* This is the offset of the VMX reg pointer.  */
  #define VREGS  48*RSIZE+33*8
  
@@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
    rsave (31, 31*RSIZE);                                                        \
    rsave (67, 32*RSIZE);                /* ap, used as temp for nip */          \
    rsave (65, 36*RSIZE);                /* lr */                                \
-  rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */
+  rsave (68, 38*RSIZE + CROFF);        /* cr fields */                         \
+  rsave (69, 38*RSIZE + CROFF);                                                \
+  rsave (70, 38*RSIZE + CROFF);                                                \
+  rsave (71, 38*RSIZE + CROFF);                                                \
+  rsave (72, 38*RSIZE + CROFF);                                                \
+  rsave (73, 38*RSIZE + CROFF);                                                \
+  rsave (74, 38*RSIZE + CROFF);                                                \
+  rsave (75, 38*RSIZE + CROFF)
  
  /* Describe where the FP regs are saved.  */
  #define EH_FRAME_FP \
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c

index e7d0c88f621aa08425cb502b9e68b4cbf549ccd3..76a64821f4a23653b64cc6ba0e32daec509ee5b5 100644 (file)
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1419,7 +1419,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
  
                 /* needed to ensure proper operation of coherent allocations
                  * later, in case driver doesn't set it explicitly */
-               dma_set_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
+               dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
         }
  
         /* register with generic device framework */
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c

index 6936547018b89e21bbe63b2371c9607917fd66dd..c5f734e20b0fc3a885a81a82f41a0ac5cd93f94a 100644 (file)
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -123,6 +123,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
         struct mm_struct *mm = current->mm;
         unsigned long addr, len, end;
         unsigned long next;
+       unsigned long flags;
         pgd_t *pgdp;
         int nr = 0;
  
@@ -156,7 +157,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
          * So long as we atomically load page table pointers versus teardown,
          * we can follow the address down to the the page and take a ref on it.
          */
-       local_irq_disable();
+       local_irq_save(flags);
  
         pgdp = pgd_offset(mm, addr);
         do {
@@ -179,7 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                         break;
         } while (pgdp++, addr = next, addr != end);
  
-       local_irq_enable();
+       local_irq_restore(flags);
  
         return nr;
  }
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c

index 3e99c149271aa0d4f454c95a9d23eaffa7debec6..7ce9cf3b698835c0dd2b2644d137ff7549c68e72 100644 (file)
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr,
                 slice = GET_HIGH_SLICE_INDEX(addr);
                 *boundary_addr = (slice + end) ?
                         ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-               return !!(available.high_slices & (1u << slice));
+               return !!(available.high_slices & (1ul << slice));
         }
  }
  
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype

index c2a566fb8bb89cc816b8841fe47fcdf35b222836..132f8726a257c4ed7608534a8f61496d89b69d80 100644 (file)
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -403,3 +403,14 @@ config PPC_DOORBELL
         default n
  
  endmenu
+
+config CPU_LITTLE_ENDIAN
+       bool "Build little endian kernel"
+       default n
+       help
+         This option selects whether a big endian or little endian kernel will
+         be built.
+
+         Note that if cross compiling a little endian kernel,
+         CROSS_COMPILE must point to a toolchain capable of targeting
+         little endian powerpc.
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c

index 8844628915dc4770b8d9ce35f4a03ab9dee5e94c..1cb160dc1609a5be5dbfd17ff0bfcbeaf362829b 100644 (file)
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -19,6 +19,7 @@
  #include <asm/io.h>
  #include <asm/prom.h>
  #include <asm/machdep.h>
+#include <asm/smp.h>
  
  
  struct powernv_rng {
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c

index 7fbc25b1813fe59ea80eceba65cb2a76027c0eec..ccb633e077b16d6c7a57bb6cfd4372ca87ceeb6a 100644 (file)
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -189,8 +189,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
         struct eeh_dev *edev;
         struct eeh_pe pe;
         struct pci_dn *pdn = PCI_DN(dn);
-       const u32 *class_code, *vendor_id, *device_id;
-       const u32 *regs;
+       const __be32 *classp, *vendorp, *devicep;
+       u32 class_code;
+       const __be32 *regs;
         u32 pcie_flags;
         int enable = 0;
         int ret;
@@ -201,22 +202,24 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
                 return NULL;
  
         /* Retrieve class/vendor/device IDs */
-       class_code = of_get_property(dn, "class-code", NULL);
-       vendor_id  = of_get_property(dn, "vendor-id", NULL);
-       device_id  = of_get_property(dn, "device-id", NULL);
+       classp = of_get_property(dn, "class-code", NULL);
+       vendorp = of_get_property(dn, "vendor-id", NULL);
+       devicep = of_get_property(dn, "device-id", NULL);
  
         /* Skip for bad OF node or PCI-ISA bridge */
-       if (!class_code || !vendor_id || !device_id)
+       if (!classp || !vendorp || !devicep)
                 return NULL;
         if (dn->type && !strcmp(dn->type, "isa"))
                 return NULL;
  
+       class_code = of_read_number(classp, 1);
+
         /*
          * Update class code and mode of eeh device. We need
          * correctly reflects that current device is root port
          * or PCIe switch downstream port.
          */
-       edev->class_code = *class_code;
+       edev->class_code = class_code;
         edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
         edev->mode &= 0xFFFFFF00;
         if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
@@ -243,12 +246,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
         /* Initialize the fake PE */
         memset(&pe, 0, sizeof(struct eeh_pe));
         pe.phb = edev->phb;
-       pe.config_addr = regs[0];
+       pe.config_addr = of_read_number(regs, 1);
  
         /* Enable EEH on the device */
         ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
         if (!ret) {
-               edev->config_addr = regs[0];
+               edev->config_addr = of_read_number(regs, 1);
                 /* Retrieve PE address */
                 edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
                 pe.addr = edev->pe_config_addr;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index 356bc75ca74f6f0bf8f08dd6eca37b5c538dbfc3..4fca3def9db951896864d0b716d7dea8a2364ba2 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -245,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void)
                                         &(ptes[j].pteh), &(ptes[j].ptel));
                 }
         }
+
+#ifdef __LITTLE_ENDIAN__
+       /* Reset exceptions to big endian */
+       if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+               long rc;
+
+               rc = pseries_big_endian_exceptions();
+               /*
+                * At this point it is unlikely panic() will get anything
+                * out to the user, but at least this will stop us from
+                * continuing on further and creating an even more
+                * difficult to debug situation.
+                */
+               if (rc)
+                       panic("Could not enable big endian exceptions");
+       }
+#endif
  }
  
  /*
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c

index a702f1c0824292286bc008300886955844ffc380..72a102758d4e5f54e8540b5e78e6417f1a23da3d 100644 (file)
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -13,6 +13,7 @@
  #include <linux/of.h>
  #include <asm/archrandom.h>
  #include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
  
  
  static int pseries_get_random_long(unsigned long *v)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c

index 1f97e2b87a62b85d30bf848a8dbab31938304f21..c1f1908587011d6d131dd56bdf2e1cb283c41128 100644 (file)
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -442,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image)
  }
  #endif
  
+#ifdef __LITTLE_ENDIAN__
+long pseries_big_endian_exceptions(void)
+{
+       long rc;
+
+       while (1) {
+               rc = enable_big_endian_exceptions();
+               if (!H_IS_LONG_BUSY(rc))
+                       return rc;
+               mdelay(get_longbusy_msecs(rc));
+       }
+}
+
+static long pseries_little_endian_exceptions(void)
+{
+       long rc;
+
+       while (1) {
+               rc = enable_little_endian_exceptions();
+               if (!H_IS_LONG_BUSY(rc))
+                       return rc;
+               mdelay(get_longbusy_msecs(rc));
+       }
+}
+#endif
+
  static void __init pSeries_setup_arch(void)
  {
         panic_timeout = 10;
@@ -698,6 +724,22 @@ static int __init pSeries_probe(void)
         /* Now try to figure out if we are running on LPAR */
         of_scan_flat_dt(pseries_probe_fw_features, NULL);
  
+#ifdef __LITTLE_ENDIAN__
+       if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+               long rc;
+               /*
+                * Tell the hypervisor that we want our exceptions to
+                * be taken in little endian mode. If this fails we don't
+                * want to use BUG() because it will trigger an exception.
+                */
+               rc = pseries_little_endian_exceptions();
+               if (rc) {
+                       ppc_md.progress("H_SET_MODE LE exception fail", 0);
+                       panic("Could not enable little endian exceptions");
+               }
+       }
+#endif
+
         if (firmware_has_feature(FW_FEATURE_LPAR))
                 hpte_init_lpar();
         else
diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c

index 8ef53bc2e70e2e442962820382f4664b62aa7a34..aaa46b353715e94fd7eabb71f6227f3971524ba3 100644 (file)
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ b/arch/powerpc/platforms/wsp/chroma.c
@@ -15,6 +15,7 @@
  #include <linux/of.h>
  #include <linux/smp.h>
  #include <linux/time.h>
+#include <linux/of_fdt.h>
  
  #include <asm/machdep.h>
  #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c

index d18e6cc19df376eab16f7dcbcb09516f874ac289..a3c87f3957502bfe8713af0bf796b6b4ddaa417c 100644 (file)
--- a/arch/powerpc/platforms/wsp/h8.c
+++ b/arch/powerpc/platforms/wsp/h8.c
@@ -10,6 +10,7 @@
  #include <linux/kernel.h>
  #include <linux/of.h>
  #include <linux/io.h>
+#include <linux/of_address.h>
  
  #include "wsp.h"
  
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c

index 2d3b1dd9571da71aec4b11ab97ef5a0cc106be4f..9cd92e645028ecc8041ab856a5cf35621a2929c7 100644 (file)
--- a/arch/powerpc/platforms/wsp/ics.c
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -18,6 +18,8 @@
  #include <linux/smp.h>
  #include <linux/spinlock.h>
  #include <linux/types.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
  
  #include <asm/io.h>
  #include <asm/irq.h>
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c

index cb565bf93650ef7e49f1f57ae162c996b062b109..3f672980793817c72c8bad6f203ec65ed0472c4d 100644 (file)
--- a/arch/powerpc/platforms/wsp/opb_pic.c
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -15,6 +15,8 @@
  #include <linux/of.h>
  #include <linux/slab.h>
  #include <linux/time.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
  
  #include <asm/reg_a2.h>
  #include <asm/irq.h>
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c

index 508ec8282b96f7e2d67641619d48d43f3917f808..a87b414c766af3f986b3575a37183e8ddb632255 100644 (file)
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -15,6 +15,7 @@
  #include <linux/of.h>
  #include <linux/smp.h>
  #include <linux/time.h>
+#include <linux/of_fdt.h>
  
  #include <asm/machdep.h>
  #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c

index 8928507affead7aa3f9f128232af8d4822188331..6538b4de34fccdab9fbda2cbe5a93ebf161c1277 100644 (file)
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -14,6 +14,7 @@
  #include <linux/of.h>
  #include <linux/spinlock.h>
  #include <linux/types.h>
+#include <linux/of_address.h>
  
  #include <asm/cputhreads.h>
  #include <asm/reg_a2.h>
diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c

index ddb6efe889144dd78e15e80150e7b8a75bbb2d89..58cd1f00e1efa8a02754c4855ac21b24a316a37e 100644 (file)
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ b/arch/powerpc/platforms/wsp/wsp.c
@@ -13,6 +13,7 @@
  #include <linux/smp.h>
  #include <linux/delay.h>
  #include <linux/time.h>
+#include <linux/of_address.h>
  
  #include <asm/scom.h>
  
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h

index 7d7443283a9d5b2dc19796a7d18e9bebd66e2164..947b5c417e830ae0ead192964ef07f8a68d88851 100644 (file)
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -15,7 +15,7 @@ struct pci_sysdata {
         int             domain;         /* PCI domain */
         int             node;           /* NUMA node */
  #ifdef CONFIG_ACPI
-       void            *acpi;          /* ACPI-specific data */
+       struct acpi_device *companion;  /* ACPI companion device */
  #endif
  #ifdef CONFIG_X86_64
         void            *iommu;         /* IOMMU private data */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h

index b93e09a0fa21c34ee20a5cf3ddd8319efa07f40c..37813b5ddc37472dba6c64b8ff3f2508dc085de0 100644 (file)
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -147,6 +147,8 @@
  #define MSR_PP1_ENERGY_STATUS          0x00000641
  #define MSR_PP1_POLICY                 0x00000642
  
+#define MSR_CORE_C1_RES                        0x00000660
+
  #define MSR_AMD64_MC0_MASK             0xc0010044
  
  #define MSR_IA32_MCx_CTL(x)            (MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c

index daff69e21150d054a109a889630f730702088b76..1185fe7a7f47b053ba3c0fcb1b079d1614581e57 100644 (file)
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -296,4 +296,4 @@ static struct kernel_param_ops audit_param_ops = {
         .get = param_get_bool,
  };
  
-module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
+arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c

index a7cccb6d7fec680e184a57c197106a7d475467b1..c96314abd144ca91cfcccaba72352ad0fdc6cf5b 100644 (file)
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
  #if PAGETABLE_LEVELS > 2
  void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
  {
+       struct page *page = virt_to_page(pmd);
         paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
         /*
          * NOTE! For PAE, any changes to the top page-directory-pointer-table
@@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
  #ifdef CONFIG_X86_PAE
         tlb->need_flush_all = 1;
  #endif
-       tlb_remove_page(tlb, virt_to_page(pmd));
+       pgtable_pmd_page_dtor(page);
+       tlb_remove_page(tlb, page);
  }
  
  #if PAGETABLE_LEVELS > 3
@@ -209,7 +211,7 @@ static int preallocate_pmds(pmd_t *pmds[])
                 if (!pmd)
                         failed = true;
                 if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-                       free_page((unsigned long)pmds[i]);
+                       free_page((unsigned long)pmd);
                         pmd = NULL;
                         failed = true;
                 }
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c

index 7fb24e53d4c8b88b1fd374a327e0535269f7e328..4f25ec0775526f45133d9087bc13fd4f76dfb24e 100644 (file)
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -518,7 +518,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
         sd = &info->sd;
         sd->domain = domain;
         sd->node = node;
-       sd->acpi = device->handle;
+       sd->companion = device;
         /*
          * Maybe the desired pci bus has been already scanned. In such case
          * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -589,7 +589,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
  {
         struct pci_sysdata *sd = bridge->bus->sysdata;
  
-       ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+       ACPI_COMPANION_SET(&bridge->dev, sd->companion);
         return 0;
  }
  
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 862f458d4760340935017f61f27c59feab476119..cdc629cf075b74f27f7801565ec3d90f3e299ce0 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -171,9 +171,12 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  }
  EXPORT_SYMBOL(blk_mq_can_queue);
  
-static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
-                              unsigned int rw_flags)
+static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+                              struct request *rq, unsigned int rw_flags)
  {
+       if (blk_queue_io_stat(q))
+               rw_flags |= REQ_IO_STAT;
+
         rq->mq_ctx = ctx;
         rq->cmd_flags = rw_flags;
         ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
@@ -197,7 +200,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
  
                 rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
                 if (rq) {
-                       blk_mq_rq_ctx_init(ctx, rq, rw);
+                       blk_mq_rq_ctx_init(q, ctx, rq, rw);
                         break;
                 } else if (!(gfp & __GFP_WAIT))
                         break;
@@ -718,6 +721,8 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
  {
         struct blk_mq_ctx *ctx = rq->mq_ctx;
  
+       trace_block_rq_insert(hctx->queue, rq);
+
         list_add_tail(&rq->queuelist, &ctx->rq_list);
         blk_mq_hctx_mark_pending(hctx, ctx);
  
@@ -921,7 +926,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
         trace_block_getrq(q, bio, rw);
         rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
         if (likely(rq))
-               blk_mq_rq_ctx_init(ctx, rq, rw);
+               blk_mq_rq_ctx_init(q, ctx, rq, rw);
         else {
                 blk_mq_put_ctx(ctx);
                 trace_block_sleeprq(q, bio, rw);
@@ -1377,6 +1382,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
         q->queue_hw_ctx = hctxs;
  
         q->mq_ops = reg->ops;
+       q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
  
         blk_queue_make_request(q, blk_mq_make_request);
         blk_queue_rq_timed_out(q, reg->ops->timeout);
diff --git a/block/partitions/efi.c b/block/partitions/efi.c

index a8287b49d0621d1778295ad0516c8ccbf22ed0fa..dc51f467a560558ab4812d339fe2bd24f83a00b2 100644 (file)
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -96,6 +96,7 @@
   * - Code works, detects all the partitions.
   *
   ************************************************************/
+#include <linux/kernel.h>
  #include <linux/crc32.h>
  #include <linux/ctype.h>
  #include <linux/math64.h>
@@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state)
                 efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
  
                 /* Naively convert UTF16-LE to 7 bits. */
-               label_max = min(sizeof(info->volname) - 1,
-                               sizeof(ptes[i].partition_name));
+               label_max = min(ARRAY_SIZE(info->volname) - 1,
+                               ARRAY_SIZE(ptes[i].partition_name));
                 info->volname[label_max] = 0;
                 while (label_count < label_max) {
                         u8 c = ptes[i].partition_name[label_count] & 0xff;
diff --git a/crypto/Kconfig b/crypto/Kconfig

index 71f337aefa3905feaca892b7ac3b49b4bcb411e3..4ae5734fb4733bb8e264565867fa6d73d7b11f6b 100644 (file)
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1402,6 +1402,9 @@ config CRYPTO_USER_API_SKCIPHER
           This option enables the user-spaces interface for symmetric
           key cipher algorithms.
  
+config CRYPTO_HASH_INFO
+       bool
+
  source "drivers/crypto/Kconfig"
  source crypto/asymmetric_keys/Kconfig
  
diff --git a/crypto/Makefile b/crypto/Makefile

index 80019ba8da3a2113ce8a48bf924bba9ca7d96e50..b3a7e807e08bca306619a3e7250afbc9160fecbe 100644 (file)
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
  obj-$(CONFIG_XOR_BLOCKS) += xor.o
  obj-$(CONFIG_ASYNC_CORE) += async_tx/
  obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
+obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig

index 6d2c2ea12559c57624b687a06e0b6f3f3ae4291a..03a6eb95ab500bd37fe2b2e4000a73b1c01a99b0 100644 (file)
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -12,6 +12,8 @@ if ASYMMETRIC_KEY_TYPE
  config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
         tristate "Asymmetric public-key crypto algorithm subtype"
         select MPILIB
+       select PUBLIC_KEY_ALGO_RSA
+       select CRYPTO_HASH_INFO
         help
           This option provides support for asymmetric public key type handling.
           If signature generation and/or verification are to be used,
@@ -20,8 +22,8 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
  
  config PUBLIC_KEY_ALGO_RSA
         tristate "RSA public-key algorithm"
-       depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
         select MPILIB_EXTRA
+       select MPILIB
         help
           This option enables support for the RSA algorithm (PKCS#1, RFC3447).
  
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c

index cf807654d221c80fced3280ae23bc666a2089348..b77eb53047882ad26de8de4892e8568a4b792326 100644 (file)
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = {
         .match          = asymmetric_key_match,
         .destroy        = asymmetric_key_destroy,
         .describe       = asymmetric_key_describe,
+       .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
  };
  EXPORT_SYMBOL_GPL(key_type_asymmetric);
  
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c

index cb2e29180a87286321f593649365c2cb2fc95247..97eb001960b97774e89643f65711d2a68e6a3a8f 100644 (file)
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -22,29 +22,25 @@
  
  MODULE_LICENSE("GPL");
  
-const char *const pkey_algo[PKEY_ALGO__LAST] = {
+const char *const pkey_algo_name[PKEY_ALGO__LAST] = {
         [PKEY_ALGO_DSA]         = "DSA",
         [PKEY_ALGO_RSA]         = "RSA",
  };
-EXPORT_SYMBOL_GPL(pkey_algo);
+EXPORT_SYMBOL_GPL(pkey_algo_name);
  
-const char *const pkey_hash_algo[PKEY_HASH__LAST] = {
-       [PKEY_HASH_MD4]         = "md4",
-       [PKEY_HASH_MD5]         = "md5",
-       [PKEY_HASH_SHA1]        = "sha1",
-       [PKEY_HASH_RIPE_MD_160] = "rmd160",
-       [PKEY_HASH_SHA256]      = "sha256",
-       [PKEY_HASH_SHA384]      = "sha384",
-       [PKEY_HASH_SHA512]      = "sha512",
-       [PKEY_HASH_SHA224]      = "sha224",
+const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = {
+#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
+       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
+       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
+#endif
  };
-EXPORT_SYMBOL_GPL(pkey_hash_algo);
+EXPORT_SYMBOL_GPL(pkey_algo);
  
-const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = {
+const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = {
         [PKEY_ID_PGP]           = "PGP",
         [PKEY_ID_X509]          = "X509",
  };
-EXPORT_SYMBOL_GPL(pkey_id_type);
+EXPORT_SYMBOL_GPL(pkey_id_type_name);
  
  /*
   * Provide a part of a description of the key for /proc/keys.
@@ -56,7 +52,7 @@ static void public_key_describe(const struct key *asymmetric_key,
  
         if (key)
                 seq_printf(m, "%s.%s",
-                          pkey_id_type[key->id_type], key->algo->name);
+                          pkey_id_type_name[key->id_type], key->algo->name);
  }
  
  /*
@@ -78,21 +74,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy);
  /*
   * Verify a signature using a public key.
   */
-static int public_key_verify_signature(const struct key *key,
-                                      const struct public_key_signature *sig)
+int public_key_verify_signature(const struct public_key *pk,
+                               const struct public_key_signature *sig)
  {
-       const struct public_key *pk = key->payload.data;
+       const struct public_key_algorithm *algo;
+
+       BUG_ON(!pk);
+       BUG_ON(!pk->mpi[0]);
+       BUG_ON(!pk->mpi[1]);
+       BUG_ON(!sig);
+       BUG_ON(!sig->digest);
+       BUG_ON(!sig->mpi[0]);
+
+       algo = pk->algo;
+       if (!algo) {
+               if (pk->pkey_algo >= PKEY_ALGO__LAST)
+                       return -ENOPKG;
+               algo = pkey_algo[pk->pkey_algo];
+               if (!algo)
+                       return -ENOPKG;
+       }
  
-       if (!pk->algo->verify_signature)
+       if (!algo->verify_signature)
                 return -ENOTSUPP;
  
-       if (sig->nr_mpi != pk->algo->n_sig_mpi) {
+       if (sig->nr_mpi != algo->n_sig_mpi) {
                 pr_debug("Signature has %u MPI not %u\n",
-                        sig->nr_mpi, pk->algo->n_sig_mpi);
+                        sig->nr_mpi, algo->n_sig_mpi);
                 return -EINVAL;
         }
  
-       return pk->algo->verify_signature(pk, sig);
+       return algo->verify_signature(pk, sig);
+}
+EXPORT_SYMBOL_GPL(public_key_verify_signature);
+
+static int public_key_verify_signature_2(const struct key *key,
+                                        const struct public_key_signature *sig)
+{
+       const struct public_key *pk = key->payload.data;
+       return public_key_verify_signature(pk, sig);
  }
  
  /*
@@ -103,6 +123,6 @@ struct asymmetric_key_subtype public_key_subtype = {
         .name                   = "public_key",
         .describe               = public_key_describe,
         .destroy                = public_key_destroy,
-       .verify_signature       = public_key_verify_signature,
+       .verify_signature       = public_key_verify_signature_2,
  };
  EXPORT_SYMBOL_GPL(public_key_subtype);
diff --git a/crypto/asymmetric_keys/public_key.h b/crypto/asymmetric_keys/public_key.h

index 5e5e35626899e845bb673530854f9aa127336fd6..5c37a22a0637acdc7abf8f07c610c2f72dfd59bb 100644 (file)
--- a/crypto/asymmetric_keys/public_key.h
+++ b/crypto/asymmetric_keys/public_key.h
@@ -28,3 +28,9 @@ struct public_key_algorithm {
  };
  
  extern const struct public_key_algorithm RSA_public_key_algorithm;
+
+/*
+ * public_key.c
+ */
+extern int public_key_verify_signature(const struct public_key *pk,
+                                      const struct public_key_signature *sig);
diff --git a/crypto/asymmetric_keys/rsa.c b/crypto/asymmetric_keys/rsa.c

index 4a6a0696f8a3b165618fe7c62516a7446e354c24..90a17f59ba2800d197366dbcb0835a9adc598320 100644 (file)
--- a/crypto/asymmetric_keys/rsa.c
+++ b/crypto/asymmetric_keys/rsa.c
@@ -73,13 +73,13 @@ static const struct {
         size_t size;
  } RSA_ASN1_templates[PKEY_HASH__LAST] = {
  #define _(X) { RSA_digest_info_##X, sizeof(RSA_digest_info_##X) }
-       [PKEY_HASH_MD5]         = _(MD5),
-       [PKEY_HASH_SHA1]        = _(SHA1),
-       [PKEY_HASH_RIPE_MD_160] = _(RIPE_MD_160),
-       [PKEY_HASH_SHA256]      = _(SHA256),
-       [PKEY_HASH_SHA384]      = _(SHA384),
-       [PKEY_HASH_SHA512]      = _(SHA512),
-       [PKEY_HASH_SHA224]      = _(SHA224),
+       [HASH_ALGO_MD5]         = _(MD5),
+       [HASH_ALGO_SHA1]        = _(SHA1),
+       [HASH_ALGO_RIPE_MD_160] = _(RIPE_MD_160),
+       [HASH_ALGO_SHA256]      = _(SHA256),
+       [HASH_ALGO_SHA384]      = _(SHA384),
+       [HASH_ALGO_SHA512]      = _(SHA512),
+       [HASH_ALGO_SHA224]      = _(SHA224),
  #undef _
  };
  
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c

index facbf26bc6bbbc91eb879b0a5cdf40d01e8f2b05..29893162497ca352101b4d2126a0d03590e50ed1 100644 (file)
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert)
                 kfree(cert->subject);
                 kfree(cert->fingerprint);
                 kfree(cert->authority);
+               kfree(cert->sig.digest);
+               mpi_free(cert->sig.rsa.s);
                 kfree(cert);
         }
  }
@@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
                 return -ENOPKG; /* Unsupported combination */
  
         case OID_md4WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_MD5;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_MD5;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
  
         case OID_sha1WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA1;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA1;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
  
         case OID_sha256WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA256;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA256;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
  
         case OID_sha384WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA384;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA384;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
  
         case OID_sha512WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA512;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA512;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
  
         case OID_sha224WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA224;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA224;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                 break;
         }
  
@@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen,
                 return -EINVAL;
         }
  
-       ctx->cert->sig = value;
-       ctx->cert->sig_size = vlen;
+       ctx->cert->raw_sig = value;
+       ctx->cert->raw_sig_size = vlen;
         return 0;
  }
  
@@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen,
         if (ctx->last_oid != OID_rsaEncryption)
                 return -ENOPKG;
  
-       /* There seems to be an extraneous 0 byte on the front of the data */
-       ctx->cert->pkey_algo = PKEY_ALGO_RSA;
+       ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA;
+
+       /* Discard the BIT STRING metadata */
         ctx->key = value + 1;
         ctx->key_size = vlen - 1;
         return 0;
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h

index f86dc5fcc4ad46accfb003b464c784fb5763218b..87d9cc26f630625d7c57e3309456de2a356a46b5 100644 (file)
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -9,6 +9,7 @@
   * 2 of the Licence, or (at your option) any later version.
   */
  
+#include <linux/time.h>
  #include <crypto/public_key.h>
  
  struct x509_certificate {
@@ -20,13 +21,11 @@ struct x509_certificate {
         char            *authority;             /* Authority key fingerprint as hex */
         struct tm       valid_from;
         struct tm       valid_to;
-       enum pkey_algo  pkey_algo : 8;          /* Public key algorithm */
-       enum pkey_algo  sig_pkey_algo : 8;      /* Signature public key algorithm */
-       enum pkey_hash_algo sig_hash_algo : 8;  /* Signature hash algorithm */
         const void      *tbs;                   /* Signed data */
-       size_t          tbs_size;               /* Size of signed data */
-       const void      *sig;                   /* Signature data */
-       size_t          sig_size;               /* Size of sigature */
+       unsigned        tbs_size;               /* Size of signed data */
+       unsigned        raw_sig_size;           /* Size of sigature */
+       const void      *raw_sig;               /* Signature data */
+       struct public_key_signature sig;        /* Signature parameters */
  };
  
  /*
@@ -34,3 +33,10 @@ struct x509_certificate {
   */
  extern void x509_free_certificate(struct x509_certificate *cert);
  extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen);
+
+/*
+ * x509_public_key.c
+ */
+extern int x509_get_sig_params(struct x509_certificate *cert);
+extern int x509_check_signature(const struct public_key *pub,
+                               struct x509_certificate *cert);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c

index 06007f0e880c330903b5536e9d9c194da302738c..f83300b6e8c13033e5e239be8d48e7fb379668de 100644 (file)
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -18,85 +18,162 @@
  #include <linux/asn1_decoder.h>
  #include <keys/asymmetric-subtype.h>
  #include <keys/asymmetric-parser.h>
+#include <keys/system_keyring.h>
  #include <crypto/hash.h>
  #include "asymmetric_keys.h"
  #include "public_key.h"
  #include "x509_parser.h"
  
-static const
-struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = {
-       [PKEY_ALGO_DSA]         = NULL,
-#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
-       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
-       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
-#endif
-};
+/*
+ * Find a key in the given keyring by issuer and authority.
+ */
+static struct key *x509_request_asymmetric_key(
+       struct key *keyring,
+       const char *signer, size_t signer_len,
+       const char *authority, size_t auth_len)
+{
+       key_ref_t key;
+       char *id;
+
+       /* Construct an identifier. */
+       id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL);
+       if (!id)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(id, signer, signer_len);
+       id[signer_len + 0] = ':';
+       id[signer_len + 1] = ' ';
+       memcpy(id + signer_len + 2, authority, auth_len);
+       id[signer_len + 2 + auth_len] = 0;
+
+       pr_debug("Look up: \"%s\"\n", id);
+
+       key = keyring_search(make_key_ref(keyring, 1),
+                            &key_type_asymmetric, id);
+       if (IS_ERR(key))
+               pr_debug("Request for module key '%s' err %ld\n",
+                        id, PTR_ERR(key));
+       kfree(id);
+
+       if (IS_ERR(key)) {
+               switch (PTR_ERR(key)) {
+                       /* Hide some search errors */
+               case -EACCES:
+               case -ENOTDIR:
+               case -EAGAIN:
+                       return ERR_PTR(-ENOKEY);
+               default:
+                       return ERR_CAST(key);
+               }
+       }
+
+       pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
+       return key_ref_to_ptr(key);
+}
  
  /*
- * Check the signature on a certificate using the provided public key
+ * Set up the signature parameters in an X.509 certificate.  This involves
+ * digesting the signed data and extracting the signature.
   */
-static int x509_check_signature(const struct public_key *pub,
-                               const struct x509_certificate *cert)
+int x509_get_sig_params(struct x509_certificate *cert)
  {
-       struct public_key_signature *sig;
         struct crypto_shash *tfm;
         struct shash_desc *desc;
         size_t digest_size, desc_size;
+       void *digest;
         int ret;
  
         pr_devel("==>%s()\n", __func__);
-       
+
+       if (cert->sig.rsa.s)
+               return 0;
+
+       cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size);
+       if (!cert->sig.rsa.s)
+               return -ENOMEM;
+       cert->sig.nr_mpi = 1;
+
         /* Allocate the hashing algorithm we're going to need and find out how
          * big the hash operational data will be.
          */
-       tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0);
+       tfm = crypto_alloc_shash(hash_algo_name[cert->sig.pkey_hash_algo], 0, 0);
         if (IS_ERR(tfm))
                 return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm);
  
         desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
         digest_size = crypto_shash_digestsize(tfm);
  
-       /* We allocate the hash operational data storage on the end of our
-        * context data.
+       /* We allocate the hash operational data storage on the end of the
+        * digest storage space.
          */
         ret = -ENOMEM;
-       sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL);
-       if (!sig)
-               goto error_no_sig;
+       digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+       if (!digest)
+               goto error;
  
-       sig->pkey_hash_algo     = cert->sig_hash_algo;
-       sig->digest             = (u8 *)sig + sizeof(*sig) + desc_size;
-       sig->digest_size        = digest_size;
+       cert->sig.digest = digest;
+       cert->sig.digest_size = digest_size;
  
-       desc = (void *)sig + sizeof(*sig);
-       desc->tfm       = tfm;
-       desc->flags     = CRYPTO_TFM_REQ_MAY_SLEEP;
+       desc = digest + digest_size;
+       desc->tfm = tfm;
+       desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
  
         ret = crypto_shash_init(desc);
         if (ret < 0)
                 goto error;
+       might_sleep();
+       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest);
+error:
+       crypto_free_shash(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_get_sig_params);
  
-       ret = -ENOMEM;
-       sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size);
-       if (!sig->rsa.s)
-               goto error;
+/*
+ * Check the signature on a certificate using the provided public key
+ */
+int x509_check_signature(const struct public_key *pub,
+                        struct x509_certificate *cert)
+{
+       int ret;
  
-       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
-       if (ret < 0)
-               goto error_mpi;
+       pr_devel("==>%s()\n", __func__);
  
-       ret = pub->algo->verify_signature(pub, sig);
+       ret = x509_get_sig_params(cert);
+       if (ret < 0)
+               return ret;
  
+       ret = public_key_verify_signature(pub, &cert->sig);
         pr_debug("Cert Verification: %d\n", ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_check_signature);
  
-error_mpi:
-       mpi_free(sig->rsa.s);
-error:
-       kfree(sig);
-error_no_sig:
-       crypto_free_shash(tfm);
+/*
+ * Check the new certificate against the ones in the trust keyring.  If one of
+ * those is the signing key and validates the new certificate, then mark the
+ * new certificate as being trusted.
+ *
+ * Return 0 if the new certificate was successfully validated, 1 if we couldn't
+ * find a matching parent certificate in the trusted list and an error if there
+ * is a matching certificate but the signature check fails.
+ */
+static int x509_validate_trust(struct x509_certificate *cert,
+                              struct key *trust_keyring)
+{
+       const struct public_key *pk;
+       struct key *key;
+       int ret = 1;
  
-       pr_devel("<==%s() = %d\n", __func__, ret);
+       key = x509_request_asymmetric_key(trust_keyring,
+                                         cert->issuer, strlen(cert->issuer),
+                                         cert->authority,
+                                         strlen(cert->authority));
+       if (!IS_ERR(key))  {
+               pk = key->payload.data;
+               ret = x509_check_signature(pk, cert);
+       }
         return ret;
  }
  
@@ -106,7 +183,6 @@ error_no_sig:
  static int x509_key_preparse(struct key_preparsed_payload *prep)
  {
         struct x509_certificate *cert;
-       struct tm now;
         size_t srlen, sulen;
         char *desc = NULL;
         int ret;
@@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
  
         pr_devel("Cert Issuer: %s\n", cert->issuer);
         pr_devel("Cert Subject: %s\n", cert->subject);
-       pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]);
+
+       if (cert->pub->pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_hash_algo >= PKEY_HASH__LAST ||
+           !pkey_algo[cert->pub->pkey_algo] ||
+           !pkey_algo[cert->sig.pkey_algo] ||
+           !hash_algo_name[cert->sig.pkey_hash_algo]) {
+               ret = -ENOPKG;
+               goto error_free_cert;
+       }
+
+       pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]);
         pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n",
                  cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1,
                  cert->valid_from.tm_mday, cert->valid_from.tm_hour,
@@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
                  cert->valid_to.tm_mday, cert->valid_to.tm_hour,
                  cert->valid_to.tm_min,  cert->valid_to.tm_sec);
         pr_devel("Cert Signature: %s + %s\n",
-                pkey_algo[cert->sig_pkey_algo],
-                pkey_hash_algo[cert->sig_hash_algo]);
+                pkey_algo_name[cert->sig.pkey_algo],
+                hash_algo_name[cert->sig.pkey_hash_algo]);
  
-       if (!cert->fingerprint || !cert->authority) {
-               pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n",
+       if (!cert->fingerprint) {
+               pr_warn("Cert for '%s' must have a SubjKeyId extension\n",
                         cert->subject);
                 ret = -EKEYREJECTED;
                 goto error_free_cert;
         }
  
-       time_to_tm(CURRENT_TIME.tv_sec, 0, &now);
-       pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n",
-                now.tm_year + 1900, now.tm_mon + 1, now.tm_mday,
-                now.tm_hour, now.tm_min,  now.tm_sec);
-       if (now.tm_year < cert->valid_from.tm_year ||
-           (now.tm_year == cert->valid_from.tm_year &&
-            (now.tm_mon < cert->valid_from.tm_mon ||
-             (now.tm_mon == cert->valid_from.tm_mon &&
-              (now.tm_mday < cert->valid_from.tm_mday ||
-               (now.tm_mday == cert->valid_from.tm_mday &&
-                (now.tm_hour < cert->valid_from.tm_hour ||
-                 (now.tm_hour == cert->valid_from.tm_hour &&
-                  (now.tm_min < cert->valid_from.tm_min ||
-                   (now.tm_min == cert->valid_from.tm_min &&
-                    (now.tm_sec < cert->valid_from.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s is not yet valid\n", cert->fingerprint);
-               ret = -EKEYREJECTED;
-               goto error_free_cert;
-       }
-       if (now.tm_year > cert->valid_to.tm_year ||
-           (now.tm_year == cert->valid_to.tm_year &&
-            (now.tm_mon > cert->valid_to.tm_mon ||
-             (now.tm_mon == cert->valid_to.tm_mon &&
-              (now.tm_mday > cert->valid_to.tm_mday ||
-               (now.tm_mday == cert->valid_to.tm_mday &&
-                (now.tm_hour > cert->valid_to.tm_hour ||
-                 (now.tm_hour == cert->valid_to.tm_hour &&
-                  (now.tm_min > cert->valid_to.tm_min ||
-                   (now.tm_min == cert->valid_to.tm_min &&
-                    (now.tm_sec > cert->valid_to.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s has expired\n", cert->fingerprint);
-               ret = -EKEYEXPIRED;
-               goto error_free_cert;
-       }
-
-       cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo];
+       cert->pub->algo = pkey_algo[cert->pub->pkey_algo];
         cert->pub->id_type = PKEY_ID_X509;
  
-       /* Check the signature on the key */
-       if (strcmp(cert->fingerprint, cert->authority) == 0) {
-               ret = x509_check_signature(cert->pub, cert);
+       /* Check the signature on the key if it appears to be self-signed */
+       if (!cert->authority ||
+           strcmp(cert->fingerprint, cert->authority) == 0) {
+               ret = x509_check_signature(cert->pub, cert); /* self-signed */
                 if (ret < 0)
                         goto error_free_cert;
+       } else {
+               ret = x509_validate_trust(cert, system_trusted_keyring);
+               if (!ret)
+                       prep->trusted = 1;
         }
  
         /* Propose a description */
@@ -237,3 +292,6 @@ static void __exit x509_key_exit(void)
  
  module_init(x509_key_init);
  module_exit(x509_key_exit);
+
+MODULE_DESCRIPTION("X.509 certificate parser");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c

index 9e62feffb374536995e08357866df9787635b19e..f8c0b8dbeb7582beca1ee7fd5c7aaac58aba23cd 100644 (file)
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                                                       &dest, 1, &src, 1, len);
         struct dma_device *device = chan ? chan->device : NULL;
         struct dma_async_tx_descriptor *tx = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
  
-       if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-               dma_addr_t dma_dest, dma_src;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+       if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
                 unsigned long dma_prep_flags = 0;
  
                 if (submit->cb_fn)
                         dma_prep_flags |= DMA_PREP_INTERRUPT;
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_prep_flags |= DMA_PREP_FENCE;
-               dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
-                                       DMA_FROM_DEVICE);
-
-               dma_src = dma_map_page(device->dev, src, src_offset, len,
-                                      DMA_TO_DEVICE);
-
-               tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-                                                   len, dma_prep_flags);
-               if (!tx) {
-                       dma_unmap_page(device->dev, dma_dest, len,
-                                      DMA_FROM_DEVICE);
-                       dma_unmap_page(device->dev, dma_src, len,
-                                      DMA_TO_DEVICE);
-               }
+
+               unmap->to_cnt = 1;
+               unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+                                             DMA_TO_DEVICE);
+               unmap->from_cnt = 1;
+               unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+                                             DMA_FROM_DEVICE);
+               unmap->len = len;
+
+               tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                   unmap->addr[0], len,
+                                                   dma_prep_flags);
         }
  
         if (tx) {
                 pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+               dma_set_unmap(tx, unmap);
                 async_tx_submit(chan, tx, submit);
         } else {
                 void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                 async_tx_sync_epilog(submit);
         }
  
+       dmaengine_unmap_put(unmap);
+
         return tx;
  }
  EXPORT_SYMBOL_GPL(async_memcpy);
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c

index 91d5d385899ee06c507cee1aa496330405d6276b..d05327caf69dbc18532478b122ea9834e67f1fd9 100644 (file)
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
   * do_async_gen_syndrome - asynchronously calculate P and/or Q
   */
  static __async_inline struct dma_async_tx_descriptor *
-do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
-                     const unsigned char *scfs, unsigned int offset, int disks,
-                     size_t len, dma_addr_t *dma_src,
+do_async_gen_syndrome(struct dma_chan *chan,
+                     const unsigned char *scfs, int disks,
+                     struct dmaengine_unmap_data *unmap,
+                     enum dma_ctrl_flags dma_flags,
                       struct async_submit_ctl *submit)
  {
         struct dma_async_tx_descriptor *tx = NULL;
         struct dma_device *dma = chan->device;
-       enum dma_ctrl_flags dma_flags = 0;
         enum async_tx_flags flags_orig = submit->flags;
         dma_async_tx_callback cb_fn_orig = submit->cb_fn;
         dma_async_tx_callback cb_param_orig = submit->cb_param;
         int src_cnt = disks - 2;
-       unsigned char coefs[src_cnt];
         unsigned short pq_src_cnt;
         dma_addr_t dma_dest[2];
         int src_off = 0;
-       int idx;
-       int i;
  
-       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
-       if (P(blocks, disks))
-               dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
-                                          len, DMA_BIDIRECTIONAL);
-       else
-               dma_flags |= DMA_PREP_PQ_DISABLE_P;
-       if (Q(blocks, disks))
-               dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
-                                          len, DMA_BIDIRECTIONAL);
-       else
-               dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-
-       /* convert source addresses being careful to collapse 'empty'
-        * sources and update the coefficients accordingly
-        */
-       for (i = 0, idx = 0; i < src_cnt; i++) {
-               if (blocks[i] == NULL)
-                       continue;
-               dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
-                                           DMA_TO_DEVICE);
-               coefs[idx] = scfs[i];
-               idx++;
-       }
-       src_cnt = idx;
+       if (submit->flags & ASYNC_TX_FENCE)
+               dma_flags |= DMA_PREP_FENCE;
  
         while (src_cnt > 0) {
                 submit->flags = flags_orig;
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
                 if (src_cnt > pq_src_cnt) {
                         submit->flags &= ~ASYNC_TX_ACK;
                         submit->flags |= ASYNC_TX_FENCE;
-                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
                         submit->cb_fn = NULL;
                         submit->cb_param = NULL;
                 } else {
-                       dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
                         submit->cb_fn = cb_fn_orig;
                         submit->cb_param = cb_param_orig;
                         if (cb_fn_orig)
                                 dma_flags |= DMA_PREP_INTERRUPT;
                 }
-               if (submit->flags & ASYNC_TX_FENCE)
-                       dma_flags |= DMA_PREP_FENCE;
  
-               /* Since we have clobbered the src_list we are committed
-                * to doing this asynchronously.  Drivers force forward
-                * progress in case they can not provide a descriptor
+               /* Drivers force forward progress in case they can not provide
+                * a descriptor
                  */
                 for (;;) {
+                       dma_dest[0] = unmap->addr[disks - 2];
+                       dma_dest[1] = unmap->addr[disks - 1];
                         tx = dma->device_prep_dma_pq(chan, dma_dest,
-                                                    &dma_src[src_off],
+                                                    &unmap->addr[src_off],
                                                      pq_src_cnt,
-                                                    &coefs[src_off], len,
+                                                    &scfs[src_off], unmap->len,
                                                      dma_flags);
                         if (likely(tx))
                                 break;
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
                         dma_async_issue_pending(chan);
                 }
  
+               dma_set_unmap(tx, unmap);
                 async_tx_submit(chan, tx, submit);
                 submit->depend_tx = tx;
  
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
   * set to NULL those buffers will be replaced with the raid6_zero_page
   * in the synchronous path and omitted in the hardware-asynchronous
   * path.
- *
- * 'blocks' note: if submit->scribble is NULL then the contents of
- * 'blocks' may be overwritten to perform address conversions
- * (dma_map_page() or page_address()).
   */
  struct dma_async_tx_descriptor *
  async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
                                                       &P(blocks, disks), 2,
                                                       blocks, src_cnt, len);
         struct dma_device *device = chan ? chan->device : NULL;
-       dma_addr_t *dma_src = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
  
         BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
  
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) blocks;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
  
-       if (dma_src && device &&
+       if (unmap &&
             (src_cnt <= dma_maxpq(device, 0) ||
              dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
             is_dma_pq_aligned(device, offset, 0, len)) {
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = 0;
+               unsigned char coefs[src_cnt];
+               int i, j;
+
                 /* run the p+q asynchronously */
                 pr_debug("%s: (async) disks: %d len: %zu\n",
                          __func__, disks, len);
-               return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
-                                            disks, len, dma_src, submit);
+
+               /* convert source addresses being careful to collapse 'empty'
+                * sources and update the coefficients accordingly
+                */
+               unmap->len = len;
+               for (i = 0, j = 0; i < src_cnt; i++) {
+                       if (blocks[i] == NULL)
+                               continue;
+                       unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
+                                                     len, DMA_TO_DEVICE);
+                       coefs[j] = raid6_gfexp[i];
+                       unmap->to_cnt++;
+                       j++;
+               }
+
+               /*
+                * DMAs use destinations as sources,
+                * so use BIDIRECTIONAL mapping
+                */
+               unmap->bidi_cnt++;
+               if (P(blocks, disks))
+                       unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
+                                                       offset, len, DMA_BIDIRECTIONAL);
+               else {
+                       unmap->addr[j++] = 0;
+                       dma_flags |= DMA_PREP_PQ_DISABLE_P;
+               }
+
+               unmap->bidi_cnt++;
+               if (Q(blocks, disks))
+                       unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
+                                                      offset, len, DMA_BIDIRECTIONAL);
+               else {
+                       unmap->addr[j++] = 0;
+                       dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+               }
+
+               tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
+               dmaengine_unmap_put(unmap);
+               return tx;
         }
  
+       dmaengine_unmap_put(unmap);
+
         /* run the pq synchronously */
         pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
  
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
         struct dma_async_tx_descriptor *tx;
         unsigned char coefs[disks-2];
         enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
-       dma_addr_t *dma_src = NULL;
-       int src_cnt = 0;
+       struct dmaengine_unmap_data *unmap = NULL;
  
         BUG_ON(disks < 4);
  
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) blocks;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
  
-       if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+       if (unmap && disks <= dma_maxpq(device, 0) &&
             is_dma_pq_aligned(device, offset, 0, len)) {
                 struct device *dev = device->dev;
-               dma_addr_t *pq = &dma_src[disks-2];
-               int i;
+               dma_addr_t pq[2];
+               int i, j = 0, src_cnt = 0;
  
                 pr_debug("%s: (async) disks: %d len: %zu\n",
                          __func__, disks, len);
-               if (!P(blocks, disks))
+
+               unmap->len = len;
+               for (i = 0; i < disks-2; i++)
+                       if (likely(blocks[i])) {
+                               unmap->addr[j] = dma_map_page(dev, blocks[i],
+                                                             offset, len,
+                                                             DMA_TO_DEVICE);
+                               coefs[j] = raid6_gfexp[i];
+                               unmap->to_cnt++;
+                               src_cnt++;
+                               j++;
+                       }
+
+               if (!P(blocks, disks)) {
+                       pq[0] = 0;
                         dma_flags |= DMA_PREP_PQ_DISABLE_P;
-               else
+               } else {
                         pq[0] = dma_map_page(dev, P(blocks, disks),
                                              offset, len,
                                              DMA_TO_DEVICE);
-               if (!Q(blocks, disks))
+                       unmap->addr[j++] = pq[0];
+                       unmap->to_cnt++;
+               }
+               if (!Q(blocks, disks)) {
+                       pq[1] = 0;
                         dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-               else
+               } else {
                         pq[1] = dma_map_page(dev, Q(blocks, disks),
                                              offset, len,
                                              DMA_TO_DEVICE);
+                       unmap->addr[j++] = pq[1];
+                       unmap->to_cnt++;
+               }
  
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_flags |= DMA_PREP_FENCE;
-               for (i = 0; i < disks-2; i++)
-                       if (likely(blocks[i])) {
-                               dma_src[src_cnt] = dma_map_page(dev, blocks[i],
-                                                               offset, len,
-                                                               DMA_TO_DEVICE);
-                               coefs[src_cnt] = raid6_gfexp[i];
-                               src_cnt++;
-                       }
-
                 for (;;) {
-                       tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+                       tx = device->device_prep_dma_pq_val(chan, pq,
+                                                           unmap->addr,
                                                             src_cnt,
                                                             coefs,
                                                             len, pqres,
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
                         async_tx_quiesce(&submit->depend_tx);
                         dma_async_issue_pending(chan);
                 }
+
+               dma_set_unmap(tx, unmap);
                 async_tx_submit(chan, tx, submit);
  
                 return tx;
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c

index a9f08a6a582ebccce298f718d0bbf5db8b1e1a7a..934a849814958e6ea37b9dbdb96abc820c4fe9e1 100644 (file)
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -26,6 +26,7 @@
  #include <linux/dma-mapping.h>
  #include <linux/raid/pq.h>
  #include <linux/async_tx.h>
+#include <linux/dmaengine.h>
  
  static struct dma_async_tx_descriptor *
  async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
         struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                       &dest, 1, srcs, 2, len);
         struct dma_device *dma = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
         const u8 *amul, *bmul;
         u8 ax, bx;
         u8 *a, *b, *c;
  
-       if (dma) {
-               dma_addr_t dma_dest[2];
-               dma_addr_t dma_src[2];
+       if (dma)
+               unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+       if (unmap) {
                 struct device *dev = dma->dev;
+               dma_addr_t pq[2];
                 struct dma_async_tx_descriptor *tx;
                 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
  
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_flags |= DMA_PREP_FENCE;
-               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-               dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
-               dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
-               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+               unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+               unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+               unmap->to_cnt = 2;
+
+               unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               unmap->bidi_cnt = 1;
+               /* engine only looks at Q, but expects it to follow P */
+               pq[1] = unmap->addr[2];
+
+               unmap->len = len;
+               tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
                                              len, dma_flags);
                 if (tx) {
+                       dma_set_unmap(tx, unmap);
                         async_tx_submit(chan, tx, submit);
+                       dmaengine_unmap_put(unmap);
                         return tx;
                 }
  
                 /* could not get a descriptor, unmap and fall through to
                  * the synchronous path
                  */
-               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
-               dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+               dmaengine_unmap_put(unmap);
         }
  
         /* run the operation synchronously */
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
         struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                       &dest, 1, &src, 1, len);
         struct dma_device *dma = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
         const u8 *qmul; /* Q multiplier table */
         u8 *d, *s;
  
-       if (dma) {
+       if (dma)
+               unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+       if (unmap) {
                 dma_addr_t dma_dest[2];
-               dma_addr_t dma_src[1];
                 struct device *dev = dma->dev;
                 struct dma_async_tx_descriptor *tx;
                 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
  
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_flags |= DMA_PREP_FENCE;
-               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-               dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
-               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
-                                            len, dma_flags);
+               unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+               unmap->to_cnt++;
+               unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_dest[1] = unmap->addr[1];
+               unmap->bidi_cnt++;
+               unmap->len = len;
+
+               /* this looks funny, but the engine looks for Q at
+                * dma_dest[1] and ignores dma_dest[0] as a dest
+                * due to DMA_PREP_PQ_DISABLE_P
+                */
+               tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
+                                            1, &coef, len, dma_flags);
+
                 if (tx) {
+                       dma_set_unmap(tx, unmap);
+                       dmaengine_unmap_put(unmap);
                         async_tx_submit(chan, tx, submit);
                         return tx;
                 }
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
                 /* could not get a descriptor, unmap and fall through to
                  * the synchronous path
                  */
-               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+               dmaengine_unmap_put(unmap);
         }
  
         /* no channel available, or failed to allocate a descriptor, so
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c

index 7be34248b450896cfc056d1709513df4f172f79a..39ea4791a3c977ad7eda47498b9f7d7741b438e2 100644 (file)
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -128,7 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                 }
                 device->device_issue_pending(chan);
         } else {
-               if (dma_wait_for_async_tx(depend_tx) != DMA_SUCCESS)
+               if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE)
                         panic("%s: DMA error waiting for depend_tx\n",
                               __func__);
                 tx->tx_submit(tx);
@@ -280,7 +280,7 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
                  * we are referring to the correct operation
                  */
                 BUG_ON(async_tx_test_ack(*tx));
-               if (dma_wait_for_async_tx(*tx) != DMA_SUCCESS)
+               if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE)
                         panic("%s: DMA error waiting for transaction\n",
                               __func__);
                 async_tx_ack(*tx);
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c

index 8ade0a0481c67149e72d7878deb306a0fd33e5c2..3c562f5a60bbb34f19e6b90f4d858b04d3d496e2 100644 (file)
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,48 +33,31 @@
  
  /* do_async_xor - dma map the pages and perform the xor with an engine */
  static __async_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-            unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
              struct async_submit_ctl *submit)
  {
         struct dma_device *dma = chan->device;
         struct dma_async_tx_descriptor *tx = NULL;
-       int src_off = 0;
-       int i;
         dma_async_tx_callback cb_fn_orig = submit->cb_fn;
         void *cb_param_orig = submit->cb_param;
         enum async_tx_flags flags_orig = submit->flags;
-       enum dma_ctrl_flags dma_flags;
-       int xor_src_cnt = 0;
-       dma_addr_t dma_dest;
-
-       /* map the dest bidrectional in case it is re-used as a source */
-       dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
-       for (i = 0; i < src_cnt; i++) {
-               /* only map the dest once */
-               if (!src_list[i])
-                       continue;
-               if (unlikely(src_list[i] == dest)) {
-                       dma_src[xor_src_cnt++] = dma_dest;
-                       continue;
-               }
-               dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
-                                                     len, DMA_TO_DEVICE);
-       }
-       src_cnt = xor_src_cnt;
+       enum dma_ctrl_flags dma_flags = 0;
+       int src_cnt = unmap->to_cnt;
+       int xor_src_cnt;
+       dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
+       dma_addr_t *src_list = unmap->addr;
  
         while (src_cnt) {
+               dma_addr_t tmp;
+
                 submit->flags = flags_orig;
-               dma_flags = 0;
                 xor_src_cnt = min(src_cnt, (int)dma->max_xor);
-               /* if we are submitting additional xors, leave the chain open,
-                * clear the callback parameters, and leave the destination
-                * buffer mapped
+               /* if we are submitting additional xors, leave the chain open
+                * and clear the callback parameters
                  */
                 if (src_cnt > xor_src_cnt) {
                         submit->flags &= ~ASYNC_TX_ACK;
                         submit->flags |= ASYNC_TX_FENCE;
-                       dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
                         submit->cb_fn = NULL;
                         submit->cb_param = NULL;
                 } else {
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                         dma_flags |= DMA_PREP_INTERRUPT;
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_flags |= DMA_PREP_FENCE;
-               /* Since we have clobbered the src_list we are committed
-                * to doing this asynchronously.  Drivers force forward progress
-                * in case they can not provide a descriptor
+
+               /* Drivers force forward progress in case they can not provide a
+                * descriptor
                  */
-               tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
-                                             xor_src_cnt, len, dma_flags);
+               tmp = src_list[0];
+               if (src_list > unmap->addr)
+                       src_list[0] = dma_dest;
+               tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
+                                             xor_src_cnt, unmap->len,
+                                             dma_flags);
+               src_list[0] = tmp;
+
  
                 if (unlikely(!tx))
                         async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                 while (unlikely(!tx)) {
                         dma_async_issue_pending(chan);
                         tx = dma->device_prep_dma_xor(chan, dma_dest,
-                                                     &dma_src[src_off],
-                                                     xor_src_cnt, len,
+                                                     src_list,
+                                                     xor_src_cnt, unmap->len,
                                                       dma_flags);
                 }
  
+               dma_set_unmap(tx, unmap);
                 async_tx_submit(chan, tx, submit);
                 submit->depend_tx = tx;
  
                 if (src_cnt > xor_src_cnt) {
                         /* drop completed sources */
                         src_cnt -= xor_src_cnt;
-                       src_off += xor_src_cnt;
-
                         /* use the intermediate result a source */
-                       dma_src[--src_off] = dma_dest;
                         src_cnt++;
+                       src_list += xor_src_cnt - 1;
                 } else
                         break;
         }
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
         struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
                                                       &dest, 1, src_list,
                                                       src_cnt, len);
-       dma_addr_t *dma_src = NULL;
+       struct dma_device *device = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
  
         BUG_ON(src_cnt <= 1);
  
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) src_list;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+
+       if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
+               struct dma_async_tx_descriptor *tx;
+               int i, j;
  
-       if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
                 /* run the xor asynchronously */
                 pr_debug("%s (async): len: %zu\n", __func__, len);
  
-               return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-                                   dma_src, submit);
+               unmap->len = len;
+               for (i = 0, j = 0; i < src_cnt; i++) {
+                       if (!src_list[i])
+                               continue;
+                       unmap->to_cnt++;
+                       unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
+                                                       offset, len, DMA_TO_DEVICE);
+               }
+
+               /* map it bidirectional as it may be re-used as a source */
+               unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
+                                             DMA_BIDIRECTIONAL);
+               unmap->bidi_cnt = 1;
+
+               tx = do_async_xor(chan, unmap, submit);
+               dmaengine_unmap_put(unmap);
+               return tx;
         } else {
+               dmaengine_unmap_put(unmap);
                 /* run the xor synchronously */
                 pr_debug("%s (sync): len: %zu\n", __func__, len);
                 WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
         struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
         struct dma_device *device = chan ? chan->device : NULL;
         struct dma_async_tx_descriptor *tx = NULL;
-       dma_addr_t *dma_src = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
  
         BUG_ON(src_cnt <= 1);
  
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) src_list;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
  
-       if (dma_src && device && src_cnt <= device->max_xor &&
+       if (unmap && src_cnt <= device->max_xor &&
             is_dma_xor_aligned(device, offset, 0, len)) {
                 unsigned long dma_prep_flags = 0;
                 int i;
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                         dma_prep_flags |= DMA_PREP_INTERRUPT;
                 if (submit->flags & ASYNC_TX_FENCE)
                         dma_prep_flags |= DMA_PREP_FENCE;
-               for (i = 0; i < src_cnt; i++)
-                       dma_src[i] = dma_map_page(device->dev, src_list[i],
-                                                 offset, len, DMA_TO_DEVICE);
  
-               tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+               for (i = 0; i < src_cnt; i++) {
+                       unmap->addr[i] = dma_map_page(device->dev, src_list[i],
+                                                     offset, len, DMA_TO_DEVICE);
+                       unmap->to_cnt++;
+               }
+               unmap->len = len;
+
+               tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
                                                      len, result,
                                                      dma_prep_flags);
                 if (unlikely(!tx)) {
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                         while (!tx) {
                                 dma_async_issue_pending(chan);
                                 tx = device->device_prep_dma_xor_val(chan,
-                                       dma_src, src_cnt, len, result,
+                                       unmap->addr, src_cnt, len, result,
                                         dma_prep_flags);
                         }
                 }
-
+               dma_set_unmap(tx, unmap);
                 async_tx_submit(chan, tx, submit);
         } else {
                 enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                 async_tx_sync_epilog(submit);
                 submit->flags = flags_orig;
         }
+       dmaengine_unmap_put(unmap);
  
         return tx;
  }
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c

index 4a92bac744dce500a209f5e4e6033d465eff5dc9..dad95f45b88f6566afc62df645151d3a2ae60092 100644 (file)
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -28,7 +28,7 @@
  #undef pr
  #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
  
-#define NDISKS 16 /* Including P and Q */
+#define NDISKS 64 /* Including P and Q */
  
  static struct page *dataptrs[NDISKS];
  static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@ static int raid6_test(void)
                 err += test(11, &tests);
                 err += test(12, &tests);
         }
+
+       /* the 24 disk case is special for ioatdma as it is the boudary point
+        * at which it needs to switch from 8-source ops to 16-source
+        * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
+        */
+       if (NDISKS > 24)
+               err += test(24, &tests);
+
         err += test(NDISKS, &tests);
  
         pr("\n");
diff --git a/crypto/hash_info.c b/crypto/hash_info.c

new file mode 100644 (file)

index 0000000..3e7ff46
--- /dev/null
+++ b/crypto/hash_info.c
@@ -0,0 +1,56 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/export.h>
+#include <crypto/hash_info.h>
+
+const char *const hash_algo_name[HASH_ALGO__LAST] = {
+       [HASH_ALGO_MD4]         = "md4",
+       [HASH_ALGO_MD5]         = "md5",
+       [HASH_ALGO_SHA1]        = "sha1",
+       [HASH_ALGO_RIPE_MD_160] = "rmd160",
+       [HASH_ALGO_SHA256]      = "sha256",
+       [HASH_ALGO_SHA384]      = "sha384",
+       [HASH_ALGO_SHA512]      = "sha512",
+       [HASH_ALGO_SHA224]      = "sha224",
+       [HASH_ALGO_RIPE_MD_128] = "rmd128",
+       [HASH_ALGO_RIPE_MD_256] = "rmd256",
+       [HASH_ALGO_RIPE_MD_320] = "rmd320",
+       [HASH_ALGO_WP_256]      = "wp256",
+       [HASH_ALGO_WP_384]      = "wp384",
+       [HASH_ALGO_WP_512]      = "wp512",
+       [HASH_ALGO_TGR_128]     = "tgr128",
+       [HASH_ALGO_TGR_160]     = "tgr160",
+       [HASH_ALGO_TGR_192]     = "tgr192",
+};
+EXPORT_SYMBOL_GPL(hash_algo_name);
+
+const int hash_digest_size[HASH_ALGO__LAST] = {
+       [HASH_ALGO_MD4]         = MD5_DIGEST_SIZE,
+       [HASH_ALGO_MD5]         = MD5_DIGEST_SIZE,
+       [HASH_ALGO_SHA1]        = SHA1_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_160] = RMD160_DIGEST_SIZE,
+       [HASH_ALGO_SHA256]      = SHA256_DIGEST_SIZE,
+       [HASH_ALGO_SHA384]      = SHA384_DIGEST_SIZE,
+       [HASH_ALGO_SHA512]      = SHA512_DIGEST_SIZE,
+       [HASH_ALGO_SHA224]      = SHA224_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_128] = RMD128_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_256] = RMD256_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_320] = RMD320_DIGEST_SIZE,
+       [HASH_ALGO_WP_256]      = WP256_DIGEST_SIZE,
+       [HASH_ALGO_WP_384]      = WP384_DIGEST_SIZE,
+       [HASH_ALGO_WP_512]      = WP512_DIGEST_SIZE,
+       [HASH_ALGO_TGR_128]     = TGR128_DIGEST_SIZE,
+       [HASH_ALGO_TGR_160]     = TGR160_DIGEST_SIZE,
+       [HASH_ALGO_TGR_192]     = TGR192_DIGEST_SIZE,
+};
+EXPORT_SYMBOL_GPL(hash_digest_size);
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig

index c95df0b8c8808e505f9dbfc7c8bcc68da48af57d..5d9248526d780b06c51bee967cdd088c9c90e5e5 100644 (file)
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -235,17 +235,6 @@ config ACPI_INITRD_TABLE_OVERRIDE
           initrd, therefore it's safe to say Y.
           See Documentation/acpi/initrd_table_override.txt for details
  
-config ACPI_BLACKLIST_YEAR
-       int "Disable ACPI for systems before Jan 1st this year" if X86_32
-       default 0
-       help
-         Enter a 4-digit year, e.g., 2001, to disable ACPI by default
-         on platforms with DMI BIOS date before January 1st that year.
-         "acpi=force" can be used to override this mechanism.
-
-         Enter 0 to disable this mechanism and allow ACPI to
-         run by default no matter what the year.  (default)
-
  config ACPI_DEBUG
         bool "Debug Statements"
         default n
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c

index b9f0d5f4bba51cecb6ffe8fda126762850818263..8711e3797165fa73fd0c401cedbb54c61eb68e4a 100644 (file)
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -56,7 +56,6 @@ static int ac_sleep_before_get_state_ms;
  
  struct acpi_ac {
         struct power_supply charger;
-       struct acpi_device *adev;
         struct platform_device *pdev;
         unsigned long long state;
  };
@@ -70,8 +69,9 @@ struct acpi_ac {
  static int acpi_ac_get_state(struct acpi_ac *ac)
  {
         acpi_status status;
+       acpi_handle handle = ACPI_HANDLE(&ac->pdev->dev);
  
-       status = acpi_evaluate_integer(ac->adev->handle, "_PSR", NULL,
+       status = acpi_evaluate_integer(handle, "_PSR", NULL,
                                        &ac->state);
         if (ACPI_FAILURE(status)) {
                 ACPI_EXCEPTION((AE_INFO, status,
@@ -119,6 +119,7 @@ static enum power_supply_property ac_props[] = {
  static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
  {
         struct acpi_ac *ac = data;
+       struct acpi_device *adev;
  
         if (!ac)
                 return;
@@ -141,10 +142,11 @@ static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
                         msleep(ac_sleep_before_get_state_ms);
  
                 acpi_ac_get_state(ac);
-               acpi_bus_generate_netlink_event(ac->adev->pnp.device_class,
+               adev = ACPI_COMPANION(&ac->pdev->dev);
+               acpi_bus_generate_netlink_event(adev->pnp.device_class,
                                                 dev_name(&ac->pdev->dev),
                                                 event, (u32) ac->state);
-               acpi_notifier_call_chain(ac->adev, event, (u32) ac->state);
+               acpi_notifier_call_chain(adev, event, (u32) ac->state);
                 kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
         }
  
@@ -178,8 +180,8 @@ static int acpi_ac_probe(struct platform_device *pdev)
         if (!pdev)
                 return -EINVAL;
  
-       result = acpi_bus_get_device(ACPI_HANDLE(&pdev->dev), &adev);
-       if (result)
+       adev = ACPI_COMPANION(&pdev->dev);
+       if (!adev)
                 return -ENODEV;
  
         ac = kzalloc(sizeof(struct acpi_ac), GFP_KERNEL);
@@ -188,7 +190,6 @@ static int acpi_ac_probe(struct platform_device *pdev)
  
         strcpy(acpi_device_name(adev), ACPI_AC_DEVICE_NAME);
         strcpy(acpi_device_class(adev), ACPI_AC_CLASS);
-       ac->adev = adev;
         ac->pdev = pdev;
         platform_set_drvdata(pdev, ac);
  
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c

index d3961014aad7ff9d77bcb296ec2f851f96d64b5b..6745fe137b9ea541ae729429035eaeca8fc8fc07 100644 (file)
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -163,6 +163,15 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
         { "80860F41", (unsigned long)&byt_i2c_dev_desc },
         { "INT33B2", },
  
+       { "INT3430", (unsigned long)&lpt_dev_desc },
+       { "INT3431", (unsigned long)&lpt_dev_desc },
+       { "INT3432", (unsigned long)&lpt_dev_desc },
+       { "INT3433", (unsigned long)&lpt_dev_desc },
+       { "INT3434", (unsigned long)&lpt_uart_dev_desc },
+       { "INT3435", (unsigned long)&lpt_uart_dev_desc },
+       { "INT3436", (unsigned long)&lpt_sdio_dev_desc },
+       { "INT3437", },
+
         { }
  };
  
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c

index 8a4cfc7e71f0f83cc7a8644e75eab627c8e3c52d..dbfe49e5fd63cc179559b2c5caee57d27324c012 100644 (file)
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -111,7 +111,7 @@ int acpi_create_platform_device(struct acpi_device *adev,
         pdevinfo.id = -1;
         pdevinfo.res = resources;
         pdevinfo.num_res = count;
-       pdevinfo.acpi_node.handle = adev->handle;
+       pdevinfo.acpi_node.companion = adev;
         pdev = platform_device_register_full(&pdevinfo);
         if (IS_ERR(pdev)) {
                 dev_err(&adev->dev, "platform device creation failed: %ld\n",
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c

index fb848378d5824691af9e84b4f100a01e47df7d39..078c4f7fe2dd97c42d3e13ea11ebde804e3ae4f0 100644 (file)
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -75,39 +75,6 @@ static struct acpi_blacklist_item acpi_blacklist[] __initdata = {
         {""}
  };
  
-#if    CONFIG_ACPI_BLACKLIST_YEAR
-
-static int __init blacklist_by_year(void)
-{
-       int year;
-
-       /* Doesn't exist? Likely an old system */
-       if (!dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL)) {
-               printk(KERN_ERR PREFIX "no DMI BIOS year, "
-                       "acpi=force is required to enable ACPI\n" );
-               return 1;
-       }
-       /* 0? Likely a buggy new BIOS */
-       if (year == 0) {
-               printk(KERN_ERR PREFIX "DMI BIOS year==0, "
-                       "assuming ACPI-capable machine\n" );
-               return 0;
-       }
-       if (year < CONFIG_ACPI_BLACKLIST_YEAR) {
-               printk(KERN_ERR PREFIX "BIOS age (%d) fails cutoff (%d), "
-                      "acpi=force is required to enable ACPI\n",
-                      year, CONFIG_ACPI_BLACKLIST_YEAR);
-               return 1;
-       }
-       return 0;
-}
-#else
-static inline int blacklist_by_year(void)
-{
-       return 0;
-}
-#endif
-
  int __init acpi_blacklisted(void)
  {
         int i = 0;
@@ -166,8 +133,6 @@ int __init acpi_blacklisted(void)
                 }
         }
  
-       blacklisted += blacklist_by_year();
-
         dmi_check_system(acpi_osi_dmi_table);
  
         return blacklisted;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c

index d42b2fb5a7e94131ce2633ef95f4d7631729325f..b3480cf7db1a1d1eba0e8d884cb456f92b76b660 100644 (file)
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -22,16 +22,12 @@
   * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   */
  
-#include <linux/device.h>
+#include <linux/acpi.h>
  #include <linux/export.h>
  #include <linux/mutex.h>
  #include <linux/pm_qos.h>
  #include <linux/pm_runtime.h>
  
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-
  #include "internal.h"
  
  #define _COMPONENT     ACPI_POWER_COMPONENT
@@ -548,7 +544,7 @@ static int acpi_dev_pm_get_state(struct device *dev, struct acpi_device *adev,
   */
  int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
  {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+       acpi_handle handle = ACPI_HANDLE(dev);
         struct acpi_device *adev;
         int ret, d_min, d_max;
  
@@ -656,7 +652,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
         if (!device_run_wake(phys_dev))
                 return -EINVAL;
  
-       handle = DEVICE_ACPI_HANDLE(phys_dev);
+       handle = ACPI_HANDLE(phys_dev);
         if (!handle || acpi_bus_get_device(handle, &adev)) {
                 dev_dbg(phys_dev, "ACPI handle without context in %s!\n",
                         __func__);
@@ -700,7 +696,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
         if (!device_can_wakeup(dev))
                 return -EINVAL;
  
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
         if (!handle || acpi_bus_get_device(handle, &adev)) {
                 dev_dbg(dev, "ACPI handle without context in %s!\n", __func__);
                 return -ENODEV;
@@ -722,7 +718,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
   */
  struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
  {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+       acpi_handle handle = ACPI_HANDLE(dev);
         struct acpi_device *adev;
  
         return handle && !acpi_bus_get_device(handle, &adev) ? adev : NULL;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c

index d5309fd494589b4d1596cb1c03576ed1e7ef252e..ba5b56db9d27c7fafa3b19c6d1f5d2549308aea6 100644 (file)
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -173,9 +173,10 @@ static void start_transaction(struct acpi_ec *ec)
  static void advance_transaction(struct acpi_ec *ec, u8 status)
  {
         unsigned long flags;
-       struct transaction *t = ec->curr;
+       struct transaction *t;
  
         spin_lock_irqsave(&ec->lock, flags);
+       t = ec->curr;
         if (!t)
                 goto unlock;
         if (t->wlen > t->wi) {
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c

index 10f0f40587bb73309eee9e959fc1049cbaf6dc05..a22a295edb692347f16066bff26d145973ab2523 100644 (file)
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -197,30 +197,28 @@ static void acpi_physnode_link_name(char *buf, unsigned int node_id)
  
  int acpi_bind_one(struct device *dev, acpi_handle handle)
  {
-       struct acpi_device *acpi_dev;
-       acpi_status status;
+       struct acpi_device *acpi_dev = NULL;
         struct acpi_device_physical_node *physical_node, *pn;
         char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
         struct list_head *physnode_list;
         unsigned int node_id;
         int retval = -EINVAL;
  
-       if (ACPI_HANDLE(dev)) {
+       if (ACPI_COMPANION(dev)) {
                 if (handle) {
-                       dev_warn(dev, "ACPI handle is already set\n");
+                       dev_warn(dev, "ACPI companion already set\n");
                         return -EINVAL;
                 } else {
-                       handle = ACPI_HANDLE(dev);
+                       acpi_dev = ACPI_COMPANION(dev);
                 }
+       } else {
+               acpi_bus_get_device(handle, &acpi_dev);
         }
-       if (!handle)
+       if (!acpi_dev)
                 return -EINVAL;
  
+       get_device(&acpi_dev->dev);
         get_device(dev);
-       status = acpi_bus_get_device(handle, &acpi_dev);
-       if (ACPI_FAILURE(status))
-               goto err;
-
         physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL);
         if (!physical_node) {
                 retval = -ENOMEM;
@@ -242,10 +240,11 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
  
                         dev_warn(dev, "Already associated with ACPI node\n");
                         kfree(physical_node);
-                       if (ACPI_HANDLE(dev) != handle)
+                       if (ACPI_COMPANION(dev) != acpi_dev)
                                 goto err;
  
                         put_device(dev);
+                       put_device(&acpi_dev->dev);
                         return 0;
                 }
                 if (pn->node_id == node_id) {
@@ -259,8 +258,8 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
         list_add(&physical_node->node, physnode_list);
         acpi_dev->physical_node_count++;
  
-       if (!ACPI_HANDLE(dev))
-               ACPI_HANDLE_SET(dev, acpi_dev->handle);
+       if (!ACPI_COMPANION(dev))
+               ACPI_COMPANION_SET(dev, acpi_dev);
  
         acpi_physnode_link_name(physical_node_name, node_id);
         retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -283,27 +282,21 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
         return 0;
  
   err:
-       ACPI_HANDLE_SET(dev, NULL);
+       ACPI_COMPANION_SET(dev, NULL);
         put_device(dev);
+       put_device(&acpi_dev->dev);
         return retval;
  }
  EXPORT_SYMBOL_GPL(acpi_bind_one);
  
  int acpi_unbind_one(struct device *dev)
  {
+       struct acpi_device *acpi_dev = ACPI_COMPANION(dev);
         struct acpi_device_physical_node *entry;
-       struct acpi_device *acpi_dev;
-       acpi_status status;
  
-       if (!ACPI_HANDLE(dev))
+       if (!acpi_dev)
                 return 0;
  
-       status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
-       if (ACPI_FAILURE(status)) {
-               dev_err(dev, "Oops, ACPI handle corrupt in %s()\n", __func__);
-               return -EINVAL;
-       }
-
         mutex_lock(&acpi_dev->physical_node_lock);
  
         list_for_each_entry(entry, &acpi_dev->physical_node_list, node)
@@ -316,9 +309,10 @@ int acpi_unbind_one(struct device *dev)
                         acpi_physnode_link_name(physnode_name, entry->node_id);
                         sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name);
                         sysfs_remove_link(&dev->kobj, "firmware_node");
-                       ACPI_HANDLE_SET(dev, NULL);
-                       /* acpi_bind_one() increase refcnt by one. */
+                       ACPI_COMPANION_SET(dev, NULL);
+                       /* Drop references taken by acpi_bind_one(). */
                         put_device(dev);
+                       put_device(&acpi_dev->dev);
                         kfree(entry);
                         break;
                 }
@@ -328,6 +322,15 @@ int acpi_unbind_one(struct device *dev)
  }
  EXPORT_SYMBOL_GPL(acpi_unbind_one);
  
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr)
+{
+       struct acpi_device *adev;
+
+       if (!acpi_bus_get_device(acpi_get_child(parent, addr), &adev))
+               ACPI_COMPANION_SET(dev, adev);
+}
+EXPORT_SYMBOL_GPL(acpi_preset_companion);
+
  static int acpi_platform_notify(struct device *dev)
  {
         struct acpi_bus_type *type = acpi_get_bus_type(dev);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c

index 56f05869b08df2ab89bafd661d97b282c36d0b8b..0703bff5e60ecaf2f9d207f9ffc1691d50380384 100644 (file)
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -575,6 +575,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
                 dev_err(&device->dev,
                         "Bus %04x:%02x not present in PCI namespace\n",
                         root->segment, (unsigned int)root->secondary.start);
+               device->driver_data = NULL;
                 result = -ENODEV;
                 goto end;
         }
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 55f9dedbbf9fedc5aae2cf21852c7474514cd0d1..15daa21fcd056cf75b50eccf34a08482e0e24e8a 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -289,24 +289,17 @@ void acpi_bus_device_eject(void *data, u32 ost_src)
  {
         struct acpi_device *device = data;
         acpi_handle handle = device->handle;
-       struct acpi_scan_handler *handler;
         u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
         int error;
  
         lock_device_hotplug();
         mutex_lock(&acpi_scan_lock);
  
-       handler = device->handler;
-       if (!handler || !handler->hotplug.enabled) {
-               put_device(&device->dev);
-               goto err_support;
-       }
-
         if (ost_src == ACPI_NOTIFY_EJECT_REQUEST)
                 acpi_evaluate_hotplug_ost(handle, ACPI_NOTIFY_EJECT_REQUEST,
                                           ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
  
-       if (handler->hotplug.mode == AHM_CONTAINER)
+       if (device->handler && device->handler->hotplug.mode == AHM_CONTAINER)
                 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
  
         error = acpi_scan_hot_remove(device);
@@ -411,8 +404,7 @@ static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data)
                 break;
         case ACPI_NOTIFY_EJECT_REQUEST:
                 acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
-               status = acpi_bus_get_device(handle, &adev);
-               if (ACPI_FAILURE(status))
+               if (acpi_bus_get_device(handle, &adev))
                         goto err_out;
  
                 get_device(&adev->dev);
@@ -1997,6 +1989,7 @@ static int acpi_bus_scan_fixed(void)
                 if (result)
                         return result;
  
+               device->flags.match_driver = true;
                 result = device_attach(&device->dev);
                 if (result < 0)
                         return result;
@@ -2013,6 +2006,7 @@ static int acpi_bus_scan_fixed(void)
                 if (result)
                         return result;
  
+               device->flags.match_driver = true;
                 result = device_attach(&device->dev);
         }
  
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c

index 18dbdff4656e7d6c6b8f665e9fcdb2e22067cf98..995e91bcb97b7b4d5f21d585de1f5a7e46bd9ebd 100644 (file)
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -81,13 +81,6 @@ module_param(brightness_switch_enabled, bool, 0644);
  static bool allow_duplicates;
  module_param(allow_duplicates, bool, 0644);
  
-/*
- * Some BIOSes claim they use minimum backlight at boot,
- * and this may bring dimming screen after boot
- */
-static bool use_bios_initial_backlight = 1;
-module_param(use_bios_initial_backlight, bool, 0644);
-
  /*
   * For Windows 8 systems: if set ture and the GPU driver has
   * registered a backlight interface, skip registering ACPI video's.
@@ -406,12 +399,6 @@ static int __init video_set_bqc_offset(const struct dmi_system_id *d)
         return 0;
  }
  
-static int video_ignore_initial_backlight(const struct dmi_system_id *d)
-{
-       use_bios_initial_backlight = 0;
-       return 0;
-}
-
  static struct dmi_system_id video_dmi_table[] __initdata = {
         /*
          * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
@@ -456,54 +443,6 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
                 DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
                 },
         },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Folio 13-2000",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "Fujitsu E753",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion dm4",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion g6 Notebook PC",
-        .matches = {
-                DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-                DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP 1000 Notebook PC",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion m4",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion m4 Notebook PC"),
-               },
-       },
         {}
  };
  
@@ -839,20 +778,18 @@ acpi_video_init_brightness(struct acpi_video_device *device)
         if (!device->cap._BQC)
                 goto set_level;
  
-       if (use_bios_initial_backlight) {
-               level = acpi_video_bqc_value_to_level(device, level_old);
-               /*
-                * On some buggy laptops, _BQC returns an uninitialized
-                * value when invoked for the first time, i.e.
-                * level_old is invalid (no matter whether it's a level
-                * or an index). Set the backlight to max_level in this case.
-                */
-               for (i = 2; i < br->count; i++)
-                       if (level == br->levels[i])
-                               break;
-               if (i == br->count || !level)
-                       level = max_level;
-       }
+       level = acpi_video_bqc_value_to_level(device, level_old);
+       /*
+        * On some buggy laptops, _BQC returns an uninitialized
+        * value when invoked for the first time, i.e.
+        * level_old is invalid (no matter whether it's a level
+        * or an index). Set the backlight to max_level in this case.
+        */
+       for (i = 2; i < br->count; i++)
+               if (level == br->levels[i])
+                       break;
+       if (i == br->count || !level)
+               level = max_level;
  
  set_level:
         result = acpi_video_device_lcd_set_level(device, level);
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c

index ab714d2ad978644752ca3c9bdff74ae1c9f63934..4372cfa883c9c36cf2624f88186ef3e0e7b27d31 100644 (file)
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -185,7 +185,7 @@ void ata_acpi_bind_port(struct ata_port *ap)
         if (libata_noacpi || ap->flags & ATA_FLAG_ACPI_SATA || !host_handle)
                 return;
  
-       ACPI_HANDLE_SET(&ap->tdev, acpi_get_child(host_handle, ap->port_no));
+       acpi_preset_companion(&ap->tdev, host_handle, ap->port_no);
  
         if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0)
                 ap->pflags |= ATA_PFLAG_INIT_GTM_VALID;
@@ -222,7 +222,7 @@ void ata_acpi_bind_dev(struct ata_device *dev)
                 parent_handle = port_handle;
         }
  
-       ACPI_HANDLE_SET(&dev->tdev, acpi_get_child(parent_handle, adr));
+       acpi_preset_companion(&dev->tdev, parent_handle, adr);
  
         register_hotplug_dock_device(ata_dev_acpi_handle(dev),
                                      &ata_acpi_dev_dock_ops, dev, NULL, NULL);
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c

index 853f610af28fbc9dff0ee59d1fa1e6688cbfd732..e88690ebfd827b8a02558c4b611b474a4f8c4a1e 100644 (file)
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
         struct dma_async_tx_descriptor *tx;
         struct dma_chan *chan = acdev->dma_chan;
         dma_cookie_t cookie;
-       unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-               DMA_COMPL_SKIP_DEST_UNMAP;
+       unsigned long flags = DMA_PREP_INTERRUPT;
         int ret = 0;
  
         tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c

index 47051cd251132ce97752bc8f8f9748be272c0f35..3a94b799f16640eb6a8e34ef2df78a31e3085e9c 100644 (file)
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -432,7 +432,7 @@ struct platform_device *platform_device_register_full(
                 goto err_alloc;
  
         pdev->dev.parent = pdevinfo->parent;
-       ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
+       ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
  
         if (pdevinfo->dma_mask) {
                 /*
@@ -463,7 +463,7 @@ struct platform_device *platform_device_register_full(
         ret = platform_device_add(pdev);
         if (ret) {
  err:
-               ACPI_HANDLE_SET(&pdev->dev, NULL);
+               ACPI_COMPANION_SET(&pdev->dev, NULL);
                 kfree(pdev->dev.dma_mask);
  
  err_alloc:
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c

index c12e9b9556be7d5d49d5d78c9de09eb39ce8a0f8..1b41fca3d65a54545c6c124e0df696998c29a1af 100644 (file)
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1350,6 +1350,9 @@ static int device_prepare(struct device *dev, pm_message_t state)
  
         device_unlock(dev);
  
+       if (error)
+               pm_runtime_put(dev);
+
         return error;
  }
  
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c

index b5d842370cc9e75bfcf67f38ca6c03ddb259e8fe..ea192ec029c45bf7354d8184b44e1eb460cdbf77 100644 (file)
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq)
         blk_end_request_all(rq, 0);
  }
  
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
  
  static void null_ipi_cmd_end_io(void *data)
  {
@@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd)
         put_cpu();
  }
  
-#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+#endif /* CONFIG_SMP */
  
  static inline void null_handle_cmd(struct nullb_cmd *cmd)
  {
@@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
                 end_cmd(cmd);
                 break;
         case NULL_IRQ_SOFTIRQ:
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
                 null_cmd_end_ipi(cmd);
  #else
                 end_cmd(cmd);
@@ -571,7 +571,7 @@ static int __init null_init(void)
  {
         unsigned int i;
  
-#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#if !defined(CONFIG_SMP)
         if (irqmode == NULL_IRQ_SOFTIRQ) {
                 pr_warn("null_blk: softirq completions not available.\n");
                 pr_warn("null_blk: using direct completions.\n");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index 588479d58f52f1496d8395c99ec410a3c1a10da3..6a680d4de7f1c3dcfa7999e45efc98b4b14d99fc 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -199,15 +199,16 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
  
         spin_lock_irqsave(&vblk->vq_lock, flags);
         if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+               virtqueue_kick(vblk->vq);
                 spin_unlock_irqrestore(&vblk->vq_lock, flags);
                 blk_mq_stop_hw_queue(hctx);
-               virtqueue_kick(vblk->vq);
                 return BLK_MQ_RQ_QUEUE_BUSY;
         }
-       spin_unlock_irqrestore(&vblk->vq_lock, flags);
  
         if (last)
                 virtqueue_kick(vblk->vq);
+
+       spin_unlock_irqrestore(&vblk->vq_lock, flags);
         return BLK_MQ_RQ_QUEUE_OK;
  }
  
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig

index 94c0c74434eac641a3ddfcd18324913ecedeaafa..1a65838888cdbb37ec2551fd132c7c91cc66c511 100644 (file)
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -33,6 +33,15 @@ config TCG_TIS
           from within Linux.  To compile this driver as a module, choose
           M here; the module will be called tpm_tis.
  
+config TCG_TIS_I2C_ATMEL
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
+       depends on I2C
+       ---help---
+         If you have an Atmel I2C TPM security chip say Yes and it will be
+         accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will
+         be called tpm_tis_i2c_atmel.
+
  config TCG_TIS_I2C_INFINEON
         tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)"
         depends on I2C
@@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON
           Specification 0.20 say Yes and it will be accessible from within
           Linux.
           To compile this driver as a module, choose M here; the module
-         will be called tpm_tis_i2c_infineon.
+         will be called tpm_i2c_infineon.
+
+config TCG_TIS_I2C_NUVOTON
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)"
+       depends on I2C
+       ---help---
+         If you have a TPM security chip with an I2C interface from
+         Nuvoton Technology Corp. say Yes and it will be accessible
+         from within Linux.
+         To compile this driver as a module, choose M here; the module
+         will be called tpm_i2c_nuvoton.
  
  config TCG_NSC
         tristate "National Semiconductor TPM Interface"
@@ -82,14 +101,14 @@ config TCG_IBMVTPM
           as a module, choose M here; the module will be called tpm_ibmvtpm.
  
  config TCG_ST33_I2C
-        tristate "STMicroelectronics ST33 I2C TPM"
-        depends on I2C
-        depends on GPIOLIB
-        ---help---
-        If you have a TPM security chip from STMicroelectronics working with
-        an I2C bus say Yes and it will be accessible from within Linux.
-        To compile this driver as a module, choose M here; the module will be
-        called tpm_stm_st33_i2c.
+       tristate "STMicroelectronics ST33 I2C TPM"
+       depends on I2C
+       depends on GPIOLIB
+       ---help---
+         If you have a TPM security chip from STMicroelectronics working with
+         an I2C bus say Yes and it will be accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will be
+         called tpm_stm_st33_i2c.
  
  config TCG_XEN
         tristate "XEN TPM Interface"
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile

index eb41ff97d0ad13f577ff942364ffe8314a5a2999..b80a4000daeee7b72f7f945637544312473edeb6 100644 (file)
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -2,17 +2,20 @@
  # Makefile for the kernel tpm device drivers.
  #
  obj-$(CONFIG_TCG_TPM) += tpm.o
+tpm-y := tpm-interface.o
+tpm-$(CONFIG_ACPI) += tpm_ppi.o
+
  ifdef CONFIG_ACPI
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o
+       tpm-y += tpm_eventlog.o tpm_acpi.o
  else
  ifdef CONFIG_TCG_IBMVTPM
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_of.o
+       tpm-y += tpm_eventlog.o tpm_of.o
  endif
  endif
  obj-$(CONFIG_TCG_TIS) += tpm_tis.o
+obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
  obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
+obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
  obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
  obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
  obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c

new file mode 100644 (file)

index 0000000..6ae41d3
--- /dev/null
+++ b/drivers/char/tpm/tpm-interface.c
@@ -0,0 +1,1554 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Authors:
+ * Leendert van Doorn <leendert@watson.ibm.com>
+ * Dave Safford <safford@watson.ibm.com>
+ * Reiner Sailer <sailer@watson.ibm.com>
+ * Kylene Hall <kjhall@us.ibm.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * Device driver for TCG/TCPA TPM (trusted platform module).
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * Note, the TPM chip is not interrupt driven (only polling)
+ * and can have very long timeouts (minutes!). Hence the unusual
+ * calls to msleep.
+ *
+ */
+
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/freezer.h>
+
+#include "tpm.h"
+#include "tpm_eventlog.h"
+
+enum tpm_duration {
+       TPM_SHORT = 0,
+       TPM_MEDIUM = 1,
+       TPM_LONG = 2,
+       TPM_UNDEFINED,
+};
+
+#define TPM_MAX_ORDINAL 243
+#define TSC_MAX_ORDINAL 12
+#define TPM_PROTECTED_COMMAND 0x00
+#define TPM_CONNECTION_COMMAND 0x40
+
+/*
+ * Bug workaround - some TPM's don't flush the most
+ * recently changed pcr on suspend, so force the flush
+ * with an extend to the selected _unused_ non-volatile pcr.
+ */
+static int tpm_suspend_pcr;
+module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
+MODULE_PARM_DESC(suspend_pcr,
+                "PCR to use for dummy writes to faciltate flush on suspend.");
+
+static LIST_HEAD(tpm_chip_list);
+static DEFINE_SPINLOCK(driver_lock);
+static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
+
+/*
+ * Array with one entry per ordinal defining the maximum amount
+ * of time the chip could take to return the result.  The ordinal
+ * designation of short, medium or long is defined in a table in
+ * TCG Specification TPM Main Part 2 TPM Structures Section 17. The
+ * values of the SHORT, MEDIUM, and LONG durations are retrieved
+ * from the chip during initialization with a call to tpm_get_timeouts.
+ */
+static const u8 tpm_ordinal_duration[TPM_MAX_ORDINAL] = {
+       TPM_UNDEFINED,          /* 0 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 5 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 10 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_LONG,
+       TPM_MEDIUM,             /* 15 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_SHORT,              /* 20 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,              /* 25 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 30 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 35 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 40 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 45 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_LONG,
+       TPM_MEDIUM,             /* 50 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 55 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 60 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 65 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 70 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 75 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 80 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 85 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 90 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 95 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 100 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 105 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 110 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 115 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 120 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 125 */
+       TPM_SHORT,
+       TPM_LONG,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 130 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,          /* 135 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 140 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 145 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 150 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 155 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 160 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 165 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 170 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 175 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 180 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,             /* 185 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 190 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 195 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 200 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 205 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 210 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,          /* 215 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 220 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 225 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 230 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 235 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 240 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+};
+
+static void user_reader_timeout(unsigned long ptr)
+{
+       struct tpm_chip *chip = (struct tpm_chip *) ptr;
+
+       schedule_work(&chip->work);
+}
+
+static void timeout_work(struct work_struct *work)
+{
+       struct tpm_chip *chip = container_of(work, struct tpm_chip, work);
+
+       mutex_lock(&chip->buffer_mutex);
+       atomic_set(&chip->data_pending, 0);
+       memset(chip->data_buffer, 0, TPM_BUFSIZE);
+       mutex_unlock(&chip->buffer_mutex);
+}
+
+/*
+ * Returns max number of jiffies to wait
+ */
+unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
+                                          u32 ordinal)
+{
+       int duration_idx = TPM_UNDEFINED;
+       int duration = 0;
+       u8 category = (ordinal >> 24) & 0xFF;
+
+       if ((category == TPM_PROTECTED_COMMAND && ordinal < TPM_MAX_ORDINAL) ||
+           (category == TPM_CONNECTION_COMMAND && ordinal < TSC_MAX_ORDINAL))
+               duration_idx = tpm_ordinal_duration[ordinal];
+
+       if (duration_idx != TPM_UNDEFINED)
+               duration = chip->vendor.duration[duration_idx];
+       if (duration <= 0)
+               return 2 * 60 * HZ;
+       else
+               return duration;
+}
+EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
+
+/*
+ * Internal kernel interface to transmit TPM commands
+ */
+static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+                           size_t bufsiz)
+{
+       ssize_t rc;
+       u32 count, ordinal;
+       unsigned long stop;
+
+       if (bufsiz > TPM_BUFSIZE)
+               bufsiz = TPM_BUFSIZE;
+
+       count = be32_to_cpu(*((__be32 *) (buf + 2)));
+       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+       if (count == 0)
+               return -ENODATA;
+       if (count > bufsiz) {
+               dev_err(chip->dev,
+                       "invalid count value %x %zx\n", count, bufsiz);
+               return -E2BIG;
+       }
+
+       mutex_lock(&chip->tpm_mutex);
+
+       rc = chip->vendor.send(chip, (u8 *) buf, count);
+       if (rc < 0) {
+               dev_err(chip->dev,
+                       "tpm_transmit: tpm_send: error %zd\n", rc);
+               goto out;
+       }
+
+       if (chip->vendor.irq)
+               goto out_recv;
+
+       stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
+       do {
+               u8 status = chip->vendor.status(chip);
+               if ((status & chip->vendor.req_complete_mask) ==
+                   chip->vendor.req_complete_val)
+                       goto out_recv;
+
+               if (chip->vendor.req_canceled(chip, status)) {
+                       dev_err(chip->dev, "Operation Canceled\n");
+                       rc = -ECANCELED;
+                       goto out;
+               }
+
+               msleep(TPM_TIMEOUT);    /* CHECK */
+               rmb();
+       } while (time_before(jiffies, stop));
+
+       chip->vendor.cancel(chip);
+       dev_err(chip->dev, "Operation Timed out\n");
+       rc = -ETIME;
+       goto out;
+
+out_recv:
+       rc = chip->vendor.recv(chip, (u8 *) buf, bufsiz);
+       if (rc < 0)
+               dev_err(chip->dev,
+                       "tpm_transmit: tpm_recv: error %zd\n", rc);
+out:
+       mutex_unlock(&chip->tpm_mutex);
+       return rc;
+}
+
+#define TPM_DIGEST_SIZE 20
+#define TPM_RET_CODE_IDX 6
+
+enum tpm_capabilities {
+       TPM_CAP_FLAG = cpu_to_be32(4),
+       TPM_CAP_PROP = cpu_to_be32(5),
+       CAP_VERSION_1_1 = cpu_to_be32(0x06),
+       CAP_VERSION_1_2 = cpu_to_be32(0x1A)
+};
+
+enum tpm_sub_capabilities {
+       TPM_CAP_PROP_PCR = cpu_to_be32(0x101),
+       TPM_CAP_PROP_MANUFACTURER = cpu_to_be32(0x103),
+       TPM_CAP_FLAG_PERM = cpu_to_be32(0x108),
+       TPM_CAP_FLAG_VOL = cpu_to_be32(0x109),
+       TPM_CAP_PROP_OWNER = cpu_to_be32(0x111),
+       TPM_CAP_PROP_TIS_TIMEOUT = cpu_to_be32(0x115),
+       TPM_CAP_PROP_TIS_DURATION = cpu_to_be32(0x120),
+
+};
+
+static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
+                           int len, const char *desc)
+{
+       int err;
+
+       len = tpm_transmit(chip, (u8 *) cmd, len);
+       if (len <  0)
+               return len;
+       else if (len < TPM_HEADER_SIZE)
+               return -EFAULT;
+
+       err = be32_to_cpu(cmd->header.out.return_code);
+       if (err != 0 && desc)
+               dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc);
+
+       return err;
+}
+
+#define TPM_INTERNAL_RESULT_SIZE 200
+#define TPM_TAG_RQU_COMMAND cpu_to_be16(193)
+#define TPM_ORD_GET_CAP cpu_to_be32(101)
+#define TPM_ORD_GET_RANDOM cpu_to_be32(70)
+
+static const struct tpm_input_header tpm_getcap_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(22),
+       .ordinal = TPM_ORD_GET_CAP
+};
+
+ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
+                  const char *desc)
+{
+       struct tpm_cmd_t tpm_cmd;
+       int rc;
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) {
+               tpm_cmd.params.getcap_in.cap = subcap_id;
+               /*subcap field not necessary */
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0);
+               tpm_cmd.header.in.length -= cpu_to_be32(sizeof(__be32));
+       } else {
+               if (subcap_id == TPM_CAP_FLAG_PERM ||
+                   subcap_id == TPM_CAP_FLAG_VOL)
+                       tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG;
+               else
+                       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+               tpm_cmd.params.getcap_in.subcap = subcap_id;
+       }
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
+       if (!rc)
+               *cap = tpm_cmd.params.getcap_out.cap;
+       return rc;
+}
+
+void tpm_gen_interrupt(struct tpm_chip *chip)
+{
+       struct  tpm_cmd_t tpm_cmd;
+       ssize_t rc;
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                       "attempting to determine the timeouts");
+}
+EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
+
+#define TPM_ORD_STARTUP cpu_to_be32(153)
+#define TPM_ST_CLEAR cpu_to_be16(1)
+#define TPM_ST_STATE cpu_to_be16(2)
+#define TPM_ST_DEACTIVATED cpu_to_be16(3)
+static const struct tpm_input_header tpm_startup_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(12),
+       .ordinal = TPM_ORD_STARTUP
+};
+
+static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
+{
+       struct tpm_cmd_t start_cmd;
+       start_cmd.header.in = tpm_startup_header;
+       start_cmd.params.startup_in.startup_type = startup_type;
+       return transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
+                           "attempting to start the TPM");
+}
+
+int tpm_get_timeouts(struct tpm_chip *chip)
+{
+       struct tpm_cmd_t tpm_cmd;
+       struct timeout_t *timeout_cap;
+       struct duration_t *duration_cap;
+       ssize_t rc;
+       u32 timeout;
+       unsigned int scale = 1;
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
+
+       if (rc == TPM_ERR_INVALID_POSTINIT) {
+               /* The TPM is not started, we are the first to talk to it.
+                  Execute a startup command. */
+               dev_info(chip->dev, "Issuing TPM_STARTUP");
+               if (tpm_startup(chip, TPM_ST_CLEAR))
+                       return rc;
+
+               tpm_cmd.header.in = tpm_getcap_header;
+               tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+               tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+               rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                                 NULL);
+       }
+       if (rc) {
+               dev_err(chip->dev,
+                       "A TPM error (%zd) occurred attempting to determine the timeouts\n",
+                       rc);
+               goto duration;
+       }
+
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
+               return -EINVAL;
+
+       timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
+       /* Don't overwrite default if value is 0 */
+       timeout = be32_to_cpu(timeout_cap->a);
+       if (timeout && timeout < 1000) {
+               /* timeouts in msec rather usec */
+               scale = 1000;
+               chip->vendor.timeout_adjusted = true;
+       }
+       if (timeout)
+               chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->b);
+       if (timeout)
+               chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->c);
+       if (timeout)
+               chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->d);
+       if (timeout)
+               chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
+
+duration:
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
+
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                       "attempting to determine the durations");
+       if (rc)
+               return rc;
+
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
+               return -EINVAL;
+
+       duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
+       chip->vendor.duration[TPM_SHORT] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
+       chip->vendor.duration[TPM_MEDIUM] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
+       chip->vendor.duration[TPM_LONG] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+
+       /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
+        * value wrong and apparently reports msecs rather than usecs. So we
+        * fix up the resulting too-small TPM_SHORT value to make things work.
+        * We also scale the TPM_MEDIUM and -_LONG values by 1000.
+        */
+       if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
+               chip->vendor.duration[TPM_SHORT] = HZ;
+               chip->vendor.duration[TPM_MEDIUM] *= 1000;
+               chip->vendor.duration[TPM_LONG] *= 1000;
+               chip->vendor.duration_adjusted = true;
+               dev_info(chip->dev, "Adjusting TPM timeout parameters.");
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_get_timeouts);
+
+#define TPM_ORD_CONTINUE_SELFTEST 83
+#define CONTINUE_SELFTEST_RESULT_SIZE 10
+
+static struct tpm_input_header continue_selftest_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(10),
+       .ordinal = cpu_to_be32(TPM_ORD_CONTINUE_SELFTEST),
+};
+
+/**
+ * tpm_continue_selftest -- run TPM's selftest
+ * @chip: TPM chip to use
+ *
+ * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
+ * a TPM error code.
+ */
+static int tpm_continue_selftest(struct tpm_chip *chip)
+{
+       int rc;
+       struct tpm_cmd_t cmd;
+
+       cmd.header.in = continue_selftest_header;
+       rc = transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
+                         "continue selftest");
+       return rc;
+}
+
+ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+       ssize_t rc;
+
+       rc = tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
+                        "attempting to determine the permanent enabled state");
+       if (rc)
+               return 0;
+
+       rc = sprintf(buf, "%d\n", !cap.perm_flags.disable);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_show_enabled);
+
+ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+       ssize_t rc;
+
+       rc = tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
+                        "attempting to determine the permanent active state");
+       if (rc)
+               return 0;
+
+       rc = sprintf(buf, "%d\n", !cap.perm_flags.deactivated);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_show_active);
+
+ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+       ssize_t rc;
+
+       rc = tpm_getcap(dev, TPM_CAP_PROP_OWNER, &cap,
+                        "attempting to determine the owner state");
+       if (rc)
+               return 0;
+
+       rc = sprintf(buf, "%d\n", cap.owned);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_show_owned);
+
+ssize_t tpm_show_temp_deactivated(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       cap_t cap;
+       ssize_t rc;
+
+       rc = tpm_getcap(dev, TPM_CAP_FLAG_VOL, &cap,
+                        "attempting to determine the temporary state");
+       if (rc)
+               return 0;
+
+       rc = sprintf(buf, "%d\n", cap.stclear_flags.deactivated);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_show_temp_deactivated);
+
+/*
+ * tpm_chip_find_get - return tpm_chip for given chip number
+ */
+static struct tpm_chip *tpm_chip_find_get(int chip_num)
+{
+       struct tpm_chip *pos, *chip = NULL;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
+               if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num)
+                       continue;
+
+               if (try_module_get(pos->dev->driver->owner)) {
+                       chip = pos;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return chip;
+}
+
+#define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
+#define READ_PCR_RESULT_SIZE 30
+static struct tpm_input_header pcrread_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(14),
+       .ordinal = TPM_ORDINAL_PCRREAD
+};
+
+static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
+{
+       int rc;
+       struct tpm_cmd_t cmd;
+
+       cmd.header.in = pcrread_header;
+       cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
+       rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
+                         "attempting to read a pcr value");
+
+       if (rc == 0)
+               memcpy(res_buf, cmd.params.pcrread_out.pcr_result,
+                      TPM_DIGEST_SIZE);
+       return rc;
+}
+
+/**
+ * tpm_pcr_read - read a pcr value
+ * @chip_num:  tpm idx # or ANY
+ * @pcr_idx:   pcr idx to retrieve
+ * @res_buf:   TPM_PCR value
+ *             size of res_buf is 20 bytes (or NULL if you don't care)
+ *
+ * The TPM driver should be built-in, but for whatever reason it
+ * isn't, protect against the chip disappearing, by incrementing
+ * the module usage count.
+ */
+int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf)
+{
+       struct tpm_chip *chip;
+       int rc;
+
+       chip = tpm_chip_find_get(chip_num);
+       if (chip == NULL)
+               return -ENODEV;
+       rc = __tpm_pcr_read(chip, pcr_idx, res_buf);
+       tpm_chip_put(chip);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pcr_read);
+
+/**
+ * tpm_pcr_extend - extend pcr value with hash
+ * @chip_num:  tpm idx # or AN&
+ * @pcr_idx:   pcr idx to extend
+ * @hash:      hash value used to extend pcr value
+ *
+ * The TPM driver should be built-in, but for whatever reason it
+ * isn't, protect against the chip disappearing, by incrementing
+ * the module usage count.
+ */
+#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
+#define EXTEND_PCR_RESULT_SIZE 34
+static struct tpm_input_header pcrextend_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(34),
+       .ordinal = TPM_ORD_PCR_EXTEND
+};
+
+int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
+{
+       struct tpm_cmd_t cmd;
+       int rc;
+       struct tpm_chip *chip;
+
+       chip = tpm_chip_find_get(chip_num);
+       if (chip == NULL)
+               return -ENODEV;
+
+       cmd.header.in = pcrextend_header;
+       cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
+       memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
+       rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+                         "attempting extend a PCR value");
+
+       tpm_chip_put(chip);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pcr_extend);
+
+/**
+ * tpm_do_selftest - have the TPM continue its selftest and wait until it
+ *                   can receive further commands
+ * @chip: TPM chip to use
+ *
+ * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
+ * a TPM error code.
+ */
+int tpm_do_selftest(struct tpm_chip *chip)
+{
+       int rc;
+       unsigned int loops;
+       unsigned int delay_msec = 100;
+       unsigned long duration;
+       struct tpm_cmd_t cmd;
+
+       duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST);
+
+       loops = jiffies_to_msecs(duration) / delay_msec;
+
+       rc = tpm_continue_selftest(chip);
+       /* This may fail if there was no TPM driver during a suspend/resume
+        * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
+        */
+       if (rc)
+               return rc;
+
+       do {
+               /* Attempt to read a PCR value */
+               cmd.header.in = pcrread_header;
+               cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
+               rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
+               /* Some buggy TPMs will not respond to tpm_tis_ready() for
+                * around 300ms while the self test is ongoing, keep trying
+                * until the self test duration expires. */
+               if (rc == -ETIME) {
+                       dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test");
+                       msleep(delay_msec);
+                       continue;
+               }
+
+               if (rc < TPM_HEADER_SIZE)
+                       return -EFAULT;
+
+               rc = be32_to_cpu(cmd.header.out.return_code);
+               if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
+                       dev_info(chip->dev,
+                                "TPM is disabled/deactivated (0x%X)\n", rc);
+                       /* TPM is disabled and/or deactivated; driver can
+                        * proceed and TPM does handle commands for
+                        * suspend/resume correctly
+                        */
+                       return 0;
+               }
+               if (rc != TPM_WARN_DOING_SELFTEST)
+                       return rc;
+               msleep(delay_msec);
+       } while (--loops > 0);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_do_selftest);
+
+int tpm_send(u32 chip_num, void *cmd, size_t buflen)
+{
+       struct tpm_chip *chip;
+       int rc;
+
+       chip = tpm_chip_find_get(chip_num);
+       if (chip == NULL)
+               return -ENODEV;
+
+       rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
+
+       tpm_chip_put(chip);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_send);
+
+ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
+                     char *buf)
+{
+       cap_t cap;
+       u8 digest[TPM_DIGEST_SIZE];
+       ssize_t rc;
+       int i, j, num_pcrs;
+       char *str = buf;
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       rc = tpm_getcap(dev, TPM_CAP_PROP_PCR, &cap,
+                       "attempting to determine the number of PCRS");
+       if (rc)
+               return 0;
+
+       num_pcrs = be32_to_cpu(cap.num_pcrs);
+       for (i = 0; i < num_pcrs; i++) {
+               rc = __tpm_pcr_read(chip, i, digest);
+               if (rc)
+                       break;
+               str += sprintf(str, "PCR-%02d: ", i);
+               for (j = 0; j < TPM_DIGEST_SIZE; j++)
+                       str += sprintf(str, "%02X ", digest[j]);
+               str += sprintf(str, "\n");
+       }
+       return str - buf;
+}
+EXPORT_SYMBOL_GPL(tpm_show_pcrs);
+
+#define  READ_PUBEK_RESULT_SIZE 314
+#define TPM_ORD_READPUBEK cpu_to_be32(124)
+static struct tpm_input_header tpm_readpubek_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(30),
+       .ordinal = TPM_ORD_READPUBEK
+};
+
+ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
+                      char *buf)
+{
+       u8 *data;
+       struct tpm_cmd_t tpm_cmd;
+       ssize_t err;
+       int i, rc;
+       char *str = buf;
+
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       tpm_cmd.header.in = tpm_readpubek_header;
+       err = transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
+                       "attempting to read the PUBEK");
+       if (err)
+               goto out;
+
+       /*
+          ignore header 10 bytes
+          algorithm 32 bits (1 == RSA )
+          encscheme 16 bits
+          sigscheme 16 bits
+          parameters (RSA 12->bytes: keybit, #primes, expbit)
+          keylenbytes 32 bits
+          256 byte modulus
+          ignore checksum 20 bytes
+        */
+       data = tpm_cmd.params.readpubek_out_buffer;
+       str +=
+           sprintf(str,
+                   "Algorithm: %02X %02X %02X %02X\n"
+                   "Encscheme: %02X %02X\n"
+                   "Sigscheme: %02X %02X\n"
+                   "Parameters: %02X %02X %02X %02X "
+                   "%02X %02X %02X %02X "
+                   "%02X %02X %02X %02X\n"
+                   "Modulus length: %d\n"
+                   "Modulus:\n",
+                   data[0], data[1], data[2], data[3],
+                   data[4], data[5],
+                   data[6], data[7],
+                   data[12], data[13], data[14], data[15],
+                   data[16], data[17], data[18], data[19],
+                   data[20], data[21], data[22], data[23],
+                   be32_to_cpu(*((__be32 *) (data + 24))));
+
+       for (i = 0; i < 256; i++) {
+               str += sprintf(str, "%02X ", data[i + 28]);
+               if ((i + 1) % 16 == 0)
+                       str += sprintf(str, "\n");
+       }
+out:
+       rc = str - buf;
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_show_pubek);
+
+
+ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
+                     char *buf)
+{
+       cap_t cap;
+       ssize_t rc;
+       char *str = buf;
+
+       rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
+                       "attempting to determine the manufacturer");
+       if (rc)
+               return 0;
+       str += sprintf(str, "Manufacturer: 0x%x\n",
+                      be32_to_cpu(cap.manufacturer_id));
+
+       /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
+       rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap,
+                        "attempting to determine the 1.2 version");
+       if (!rc) {
+               str += sprintf(str,
+                              "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                              cap.tpm_version_1_2.Major,
+                              cap.tpm_version_1_2.Minor,
+                              cap.tpm_version_1_2.revMajor,
+                              cap.tpm_version_1_2.revMinor);
+       } else {
+               /* Otherwise just use TPM_STRUCT_VER */
+               rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
+                               "attempting to determine the 1.1 version");
+               if (rc)
+                       return 0;
+               str += sprintf(str,
+                              "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                              cap.tpm_version.Major,
+                              cap.tpm_version.Minor,
+                              cap.tpm_version.revMajor,
+                              cap.tpm_version.revMinor);
+       }
+
+       return str - buf;
+}
+EXPORT_SYMBOL_GPL(tpm_show_caps);
+
+ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip->vendor.duration[TPM_LONG] == 0)
+               return 0;
+
+       return sprintf(buf, "%d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
+                      chip->vendor.duration_adjusted
+                      ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_durations);
+
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d %d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.timeout_a),
+                      jiffies_to_usecs(chip->vendor.timeout_b),
+                      jiffies_to_usecs(chip->vendor.timeout_c),
+                      jiffies_to_usecs(chip->vendor.timeout_d),
+                      chip->vendor.timeout_adjusted
+                      ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
+
+ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
+                       const char *buf, size_t count)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       if (chip == NULL)
+               return 0;
+
+       chip->vendor.cancel(chip);
+       return count;
+}
+EXPORT_SYMBOL_GPL(tpm_store_cancel);
+
+static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
+                                       bool check_cancel, bool *canceled)
+{
+       u8 status = chip->vendor.status(chip);
+
+       *canceled = false;
+       if ((status & mask) == mask)
+               return true;
+       if (check_cancel && chip->vendor.req_canceled(chip, status)) {
+               *canceled = true;
+               return true;
+       }
+       return false;
+}
+
+int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
+                     wait_queue_head_t *queue, bool check_cancel)
+{
+       unsigned long stop;
+       long rc;
+       u8 status;
+       bool canceled = false;
+
+       /* check current status */
+       status = chip->vendor.status(chip);
+       if ((status & mask) == mask)
+               return 0;
+
+       stop = jiffies + timeout;
+
+       if (chip->vendor.irq) {
+again:
+               timeout = stop - jiffies;
+               if ((long)timeout <= 0)
+                       return -ETIME;
+               rc = wait_event_interruptible_timeout(*queue,
+                       wait_for_tpm_stat_cond(chip, mask, check_cancel,
+                                              &canceled),
+                       timeout);
+               if (rc > 0) {
+                       if (canceled)
+                               return -ECANCELED;
+                       return 0;
+               }
+               if (rc == -ERESTARTSYS && freezing(current)) {
+                       clear_thread_flag(TIF_SIGPENDING);
+                       goto again;
+               }
+       } else {
+               do {
+                       msleep(TPM_TIMEOUT);
+                       status = chip->vendor.status(chip);
+                       if ((status & mask) == mask)
+                               return 0;
+               } while (time_before(jiffies, stop));
+       }
+       return -ETIME;
+}
+EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
+/*
+ * Device file system interface to the TPM
+ *
+ * It's assured that the chip will be opened just once,
+ * by the check of is_open variable, which is protected
+ * by driver_lock.
+ */
+int tpm_open(struct inode *inode, struct file *file)
+{
+       struct miscdevice *misc = file->private_data;
+       struct tpm_chip *chip = container_of(misc, struct tpm_chip,
+                                            vendor.miscdev);
+
+       if (test_and_set_bit(0, &chip->is_open)) {
+               dev_dbg(chip->dev, "Another process owns this TPM\n");
+               return -EBUSY;
+       }
+
+       chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
+       if (chip->data_buffer == NULL) {
+               clear_bit(0, &chip->is_open);
+               return -ENOMEM;
+       }
+
+       atomic_set(&chip->data_pending, 0);
+
+       file->private_data = chip;
+       get_device(chip->dev);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_open);
+
+/*
+ * Called on file close
+ */
+int tpm_release(struct inode *inode, struct file *file)
+{
+       struct tpm_chip *chip = file->private_data;
+
+       del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_work(&chip->work);
+       file->private_data = NULL;
+       atomic_set(&chip->data_pending, 0);
+       kzfree(chip->data_buffer);
+       clear_bit(0, &chip->is_open);
+       put_device(chip->dev);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_release);
+
+ssize_t tpm_write(struct file *file, const char __user *buf,
+                 size_t size, loff_t *off)
+{
+       struct tpm_chip *chip = file->private_data;
+       size_t in_size = size;
+       ssize_t out_size;
+
+       /* cannot perform a write until the read has cleared
+          either via tpm_read or a user_read_timer timeout.
+          This also prevents splitted buffered writes from blocking here.
+       */
+       if (atomic_read(&chip->data_pending) != 0)
+               return -EBUSY;
+
+       if (in_size > TPM_BUFSIZE)
+               return -E2BIG;
+
+       mutex_lock(&chip->buffer_mutex);
+
+       if (copy_from_user
+           (chip->data_buffer, (void __user *) buf, in_size)) {
+               mutex_unlock(&chip->buffer_mutex);
+               return -EFAULT;
+       }
+
+       /* atomic tpm command send and result receive */
+       out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
+       if (out_size < 0) {
+               mutex_unlock(&chip->buffer_mutex);
+               return out_size;
+       }
+
+       atomic_set(&chip->data_pending, out_size);
+       mutex_unlock(&chip->buffer_mutex);
+
+       /* Set a timeout by which the reader must come claim the result */
+       mod_timer(&chip->user_read_timer, jiffies + (60 * HZ));
+
+       return in_size;
+}
+EXPORT_SYMBOL_GPL(tpm_write);
+
+ssize_t tpm_read(struct file *file, char __user *buf,
+                size_t size, loff_t *off)
+{
+       struct tpm_chip *chip = file->private_data;
+       ssize_t ret_size;
+       int rc;
+
+       del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_work(&chip->work);
+       ret_size = atomic_read(&chip->data_pending);
+       if (ret_size > 0) {     /* relay data */
+               ssize_t orig_ret_size = ret_size;
+               if (size < ret_size)
+                       ret_size = size;
+
+               mutex_lock(&chip->buffer_mutex);
+               rc = copy_to_user(buf, chip->data_buffer, ret_size);
+               memset(chip->data_buffer, 0, orig_ret_size);
+               if (rc)
+                       ret_size = -EFAULT;
+
+               mutex_unlock(&chip->buffer_mutex);
+       }
+
+       atomic_set(&chip->data_pending, 0);
+
+       return ret_size;
+}
+EXPORT_SYMBOL_GPL(tpm_read);
+
+void tpm_remove_hardware(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip == NULL) {
+               dev_err(dev, "No device data found\n");
+               return;
+       }
+
+       spin_lock(&driver_lock);
+       list_del_rcu(&chip->list);
+       spin_unlock(&driver_lock);
+       synchronize_rcu();
+
+       misc_deregister(&chip->vendor.miscdev);
+       sysfs_remove_group(&dev->kobj, chip->vendor.attr_group);
+       tpm_remove_ppi(&dev->kobj);
+       tpm_bios_log_teardown(chip->bios_dir);
+
+       /* write it this way to be explicit (chip->dev == dev) */
+       put_device(chip->dev);
+}
+EXPORT_SYMBOL_GPL(tpm_remove_hardware);
+
+#define TPM_ORD_SAVESTATE cpu_to_be32(152)
+#define SAVESTATE_RESULT_SIZE 10
+
+static struct tpm_input_header savestate_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(10),
+       .ordinal = TPM_ORD_SAVESTATE
+};
+
+/*
+ * We are about to suspend. Save the TPM state
+ * so that it can be restored.
+ */
+int tpm_pm_suspend(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       struct tpm_cmd_t cmd;
+       int rc, try;
+
+       u8 dummy_hash[TPM_DIGEST_SIZE] = { 0 };
+
+       if (chip == NULL)
+               return -ENODEV;
+
+       /* for buggy tpm, flush pcrs with extend to selected dummy */
+       if (tpm_suspend_pcr) {
+               cmd.header.in = pcrextend_header;
+               cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
+               memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
+                      TPM_DIGEST_SIZE);
+               rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+                                 "extending dummy pcr before suspend");
+       }
+
+       /* now do the actual savestate */
+       for (try = 0; try < TPM_RETRY; try++) {
+               cmd.header.in = savestate_header;
+               rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
+
+               /*
+                * If the TPM indicates that it is too busy to respond to
+                * this command then retry before giving up.  It can take
+                * several seconds for this TPM to be ready.
+                *
+                * This can happen if the TPM has already been sent the
+                * SaveState command before the driver has loaded.  TCG 1.2
+                * specification states that any communication after SaveState
+                * may cause the TPM to invalidate previously saved state.
+                */
+               if (rc != TPM_WARN_RETRY)
+                       break;
+               msleep(TPM_TIMEOUT_RETRY);
+       }
+
+       if (rc)
+               dev_err(chip->dev,
+                       "Error (%d) sending savestate before suspend\n", rc);
+       else if (try > 0)
+               dev_warn(chip->dev, "TPM savestate took %dms\n",
+                        try * TPM_TIMEOUT_RETRY);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pm_suspend);
+
+/*
+ * Resume from a power safe. The BIOS already restored
+ * the TPM state.
+ */
+int tpm_pm_resume(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip == NULL)
+               return -ENODEV;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_pm_resume);
+
+#define TPM_GETRANDOM_RESULT_SIZE      18
+static struct tpm_input_header tpm_getrandom_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(14),
+       .ordinal = TPM_ORD_GET_RANDOM
+};
+
+/**
+ * tpm_get_random() - Get random bytes from the tpm's RNG
+ * @chip_num: A specific chip number for the request or TPM_ANY_NUM
+ * @out: destination buffer for the random bytes
+ * @max: the max number of bytes to write to @out
+ *
+ * Returns < 0 on error and the number of bytes read on success
+ */
+int tpm_get_random(u32 chip_num, u8 *out, size_t max)
+{
+       struct tpm_chip *chip;
+       struct tpm_cmd_t tpm_cmd;
+       u32 recd, num_bytes = min_t(u32, max, TPM_MAX_RNG_DATA);
+       int err, total = 0, retries = 5;
+       u8 *dest = out;
+
+       chip = tpm_chip_find_get(chip_num);
+       if (chip == NULL)
+               return -ENODEV;
+
+       if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
+               return -EINVAL;
+
+       do {
+               tpm_cmd.header.in = tpm_getrandom_header;
+               tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
+
+               err = transmit_cmd(chip, &tpm_cmd,
+                                  TPM_GETRANDOM_RESULT_SIZE + num_bytes,
+                                  "attempting get random");
+               if (err)
+                       break;
+
+               recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
+               memcpy(dest, tpm_cmd.params.getrandom_out.rng_data, recd);
+
+               dest += recd;
+               total += recd;
+               num_bytes -= recd;
+       } while (retries-- && total < max);
+
+       return total ? total : -EIO;
+}
+EXPORT_SYMBOL_GPL(tpm_get_random);
+
+/* In case vendor provided release function, call it too.*/
+
+void tpm_dev_vendor_release(struct tpm_chip *chip)
+{
+       if (!chip)
+               return;
+
+       if (chip->vendor.release)
+               chip->vendor.release(chip->dev);
+
+       clear_bit(chip->dev_num, dev_mask);
+}
+EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
+
+
+/*
+ * Once all references to platform device are down to 0,
+ * release all allocated structures.
+ */
+void tpm_dev_release(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (!chip)
+               return;
+
+       tpm_dev_vendor_release(chip);
+
+       chip->release(dev);
+       kfree(chip);
+}
+EXPORT_SYMBOL_GPL(tpm_dev_release);
+
+/*
+ * Called from tpm_<specific>.c probe function only for devices
+ * the driver has determined it should claim.  Prior to calling
+ * this function the specific probe function has called pci_enable_device
+ * upon errant exit from this function specific probe function should call
+ * pci_disable_device
+ */
+struct tpm_chip *tpm_register_hardware(struct device *dev,
+                                       const struct tpm_vendor_specific *entry)
+{
+       struct tpm_chip *chip;
+
+       /* Driver specific per-device data */
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+
+       if (chip == NULL)
+               return NULL;
+
+       mutex_init(&chip->buffer_mutex);
+       mutex_init(&chip->tpm_mutex);
+       INIT_LIST_HEAD(&chip->list);
+
+       INIT_WORK(&chip->work, timeout_work);
+
+       setup_timer(&chip->user_read_timer, user_reader_timeout,
+                       (unsigned long)chip);
+
+       memcpy(&chip->vendor, entry, sizeof(struct tpm_vendor_specific));
+
+       chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
+
+       if (chip->dev_num >= TPM_NUM_DEVICES) {
+               dev_err(dev, "No available tpm device numbers\n");
+               goto out_free;
+       } else if (chip->dev_num == 0)
+               chip->vendor.miscdev.minor = TPM_MINOR;
+       else
+               chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
+
+       set_bit(chip->dev_num, dev_mask);
+
+       scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm",
+                 chip->dev_num);
+       chip->vendor.miscdev.name = chip->devname;
+
+       chip->vendor.miscdev.parent = dev;
+       chip->dev = get_device(dev);
+       chip->release = dev->release;
+       dev->release = tpm_dev_release;
+       dev_set_drvdata(dev, chip);
+
+       if (misc_register(&chip->vendor.miscdev)) {
+               dev_err(chip->dev,
+                       "unable to misc_register %s, minor %d\n",
+                       chip->vendor.miscdev.name,
+                       chip->vendor.miscdev.minor);
+               goto put_device;
+       }
+
+       if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) {
+               misc_deregister(&chip->vendor.miscdev);
+               goto put_device;
+       }
+
+       if (tpm_add_ppi(&dev->kobj)) {
+               misc_deregister(&chip->vendor.miscdev);
+               goto put_device;
+       }
+
+       chip->bios_dir = tpm_bios_log_setup(chip->devname);
+
+       /* Make chip available */
+       spin_lock(&driver_lock);
+       list_add_rcu(&chip->list, &tpm_chip_list);
+       spin_unlock(&driver_lock);
+
+       return chip;
+
+put_device:
+       put_device(chip->dev);
+out_free:
+       kfree(chip);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(tpm_register_hardware);
+
+MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_DESCRIPTION("TPM Driver");
+MODULE_VERSION("2.0");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c

deleted file mode 100644 (file)

index e3c974a..0000000
--- a/drivers/char/tpm/tpm.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@watson.ibm.com>
- * Dave Safford <safford@watson.ibm.com>
- * Reiner Sailer <sailer@watson.ibm.com>
- * Kylene Hall <kjhall@us.ibm.com>
- *
- * Maintained by: <tpmdd-devel@lists.sourceforge.net>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org      
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- * 
- * Note, the TPM chip is not interrupt driven (only polling)
- * and can have very long timeouts (minutes!). Hence the unusual
- * calls to msleep.
- *
- */
-
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/freezer.h>
-
-#include "tpm.h"
-#include "tpm_eventlog.h"
-
-enum tpm_duration {
-       TPM_SHORT = 0,
-       TPM_MEDIUM = 1,
-       TPM_LONG = 2,
-       TPM_UNDEFINED,
-};
-
-#define TPM_MAX_ORDINAL 243
-#define TSC_MAX_ORDINAL 12
-#define TPM_PROTECTED_COMMAND 0x00
-#define TPM_CONNECTION_COMMAND 0x40
-
-/*
- * Bug workaround - some TPM's don't flush the most
- * recently changed pcr on suspend, so force the flush
- * with an extend to the selected _unused_ non-volatile pcr.
- */
-static int tpm_suspend_pcr;
-module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
-MODULE_PARM_DESC(suspend_pcr,
-                "PCR to use for dummy writes to faciltate flush on suspend.");
-
-static LIST_HEAD(tpm_chip_list);
-static DEFINE_SPINLOCK(driver_lock);
-static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
-
-/*
- * Array with one entry per ordinal defining the maximum amount
- * of time the chip could take to return the result.  The ordinal
- * designation of short, medium or long is defined in a table in
- * TCG Specification TPM Main Part 2 TPM Structures Section 17. The
- * values of the SHORT, MEDIUM, and LONG durations are retrieved
- * from the chip during initialization with a call to tpm_get_timeouts.
- */
-static const u8 tpm_ordinal_duration[TPM_MAX_ORDINAL] = {
-       TPM_UNDEFINED,          /* 0 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 5 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 10 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_LONG,
-       TPM_MEDIUM,             /* 15 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_SHORT,              /* 20 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,              /* 25 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 30 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 35 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 40 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 45 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_LONG,
-       TPM_MEDIUM,             /* 50 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 55 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 60 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 65 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 70 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 75 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 80 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 85 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 90 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 95 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 100 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 105 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 110 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 115 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 120 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 125 */
-       TPM_SHORT,
-       TPM_LONG,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 130 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,          /* 135 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 140 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 145 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 150 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 155 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 160 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 165 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 170 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 175 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 180 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,             /* 185 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 190 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 195 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 200 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 205 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 210 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,          /* 215 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 220 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 225 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 230 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 235 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 240 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-};
-
-static void user_reader_timeout(unsigned long ptr)
-{
-       struct tpm_chip *chip = (struct tpm_chip *) ptr;
-
-       schedule_work(&chip->work);
-}
-
-static void timeout_work(struct work_struct *work)
-{
-       struct tpm_chip *chip = container_of(work, struct tpm_chip, work);
-
-       mutex_lock(&chip->buffer_mutex);
-       atomic_set(&chip->data_pending, 0);
-       memset(chip->data_buffer, 0, TPM_BUFSIZE);
-       mutex_unlock(&chip->buffer_mutex);
-}
-
-/*
- * Returns max number of jiffies to wait
- */
-unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
-                                          u32 ordinal)
-{
-       int duration_idx = TPM_UNDEFINED;
-       int duration = 0;
-       u8 category = (ordinal >> 24) & 0xFF;
-
-       if ((category == TPM_PROTECTED_COMMAND && ordinal < TPM_MAX_ORDINAL) ||
-           (category == TPM_CONNECTION_COMMAND && ordinal < TSC_MAX_ORDINAL))
-               duration_idx = tpm_ordinal_duration[ordinal];
-
-       if (duration_idx != TPM_UNDEFINED)
-               duration = chip->vendor.duration[duration_idx];
-       if (duration <= 0)
-               return 2 * 60 * HZ;
-       else
-               return duration;
-}
-EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
-
-/*
- * Internal kernel interface to transmit TPM commands
- */
-static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
-                           size_t bufsiz)
-{
-       ssize_t rc;
-       u32 count, ordinal;
-       unsigned long stop;
-
-       if (bufsiz > TPM_BUFSIZE)
-               bufsiz = TPM_BUFSIZE;
-
-       count = be32_to_cpu(*((__be32 *) (buf + 2)));
-       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
-       if (count == 0)
-               return -ENODATA;
-       if (count > bufsiz) {
-               dev_err(chip->dev,
-                       "invalid count value %x %zx \n", count, bufsiz);
-               return -E2BIG;
-       }
-
-       mutex_lock(&chip->tpm_mutex);
-
-       if ((rc = chip->vendor.send(chip, (u8 *) buf, count)) < 0) {
-               dev_err(chip->dev,
-                       "tpm_transmit: tpm_send: error %zd\n", rc);
-               goto out;
-       }
-
-       if (chip->vendor.irq)
-               goto out_recv;
-
-       stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
-       do {
-               u8 status = chip->vendor.status(chip);
-               if ((status & chip->vendor.req_complete_mask) ==
-                   chip->vendor.req_complete_val)
-                       goto out_recv;
-
-               if (chip->vendor.req_canceled(chip, status)) {
-                       dev_err(chip->dev, "Operation Canceled\n");
-                       rc = -ECANCELED;
-                       goto out;
-               }
-
-               msleep(TPM_TIMEOUT);    /* CHECK */
-               rmb();
-       } while (time_before(jiffies, stop));
-
-       chip->vendor.cancel(chip);
-       dev_err(chip->dev, "Operation Timed out\n");
-       rc = -ETIME;
-       goto out;
-
-out_recv:
-       rc = chip->vendor.recv(chip, (u8 *) buf, bufsiz);
-       if (rc < 0)
-               dev_err(chip->dev,
-                       "tpm_transmit: tpm_recv: error %zd\n", rc);
-out:
-       mutex_unlock(&chip->tpm_mutex);
-       return rc;
-}
-
-#define TPM_DIGEST_SIZE 20
-#define TPM_RET_CODE_IDX 6
-
-enum tpm_capabilities {
-       TPM_CAP_FLAG = cpu_to_be32(4),
-       TPM_CAP_PROP = cpu_to_be32(5),
-       CAP_VERSION_1_1 = cpu_to_be32(0x06),
-       CAP_VERSION_1_2 = cpu_to_be32(0x1A)
-};
-
-enum tpm_sub_capabilities {
-       TPM_CAP_PROP_PCR = cpu_to_be32(0x101),
-       TPM_CAP_PROP_MANUFACTURER = cpu_to_be32(0x103),
-       TPM_CAP_FLAG_PERM = cpu_to_be32(0x108),
-       TPM_CAP_FLAG_VOL = cpu_to_be32(0x109),
-       TPM_CAP_PROP_OWNER = cpu_to_be32(0x111),
-       TPM_CAP_PROP_TIS_TIMEOUT = cpu_to_be32(0x115),
-       TPM_CAP_PROP_TIS_DURATION = cpu_to_be32(0x120),
-
-};
-
-static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
-                           int len, const char *desc)
-{
-       int err;
-
-       len = tpm_transmit(chip,(u8 *) cmd, len);
-       if (len <  0)
-               return len;
-       else if (len < TPM_HEADER_SIZE)
-               return -EFAULT;
-
-       err = be32_to_cpu(cmd->header.out.return_code);
-       if (err != 0 && desc)
-               dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc);
-
-       return err;
-}
-
-#define TPM_INTERNAL_RESULT_SIZE 200
-#define TPM_TAG_RQU_COMMAND cpu_to_be16(193)
-#define TPM_ORD_GET_CAP cpu_to_be32(101)
-#define TPM_ORD_GET_RANDOM cpu_to_be32(70)
-
-static const struct tpm_input_header tpm_getcap_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(22),
-       .ordinal = TPM_ORD_GET_CAP
-};
-
-ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
-                  const char *desc)
-{
-       struct tpm_cmd_t tpm_cmd;
-       int rc;
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) {
-               tpm_cmd.params.getcap_in.cap = subcap_id;
-               /*subcap field not necessary */
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0);
-               tpm_cmd.header.in.length -= cpu_to_be32(sizeof(__be32));
-       } else {
-               if (subcap_id == TPM_CAP_FLAG_PERM ||
-                   subcap_id == TPM_CAP_FLAG_VOL)
-                       tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG;
-               else
-                       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-               tpm_cmd.params.getcap_in.subcap = subcap_id;
-       }
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
-       if (!rc)
-               *cap = tpm_cmd.params.getcap_out.cap;
-       return rc;
-}
-
-void tpm_gen_interrupt(struct tpm_chip *chip)
-{
-       struct  tpm_cmd_t tpm_cmd;
-       ssize_t rc;
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                       "attempting to determine the timeouts");
-}
-EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
-
-#define TPM_ORD_STARTUP cpu_to_be32(153)
-#define TPM_ST_CLEAR cpu_to_be16(1)
-#define TPM_ST_STATE cpu_to_be16(2)
-#define TPM_ST_DEACTIVATED cpu_to_be16(3)
-static const struct tpm_input_header tpm_startup_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(12),
-       .ordinal = TPM_ORD_STARTUP
-};
-
-static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
-{
-       struct tpm_cmd_t start_cmd;
-       start_cmd.header.in = tpm_startup_header;
-       start_cmd.params.startup_in.startup_type = startup_type;
-       return transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
-                           "attempting to start the TPM");
-}
-
-int tpm_get_timeouts(struct tpm_chip *chip)
-{
-       struct tpm_cmd_t tpm_cmd;
-       struct timeout_t *timeout_cap;
-       struct duration_t *duration_cap;
-       ssize_t rc;
-       u32 timeout;
-       unsigned int scale = 1;
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
-
-       if (rc == TPM_ERR_INVALID_POSTINIT) {
-               /* The TPM is not started, we are the first to talk to it.
-                  Execute a startup command. */
-               dev_info(chip->dev, "Issuing TPM_STARTUP");
-               if (tpm_startup(chip, TPM_ST_CLEAR))
-                       return rc;
-
-               tpm_cmd.header.in = tpm_getcap_header;
-               tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-               tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-               rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                                 NULL);
-       }
-       if (rc) {
-               dev_err(chip->dev,
-                       "A TPM error (%zd) occurred attempting to determine the timeouts\n",
-                       rc);
-               goto duration;
-       }
-
-       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
-           be32_to_cpu(tpm_cmd.header.out.length)
-           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
-               return -EINVAL;
-
-       timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
-       /* Don't overwrite default if value is 0 */
-       timeout = be32_to_cpu(timeout_cap->a);
-       if (timeout && timeout < 1000) {
-               /* timeouts in msec rather usec */
-               scale = 1000;
-               chip->vendor.timeout_adjusted = true;
-       }
-       if (timeout)
-               chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->b);
-       if (timeout)
-               chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->c);
-       if (timeout)
-               chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->d);
-       if (timeout)
-               chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
-
-duration:
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
-
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                       "attempting to determine the durations");
-       if (rc)
-               return rc;
-
-       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
-           be32_to_cpu(tpm_cmd.header.out.length)
-           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
-               return -EINVAL;
-
-       duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
-       chip->vendor.duration[TPM_SHORT] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
-       chip->vendor.duration[TPM_MEDIUM] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
-       chip->vendor.duration[TPM_LONG] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
-
-       /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
-        * value wrong and apparently reports msecs rather than usecs. So we
-        * fix up the resulting too-small TPM_SHORT value to make things work.
-        * We also scale the TPM_MEDIUM and -_LONG values by 1000.
-        */
-       if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
-               chip->vendor.duration[TPM_SHORT] = HZ;
-               chip->vendor.duration[TPM_MEDIUM] *= 1000;
-               chip->vendor.duration[TPM_LONG] *= 1000;
-               chip->vendor.duration_adjusted = true;
-               dev_info(chip->dev, "Adjusting TPM timeout parameters.");
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_get_timeouts);
-
-#define TPM_ORD_CONTINUE_SELFTEST 83
-#define CONTINUE_SELFTEST_RESULT_SIZE 10
-
-static struct tpm_input_header continue_selftest_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(10),
-       .ordinal = cpu_to_be32(TPM_ORD_CONTINUE_SELFTEST),
-};
-
-/**
- * tpm_continue_selftest -- run TPM's selftest
- * @chip: TPM chip to use
- *
- * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
- * a TPM error code.
- */
-static int tpm_continue_selftest(struct tpm_chip *chip)
-{
-       int rc;
-       struct tpm_cmd_t cmd;
-
-       cmd.header.in = continue_selftest_header;
-       rc = transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
-                         "continue selftest");
-       return rc;
-}
-
-ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-
-       rc = tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
-                        "attempting to determine the permanent enabled state");
-       if (rc)
-               return 0;
-
-       rc = sprintf(buf, "%d\n", !cap.perm_flags.disable);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_show_enabled);
-
-ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-
-       rc = tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
-                        "attempting to determine the permanent active state");
-       if (rc)
-               return 0;
-
-       rc = sprintf(buf, "%d\n", !cap.perm_flags.deactivated);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_show_active);
-
-ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_OWNER, &cap,
-                        "attempting to determine the owner state");
-       if (rc)
-               return 0;
-
-       rc = sprintf(buf, "%d\n", cap.owned);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_show_owned);
-
-ssize_t tpm_show_temp_deactivated(struct device * dev,
-                               struct device_attribute * attr, char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-
-       rc = tpm_getcap(dev, TPM_CAP_FLAG_VOL, &cap,
-                        "attempting to determine the temporary state");
-       if (rc)
-               return 0;
-
-       rc = sprintf(buf, "%d\n", cap.stclear_flags.deactivated);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_show_temp_deactivated);
-
-/*
- * tpm_chip_find_get - return tpm_chip for given chip number
- */
-static struct tpm_chip *tpm_chip_find_get(int chip_num)
-{
-       struct tpm_chip *pos, *chip = NULL;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
-               if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num)
-                       continue;
-
-               if (try_module_get(pos->dev->driver->owner)) {
-                       chip = pos;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       return chip;
-}
-
-#define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
-#define READ_PCR_RESULT_SIZE 30
-static struct tpm_input_header pcrread_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(14),
-       .ordinal = TPM_ORDINAL_PCRREAD
-};
-
-static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
-{
-       int rc;
-       struct tpm_cmd_t cmd;
-
-       cmd.header.in = pcrread_header;
-       cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
-       rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
-                         "attempting to read a pcr value");
-
-       if (rc == 0)
-               memcpy(res_buf, cmd.params.pcrread_out.pcr_result,
-                      TPM_DIGEST_SIZE);
-       return rc;
-}
-
-/**
- * tpm_pcr_read - read a pcr value
- * @chip_num:  tpm idx # or ANY
- * @pcr_idx:   pcr idx to retrieve
- * @res_buf:   TPM_PCR value
- *             size of res_buf is 20 bytes (or NULL if you don't care)
- *
- * The TPM driver should be built-in, but for whatever reason it
- * isn't, protect against the chip disappearing, by incrementing
- * the module usage count.
- */
-int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf)
-{
-       struct tpm_chip *chip;
-       int rc;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-       rc = __tpm_pcr_read(chip, pcr_idx, res_buf);
-       tpm_chip_put(chip);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_read);
-
-/**
- * tpm_pcr_extend - extend pcr value with hash
- * @chip_num:  tpm idx # or AN&
- * @pcr_idx:   pcr idx to extend
- * @hash:      hash value used to extend pcr value
- *
- * The TPM driver should be built-in, but for whatever reason it
- * isn't, protect against the chip disappearing, by incrementing
- * the module usage count.
- */
-#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
-#define EXTEND_PCR_RESULT_SIZE 34
-static struct tpm_input_header pcrextend_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(34),
-       .ordinal = TPM_ORD_PCR_EXTEND
-};
-
-int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
-{
-       struct tpm_cmd_t cmd;
-       int rc;
-       struct tpm_chip *chip;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-
-       cmd.header.in = pcrextend_header;
-       cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
-       memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
-       rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
-                         "attempting extend a PCR value");
-
-       tpm_chip_put(chip);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_extend);
-
-/**
- * tpm_do_selftest - have the TPM continue its selftest and wait until it
- *                   can receive further commands
- * @chip: TPM chip to use
- *
- * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
- * a TPM error code.
- */
-int tpm_do_selftest(struct tpm_chip *chip)
-{
-       int rc;
-       unsigned int loops;
-       unsigned int delay_msec = 100;
-       unsigned long duration;
-       struct tpm_cmd_t cmd;
-
-       duration = tpm_calc_ordinal_duration(chip,
-                                            TPM_ORD_CONTINUE_SELFTEST);
-
-       loops = jiffies_to_msecs(duration) / delay_msec;
-
-       rc = tpm_continue_selftest(chip);
-       /* This may fail if there was no TPM driver during a suspend/resume
-        * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
-        */
-       if (rc)
-               return rc;
-
-       do {
-               /* Attempt to read a PCR value */
-               cmd.header.in = pcrread_header;
-               cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
-               rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
-               /* Some buggy TPMs will not respond to tpm_tis_ready() for
-                * around 300ms while the self test is ongoing, keep trying
-                * until the self test duration expires. */
-               if (rc == -ETIME) {
-                       dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test");
-                       msleep(delay_msec);
-                       continue;
-               }
-
-               if (rc < TPM_HEADER_SIZE)
-                       return -EFAULT;
-
-               rc = be32_to_cpu(cmd.header.out.return_code);
-               if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
-                       dev_info(chip->dev,
-                                "TPM is disabled/deactivated (0x%X)\n", rc);
-                       /* TPM is disabled and/or deactivated; driver can
-                        * proceed and TPM does handle commands for
-                        * suspend/resume correctly
-                        */
-                       return 0;
-               }
-               if (rc != TPM_WARN_DOING_SELFTEST)
-                       return rc;
-               msleep(delay_msec);
-       } while (--loops > 0);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_do_selftest);
-
-int tpm_send(u32 chip_num, void *cmd, size_t buflen)
-{
-       struct tpm_chip *chip;
-       int rc;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-
-       rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
-
-       tpm_chip_put(chip);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_send);
-
-ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
-                     char *buf)
-{
-       cap_t cap;
-       u8 digest[TPM_DIGEST_SIZE];
-       ssize_t rc;
-       int i, j, num_pcrs;
-       char *str = buf;
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_PCR, &cap,
-                       "attempting to determine the number of PCRS");
-       if (rc)
-               return 0;
-
-       num_pcrs = be32_to_cpu(cap.num_pcrs);
-       for (i = 0; i < num_pcrs; i++) {
-               rc = __tpm_pcr_read(chip, i, digest);
-               if (rc)
-                       break;
-               str += sprintf(str, "PCR-%02d: ", i);
-               for (j = 0; j < TPM_DIGEST_SIZE; j++)
-                       str += sprintf(str, "%02X ", digest[j]);
-               str += sprintf(str, "\n");
-       }
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_pcrs);
-
-#define  READ_PUBEK_RESULT_SIZE 314
-#define TPM_ORD_READPUBEK cpu_to_be32(124)
-static struct tpm_input_header tpm_readpubek_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(30),
-       .ordinal = TPM_ORD_READPUBEK
-};
-
-ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
-                      char *buf)
-{
-       u8 *data;
-       struct tpm_cmd_t tpm_cmd;
-       ssize_t err;
-       int i, rc;
-       char *str = buf;
-
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       tpm_cmd.header.in = tpm_readpubek_header;
-       err = transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
-                       "attempting to read the PUBEK");
-       if (err)
-               goto out;
-
-       /* 
-          ignore header 10 bytes
-          algorithm 32 bits (1 == RSA )
-          encscheme 16 bits
-          sigscheme 16 bits
-          parameters (RSA 12->bytes: keybit, #primes, expbit)  
-          keylenbytes 32 bits
-          256 byte modulus
-          ignore checksum 20 bytes
-        */
-       data = tpm_cmd.params.readpubek_out_buffer;
-       str +=
-           sprintf(str,
-                   "Algorithm: %02X %02X %02X %02X\n"
-                   "Encscheme: %02X %02X\n"
-                   "Sigscheme: %02X %02X\n"
-                   "Parameters: %02X %02X %02X %02X "
-                   "%02X %02X %02X %02X "
-                   "%02X %02X %02X %02X\n"
-                   "Modulus length: %d\n"
-                   "Modulus:\n",
-                   data[0], data[1], data[2], data[3],
-                   data[4], data[5],
-                   data[6], data[7],
-                   data[12], data[13], data[14], data[15],
-                   data[16], data[17], data[18], data[19],
-                   data[20], data[21], data[22], data[23],
-                   be32_to_cpu(*((__be32 *) (data + 24))));
-
-       for (i = 0; i < 256; i++) {
-               str += sprintf(str, "%02X ", data[i + 28]);
-               if ((i + 1) % 16 == 0)
-                       str += sprintf(str, "\n");
-       }
-out:
-       rc = str - buf;
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_show_pubek);
-
-
-ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
-                     char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-       char *str = buf;
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
-                       "attempting to determine the manufacturer");
-       if (rc)
-               return 0;
-       str += sprintf(str, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(cap.manufacturer_id));
-
-       rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
-                       "attempting to determine the 1.1 version");
-       if (rc)
-               return 0;
-       str += sprintf(str,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version.Major, cap.tpm_version.Minor,
-                      cap.tpm_version.revMajor, cap.tpm_version.revMinor);
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps);
-
-ssize_t tpm_show_caps_1_2(struct device * dev,
-                         struct device_attribute * attr, char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-       char *str = buf;
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
-                       "attempting to determine the manufacturer");
-       if (rc)
-               return 0;
-       str += sprintf(str, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(cap.manufacturer_id));
-       rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap,
-                        "attempting to determine the 1.2 version");
-       if (rc)
-               return 0;
-       str += sprintf(str,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor,
-                      cap.tpm_version_1_2.revMajor,
-                      cap.tpm_version_1_2.revMinor);
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
-
-ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip->vendor.duration[TPM_LONG] == 0)
-               return 0;
-
-       return sprintf(buf, "%d %d %d [%s]\n",
-                      jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
-                      jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
-                      jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
-                      chip->vendor.duration_adjusted
-                      ? "adjusted" : "original");
-}
-EXPORT_SYMBOL_GPL(tpm_show_durations);
-
-ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d %d %d %d [%s]\n",
-                      jiffies_to_usecs(chip->vendor.timeout_a),
-                      jiffies_to_usecs(chip->vendor.timeout_b),
-                      jiffies_to_usecs(chip->vendor.timeout_c),
-                      jiffies_to_usecs(chip->vendor.timeout_d),
-                      chip->vendor.timeout_adjusted
-                      ? "adjusted" : "original");
-}
-EXPORT_SYMBOL_GPL(tpm_show_timeouts);
-
-ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
-                       const char *buf, size_t count)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       if (chip == NULL)
-               return 0;
-
-       chip->vendor.cancel(chip);
-       return count;
-}
-EXPORT_SYMBOL_GPL(tpm_store_cancel);
-
-static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel,
-                                  bool *canceled)
-{
-       u8 status = chip->vendor.status(chip);
-
-       *canceled = false;
-       if ((status & mask) == mask)
-               return true;
-       if (check_cancel && chip->vendor.req_canceled(chip, status)) {
-               *canceled = true;
-               return true;
-       }
-       return false;
-}
-
-int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
-                     wait_queue_head_t *queue, bool check_cancel)
-{
-       unsigned long stop;
-       long rc;
-       u8 status;
-       bool canceled = false;
-
-       /* check current status */
-       status = chip->vendor.status(chip);
-       if ((status & mask) == mask)
-               return 0;
-
-       stop = jiffies + timeout;
-
-       if (chip->vendor.irq) {
-again:
-               timeout = stop - jiffies;
-               if ((long)timeout <= 0)
-                       return -ETIME;
-               rc = wait_event_interruptible_timeout(*queue,
-                       wait_for_tpm_stat_cond(chip, mask, check_cancel,
-                                              &canceled),
-                       timeout);
-               if (rc > 0) {
-                       if (canceled)
-                               return -ECANCELED;
-                       return 0;
-               }
-               if (rc == -ERESTARTSYS && freezing(current)) {
-                       clear_thread_flag(TIF_SIGPENDING);
-                       goto again;
-               }
-       } else {
-               do {
-                       msleep(TPM_TIMEOUT);
-                       status = chip->vendor.status(chip);
-                       if ((status & mask) == mask)
-                               return 0;
-               } while (time_before(jiffies, stop));
-       }
-       return -ETIME;
-}
-EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
-/*
- * Device file system interface to the TPM
- *
- * It's assured that the chip will be opened just once,
- * by the check of is_open variable, which is protected
- * by driver_lock.
- */
-int tpm_open(struct inode *inode, struct file *file)
-{
-       int minor = iminor(inode);
-       struct tpm_chip *chip = NULL, *pos;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
-               if (pos->vendor.miscdev.minor == minor) {
-                       chip = pos;
-                       get_device(chip->dev);
-                       break;
-               }
-       }
-       rcu_read_unlock();
-
-       if (!chip)
-               return -ENODEV;
-
-       if (test_and_set_bit(0, &chip->is_open)) {
-               dev_dbg(chip->dev, "Another process owns this TPM\n");
-               put_device(chip->dev);
-               return -EBUSY;
-       }
-
-       chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
-       if (chip->data_buffer == NULL) {
-               clear_bit(0, &chip->is_open);
-               put_device(chip->dev);
-               return -ENOMEM;
-       }
-
-       atomic_set(&chip->data_pending, 0);
-
-       file->private_data = chip;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_open);
-
-/*
- * Called on file close
- */
-int tpm_release(struct inode *inode, struct file *file)
-{
-       struct tpm_chip *chip = file->private_data;
-
-       del_singleshot_timer_sync(&chip->user_read_timer);
-       flush_work(&chip->work);
-       file->private_data = NULL;
-       atomic_set(&chip->data_pending, 0);
-       kzfree(chip->data_buffer);
-       clear_bit(0, &chip->is_open);
-       put_device(chip->dev);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_release);
-
-ssize_t tpm_write(struct file *file, const char __user *buf,
-                 size_t size, loff_t *off)
-{
-       struct tpm_chip *chip = file->private_data;
-       size_t in_size = size;
-       ssize_t out_size;
-
-       /* cannot perform a write until the read has cleared
-          either via tpm_read or a user_read_timer timeout.
-          This also prevents splitted buffered writes from blocking here.
-       */
-       if (atomic_read(&chip->data_pending) != 0)
-               return -EBUSY;
-
-       if (in_size > TPM_BUFSIZE)
-               return -E2BIG;
-
-       mutex_lock(&chip->buffer_mutex);
-
-       if (copy_from_user
-           (chip->data_buffer, (void __user *) buf, in_size)) {
-               mutex_unlock(&chip->buffer_mutex);
-               return -EFAULT;
-       }
-
-       /* atomic tpm command send and result receive */
-       out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
-       if (out_size < 0) {
-               mutex_unlock(&chip->buffer_mutex);
-               return out_size;
-       }
-
-       atomic_set(&chip->data_pending, out_size);
-       mutex_unlock(&chip->buffer_mutex);
-
-       /* Set a timeout by which the reader must come claim the result */
-       mod_timer(&chip->user_read_timer, jiffies + (60 * HZ));
-
-       return in_size;
-}
-EXPORT_SYMBOL_GPL(tpm_write);
-
-ssize_t tpm_read(struct file *file, char __user *buf,
-                size_t size, loff_t *off)
-{
-       struct tpm_chip *chip = file->private_data;
-       ssize_t ret_size;
-       int rc;
-
-       del_singleshot_timer_sync(&chip->user_read_timer);
-       flush_work(&chip->work);
-       ret_size = atomic_read(&chip->data_pending);
-       if (ret_size > 0) {     /* relay data */
-               ssize_t orig_ret_size = ret_size;
-               if (size < ret_size)
-                       ret_size = size;
-
-               mutex_lock(&chip->buffer_mutex);
-               rc = copy_to_user(buf, chip->data_buffer, ret_size);
-               memset(chip->data_buffer, 0, orig_ret_size);
-               if (rc)
-                       ret_size = -EFAULT;
-
-               mutex_unlock(&chip->buffer_mutex);
-       }
-
-       atomic_set(&chip->data_pending, 0);
-
-       return ret_size;
-}
-EXPORT_SYMBOL_GPL(tpm_read);
-
-void tpm_remove_hardware(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip == NULL) {
-               dev_err(dev, "No device data found\n");
-               return;
-       }
-
-       spin_lock(&driver_lock);
-       list_del_rcu(&chip->list);
-       spin_unlock(&driver_lock);
-       synchronize_rcu();
-
-       misc_deregister(&chip->vendor.miscdev);
-       sysfs_remove_group(&dev->kobj, chip->vendor.attr_group);
-       tpm_remove_ppi(&dev->kobj);
-       tpm_bios_log_teardown(chip->bios_dir);
-
-       /* write it this way to be explicit (chip->dev == dev) */
-       put_device(chip->dev);
-}
-EXPORT_SYMBOL_GPL(tpm_remove_hardware);
-
-#define TPM_ORD_SAVESTATE cpu_to_be32(152)
-#define SAVESTATE_RESULT_SIZE 10
-
-static struct tpm_input_header savestate_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(10),
-       .ordinal = TPM_ORD_SAVESTATE
-};
-
-/*
- * We are about to suspend. Save the TPM state
- * so that it can be restored.
- */
-int tpm_pm_suspend(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       struct tpm_cmd_t cmd;
-       int rc, try;
-
-       u8 dummy_hash[TPM_DIGEST_SIZE] = { 0 };
-
-       if (chip == NULL)
-               return -ENODEV;
-
-       /* for buggy tpm, flush pcrs with extend to selected dummy */
-       if (tpm_suspend_pcr) {
-               cmd.header.in = pcrextend_header;
-               cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
-               memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
-                      TPM_DIGEST_SIZE);
-               rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
-                                 "extending dummy pcr before suspend");
-       }
-
-       /* now do the actual savestate */
-       for (try = 0; try < TPM_RETRY; try++) {
-               cmd.header.in = savestate_header;
-               rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
-
-               /*
-                * If the TPM indicates that it is too busy to respond to
-                * this command then retry before giving up.  It can take
-                * several seconds for this TPM to be ready.
-                *
-                * This can happen if the TPM has already been sent the
-                * SaveState command before the driver has loaded.  TCG 1.2
-                * specification states that any communication after SaveState
-                * may cause the TPM to invalidate previously saved state.
-                */
-               if (rc != TPM_WARN_RETRY)
-                       break;
-               msleep(TPM_TIMEOUT_RETRY);
-       }
-
-       if (rc)
-               dev_err(chip->dev,
-                       "Error (%d) sending savestate before suspend\n", rc);
-       else if (try > 0)
-               dev_warn(chip->dev, "TPM savestate took %dms\n",
-                        try * TPM_TIMEOUT_RETRY);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pm_suspend);
-
-/*
- * Resume from a power safe. The BIOS already restored
- * the TPM state.
- */
-int tpm_pm_resume(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip == NULL)
-               return -ENODEV;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_pm_resume);
-
-#define TPM_GETRANDOM_RESULT_SIZE      18
-static struct tpm_input_header tpm_getrandom_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(14),
-       .ordinal = TPM_ORD_GET_RANDOM
-};
-
-/**
- * tpm_get_random() - Get random bytes from the tpm's RNG
- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
- * @out: destination buffer for the random bytes
- * @max: the max number of bytes to write to @out
- *
- * Returns < 0 on error and the number of bytes read on success
- */
-int tpm_get_random(u32 chip_num, u8 *out, size_t max)
-{
-       struct tpm_chip *chip;
-       struct tpm_cmd_t tpm_cmd;
-       u32 recd, num_bytes = min_t(u32, max, TPM_MAX_RNG_DATA);
-       int err, total = 0, retries = 5;
-       u8 *dest = out;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-
-       if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
-               return -EINVAL;
-
-       do {
-               tpm_cmd.header.in = tpm_getrandom_header;
-               tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
-
-               err = transmit_cmd(chip, &tpm_cmd,
-                                  TPM_GETRANDOM_RESULT_SIZE + num_bytes,
-                                  "attempting get random");
-               if (err)
-                       break;
-
-               recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
-               memcpy(dest, tpm_cmd.params.getrandom_out.rng_data, recd);
-
-               dest += recd;
-               total += recd;
-               num_bytes -= recd;
-       } while (retries-- && total < max);
-
-       return total ? total : -EIO;
-}
-EXPORT_SYMBOL_GPL(tpm_get_random);
-
-/* In case vendor provided release function, call it too.*/
-
-void tpm_dev_vendor_release(struct tpm_chip *chip)
-{
-       if (!chip)
-               return;
-
-       if (chip->vendor.release)
-               chip->vendor.release(chip->dev);
-
-       clear_bit(chip->dev_num, dev_mask);
-       kfree(chip->vendor.miscdev.name);
-}
-EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
-
-
-/*
- * Once all references to platform device are down to 0,
- * release all allocated structures.
- */
-void tpm_dev_release(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (!chip)
-               return;
-
-       tpm_dev_vendor_release(chip);
-
-       chip->release(dev);
-       kfree(chip);
-}
-EXPORT_SYMBOL_GPL(tpm_dev_release);
-
-/*
- * Called from tpm_<specific>.c probe function only for devices 
- * the driver has determined it should claim.  Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
- */
-struct tpm_chip *tpm_register_hardware(struct device *dev,
-                                       const struct tpm_vendor_specific *entry)
-{
-#define DEVNAME_SIZE 7
-
-       char *devname;
-       struct tpm_chip *chip;
-
-       /* Driver specific per-device data */
-       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-       devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL);
-
-       if (chip == NULL || devname == NULL)
-               goto out_free;
-
-       mutex_init(&chip->buffer_mutex);
-       mutex_init(&chip->tpm_mutex);
-       INIT_LIST_HEAD(&chip->list);
-
-       INIT_WORK(&chip->work, timeout_work);
-
-       setup_timer(&chip->user_read_timer, user_reader_timeout,
-                       (unsigned long)chip);
-
-       memcpy(&chip->vendor, entry, sizeof(struct tpm_vendor_specific));
-
-       chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
-
-       if (chip->dev_num >= TPM_NUM_DEVICES) {
-               dev_err(dev, "No available tpm device numbers\n");
-               goto out_free;
-       } else if (chip->dev_num == 0)
-               chip->vendor.miscdev.minor = TPM_MINOR;
-       else
-               chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
-
-       set_bit(chip->dev_num, dev_mask);
-
-       scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num);
-       chip->vendor.miscdev.name = devname;
-
-       chip->vendor.miscdev.parent = dev;
-       chip->dev = get_device(dev);
-       chip->release = dev->release;
-       dev->release = tpm_dev_release;
-       dev_set_drvdata(dev, chip);
-
-       if (misc_register(&chip->vendor.miscdev)) {
-               dev_err(chip->dev,
-                       "unable to misc_register %s, minor %d\n",
-                       chip->vendor.miscdev.name,
-                       chip->vendor.miscdev.minor);
-               goto put_device;
-       }
-
-       if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) {
-               misc_deregister(&chip->vendor.miscdev);
-               goto put_device;
-       }
-
-       if (tpm_add_ppi(&dev->kobj)) {
-               misc_deregister(&chip->vendor.miscdev);
-               goto put_device;
-       }
-
-       chip->bios_dir = tpm_bios_log_setup(devname);
-
-       /* Make chip available */
-       spin_lock(&driver_lock);
-       list_add_rcu(&chip->list, &tpm_chip_list);
-       spin_unlock(&driver_lock);
-
-       return chip;
-
-put_device:
-       put_device(chip->dev);
-out_free:
-       kfree(chip);
-       kfree(devname);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(tpm_register_hardware);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h

index a7bfc176ed4316bc8319220f1b73848bc9d74f06..f32847872193ad7acdf67bef8921c9f48d474854 100644 (file)
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr,
                                 char *);
  extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr,
                                 char *);
-extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr,
-                               char *);
  extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr,
                                 const char *, size_t);
  extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr,
@@ -122,6 +120,7 @@ struct tpm_chip {
         struct device *dev;     /* Device stuff */
  
         int dev_num;            /* /dev/tpm# */
+       char devname[7];
         unsigned long is_open;  /* only one allowed */
         int time_expired;
  
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c

index 99d6820c611db2e0f0f344c9cf701cfc02b0505d..c9a528d25d22001141b5809fa255fbc3d1d42154 100644 (file)
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -202,7 +202,7 @@ static int __init init_atmel(void)
  
         have_region =
             (atmel_request_region
-            (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
+            (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
  
         pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0);
         if (IS_ERR(pdev)) {
diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c

index 84ddc557b8f8689cd1af8e04ed825e42283fd442..59f7cb28260b4f4c23af010f7615befdd6ed7d23 100644 (file)
--- a/drivers/char/tpm/tpm_eventlog.c
+++ b/drivers/char/tpm/tpm_eventlog.c
@@ -406,7 +406,6 @@ out_tpm:
  out:
         return NULL;
  }
-EXPORT_SYMBOL_GPL(tpm_bios_log_setup);
  
  void tpm_bios_log_teardown(struct dentry **lst)
  {
@@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst)
         for (i = 0; i < 3; i++)
                 securityfs_remove(lst[i]);
  }
-EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c

new file mode 100644 (file)

index 0000000..c3cd7fe
--- /dev/null
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -0,0 +1,284 @@
+/*
+ * ATMEL I2C TPM AT97SC3204T
+ *
+ * Copyright (C) 2012 V Lab Technologies
+ *  Teddy Reed <teddy@prosauce.org>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ * Device driver for ATMEL I2C TPMs.
+ *
+ * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM
+ * devices the raw TCG formatted TPM command data is written via I2C and then
+ * raw TCG formatted TPM command data is returned via I2C.
+ *
+ * TGC status/locality/etc functions seen in the LPC implementation do not
+ * seem to be present.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/>.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+#define I2C_DRIVER_NAME "tpm_i2c_atmel"
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+#define ATMEL_STS_OK 1
+
+struct priv_data {
+       size_t len;
+       /* This is the amount we read on the first try. 25 was chosen to fit a
+        * fair number of read responses in the buffer so a 2nd retry can be
+        * avoided in small message cases. */
+       u8 buffer[sizeof(struct tpm_output_header) + 25];
+};
+
+static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+
+       priv->len = 0;
+
+       if (len <= 2)
+               return -EIO;
+
+       status = i2c_master_send(client, buf, len);
+
+       dev_dbg(chip->dev,
+               "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
+               (int)min_t(size_t, 64, len), buf, len, status);
+       return status;
+}
+
+static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       struct tpm_output_header *hdr =
+               (struct tpm_output_header *)priv->buffer;
+       u32 expected_len;
+       int rc;
+
+       if (priv->len == 0)
+               return -EIO;
+
+       /* Get the message size from the message header, if we didn't get the
+        * whole message in read_status then we need to re-read the
+        * message. */
+       expected_len = be32_to_cpu(hdr->length);
+       if (expected_len > count)
+               return -ENOMEM;
+
+       if (priv->len >= expected_len) {
+               dev_dbg(chip->dev,
+                       "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+                       (int)min_t(size_t, 64, expected_len), buf, count,
+                       expected_len);
+               memcpy(buf, priv->buffer, expected_len);
+               return expected_len;
+       }
+
+       rc = i2c_master_recv(client, buf, expected_len);
+       dev_dbg(chip->dev,
+               "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+               (int)min_t(size_t, 64, expected_len), buf, count,
+               expected_len);
+       return rc;
+}
+
+static void i2c_atmel_cancel(struct tpm_chip *chip)
+{
+       dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported");
+}
+
+static u8 i2c_atmel_read_status(struct tpm_chip *chip)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int rc;
+
+       /* The TPM fails the I2C read until it is ready, so we do the entire
+        * transfer here and buffer it locally. This way the common code can
+        * properly handle the timeouts. */
+       priv->len = 0;
+       memset(priv->buffer, 0, sizeof(priv->buffer));
+
+
+       /* Once the TPM has completed the command the command remains readable
+        * until another command is issued. */
+       rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer));
+       dev_dbg(chip->dev,
+               "%s: sts=%d", __func__, rc);
+       if (rc <= 0)
+               return 0;
+
+       priv->len = rc;
+
+       return ATMEL_STS_OK;
+}
+
+static const struct file_operations i2c_atmel_ops = {
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_atmel_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,
+};
+
+static struct attribute_group i2c_atmel_attr_grp = {
+       .attrs = i2c_atmel_attrs
+};
+
+static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       return 0;
+}
+
+static const struct tpm_vendor_specific i2c_atmel = {
+       .status = i2c_atmel_read_status,
+       .recv = i2c_atmel_recv,
+       .send = i2c_atmel_send,
+       .cancel = i2c_atmel_cancel,
+       .req_complete_mask = ATMEL_STS_OK,
+       .req_complete_val = ATMEL_STS_OK,
+       .req_canceled = i2c_atmel_req_canceled,
+       .attr_group = &i2c_atmel_attr_grp,
+       .miscdev.fops = &i2c_atmel_ops,
+};
+
+static int i2c_atmel_probe(struct i2c_client *client,
+                          const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct device *dev = &client->dev;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
+
+       chip = tpm_register_hardware(dev, &i2c_atmel);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+                                        GFP_KERNEL);
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.irq = 0;
+
+       /* There is no known way to probe for this device, and all version
+        * information seems to be read via TPM commands. Thus we rely on the
+        * TPM startup process in the common code to detect the device. */
+       if (tpm_get_timeouts(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       if (tpm_do_selftest(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+static int i2c_atmel_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       kfree(chip);
+       return 0;
+}
+
+static const struct i2c_device_id i2c_atmel_id[] = {
+       {I2C_DRIVER_NAME, 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_atmel_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_atmel_of_match[] = {
+       {.compatible = "atmel,at97sc3204t"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_atmel_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_atmel_driver = {
+       .id_table = i2c_atmel_id,
+       .probe = i2c_atmel_probe,
+       .remove = i2c_atmel_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_atmel_pm_ops,
+               .of_match_table = of_match_ptr(i2c_atmel_of_match),
+       },
+};
+
+module_i2c_driver(i2c_atmel_driver);
+
+MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>");
+MODULE_DESCRIPTION("Atmel TPM I2C Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c

index b8735de8ce956a3d19fd3ac5b9a5df77c57d3a22..fefd2aa5c81e4aaa6f9069eb26000f9152acface 100644 (file)
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
  static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
  static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
  static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
  static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
  static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
  static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
@@ -685,7 +685,6 @@ out_vendor:
         chip->dev->release = NULL;
         chip->release = NULL;
         tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
  out_err:
         return rc;
  }
@@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client)
         chip->dev->release = NULL;
         chip->release = NULL;
         tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
  
         return 0;
  }
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c

new file mode 100644 (file)

index 0000000..6276fea
--- /dev/null
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -0,0 +1,710 @@
+/******************************************************************************
+ * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501,
+ * based on the TCG TPM Interface Spec version 1.2.
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * Copyright (C) 2011, Nuvoton Technology Corporation.
+ *  Dan Morav <dan.morav@nuvoton.com>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/>.
+ *
+ * Nuvoton contact information: APC.Support@nuvoton.com
+ *****************************************************************************/
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+/* I2C interface offsets */
+#define TPM_STS                0x00
+#define TPM_BURST_COUNT        0x01
+#define TPM_DATA_FIFO_W        0x20
+#define TPM_DATA_FIFO_R        0x40
+#define TPM_VID_DID_RID        0x60
+/* TPM command header size */
+#define TPM_HEADER_SIZE        10
+#define TPM_RETRY      5
+/*
+ * I2C bus device maximum buffer size w/o counting I2C address or command
+ * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data
+ */
+#define TPM_I2C_MAX_BUF_SIZE           32
+#define TPM_I2C_RETRY_COUNT            32
+#define TPM_I2C_BUS_DELAY              1       /* msec */
+#define TPM_I2C_RETRY_DELAY_SHORT      2       /* msec */
+#define TPM_I2C_RETRY_DELAY_LONG       10      /* msec */
+
+#define I2C_DRIVER_NAME "tpm_i2c_nuvoton"
+
+struct priv_data {
+       unsigned int intrs;
+};
+
+static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size,
+                               u8 *data)
+{
+       s32 status;
+
+       status = i2c_smbus_read_i2c_block_data(client, offset, size, data);
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, (int)size, data, status);
+       return status;
+}
+
+static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size,
+                                u8 *data)
+{
+       s32 status;
+
+       status = i2c_smbus_write_i2c_block_data(client, offset, size, data);
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, (int)size, data, status);
+       return status;
+}
+
+#define TPM_STS_VALID          0x80
+#define TPM_STS_COMMAND_READY  0x40
+#define TPM_STS_GO             0x20
+#define TPM_STS_DATA_AVAIL     0x10
+#define TPM_STS_EXPECT         0x08
+#define TPM_STS_RESPONSE_RETRY 0x02
+#define TPM_STS_ERR_VAL        0x07    /* bit2...bit0 reads always 0 */
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+/* read TPM_STS register */
+static u8 i2c_nuvoton_read_status(struct tpm_chip *chip)
+{
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+       u8 data;
+
+       status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data);
+       if (status <= 0) {
+               dev_err(chip->dev, "%s() error return %d\n", __func__,
+                       status);
+               data = TPM_STS_ERR_VAL;
+       }
+
+       return data;
+}
+
+/* write byte to TPM_STS register */
+static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data)
+{
+       s32 status;
+       int i;
+
+       /* this causes the current command to be aborted */
+       for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) {
+               status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data);
+               msleep(TPM_I2C_BUS_DELAY);
+       }
+       return status;
+}
+
+/* write commandReady to TPM_STS register */
+static void i2c_nuvoton_ready(struct tpm_chip *chip)
+{
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+
+       /* this causes the current command to be aborted */
+       status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY);
+       if (status < 0)
+               dev_err(chip->dev,
+                       "%s() fail to write TPM_STS.commandReady\n", __func__);
+}
+
+/* read burstCount field from TPM_STS register
+ * return -1 on fail to read */
+static int i2c_nuvoton_get_burstcount(struct i2c_client *client,
+                                     struct tpm_chip *chip)
+{
+       unsigned long stop = jiffies + chip->vendor.timeout_d;
+       s32 status;
+       int burst_count = -1;
+       u8 data;
+
+       /* wait for burstcount to be non-zero */
+       do {
+               /* in I2C burstCount is 1 byte */
+               status = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1,
+                                             &data);
+               if (status > 0 && data > 0) {
+                       burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data);
+                       break;
+               }
+               msleep(TPM_I2C_BUS_DELAY);
+       } while (time_before(jiffies, stop));
+
+       return burst_count;
+}
+
+/*
+ * WPCT301/NPCT501 SINT# supports only dataAvail
+ * any call to this function which is not waiting for dataAvail will
+ * set queue to NULL to avoid waiting for interrupt
+ */
+static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value)
+{
+       u8 status = i2c_nuvoton_read_status(chip);
+       return (status != TPM_STS_ERR_VAL) && ((status & mask) == value);
+}
+
+static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value,
+                                    u32 timeout, wait_queue_head_t *queue)
+{
+       if (chip->vendor.irq && queue) {
+               s32 rc;
+               DEFINE_WAIT(wait);
+               struct priv_data *priv = chip->vendor.priv;
+               unsigned int cur_intrs = priv->intrs;
+
+               enable_irq(chip->vendor.irq);
+               rc = wait_event_interruptible_timeout(*queue,
+                                                     cur_intrs != priv->intrs,
+                                                     timeout);
+               if (rc > 0)
+                       return 0;
+               /* At this point we know that the SINT pin is asserted, so we
+                * do not need to do i2c_nuvoton_check_status */
+       } else {
+               unsigned long ten_msec, stop;
+               bool status_valid;
+
+               /* check current status */
+               status_valid = i2c_nuvoton_check_status(chip, mask, value);
+               if (status_valid)
+                       return 0;
+
+               /* use polling to wait for the event */
+               ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG);
+               stop = jiffies + timeout;
+               do {
+                       if (time_before(jiffies, ten_msec))
+                               msleep(TPM_I2C_RETRY_DELAY_SHORT);
+                       else
+                               msleep(TPM_I2C_RETRY_DELAY_LONG);
+                       status_valid = i2c_nuvoton_check_status(chip, mask,
+                                                               value);
+                       if (status_valid)
+                               return 0;
+               } while (time_before(jiffies, stop));
+       }
+       dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
+               value);
+       return -ETIMEDOUT;
+}
+
+/* wait for dataAvail field to be set in the TPM_STS register */
+static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout,
+                                          wait_queue_head_t *queue)
+{
+       return i2c_nuvoton_wait_for_stat(chip,
+                                        TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+                                        TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+                                        timeout, queue);
+}
+
+/* Read @count bytes into @buf from TPM_RD_FIFO register */
+static int i2c_nuvoton_recv_data(struct i2c_client *client,
+                                struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       s32 rc;
+       int burst_count, bytes2read, size = 0;
+
+       while (size < count &&
+              i2c_nuvoton_wait_for_data_avail(chip,
+                                              chip->vendor.timeout_c,
+                                              &chip->vendor.read_queue) == 0) {
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail to read burstCount=%d\n", __func__,
+                               burst_count);
+                       return -EIO;
+               }
+               bytes2read = min_t(size_t, burst_count, count - size);
+               rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R,
+                                         bytes2read, &buf[size]);
+               if (rc < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail on i2c_nuvoton_read_buf()=%d\n",
+                               __func__, rc);
+                       return -EIO;
+               }
+               dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read);
+               size += bytes2read;
+       }
+
+       return size;
+}
+
+/* Read TPM command results */
+static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       s32 rc;
+       int expected, status, burst_count, retries, size = 0;
+
+       if (count < TPM_HEADER_SIZE) {
+               i2c_nuvoton_ready(chip);    /* return to idle */
+               dev_err(dev, "%s() count < header size\n", __func__);
+               return -EIO;
+       }
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               if (retries > 0) {
+                       /* if this is not the first trial, set responseRetry */
+                       i2c_nuvoton_write_status(client,
+                                                TPM_STS_RESPONSE_RETRY);
+               }
+               /*
+                * read first available (> 10 bytes), including:
+                * tag, paramsize, and result
+                */
+               status = i2c_nuvoton_wait_for_data_avail(
+                       chip, chip->vendor.timeout_c, &chip->vendor.read_queue);
+               if (status != 0) {
+                       dev_err(dev, "%s() timeout on dataAvail\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(dev, "%s() fail to get burstCount\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               size = i2c_nuvoton_recv_data(client, chip, buf,
+                                            burst_count);
+               if (size < TPM_HEADER_SIZE) {
+                       dev_err(dev, "%s() fail to read header\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /*
+                * convert number of expected bytes field from big endian 32 bit
+                * to machine native
+                */
+               expected = be32_to_cpu(*(__be32 *) (buf + 2));
+               if (expected > count) {
+                       dev_err(dev, "%s() expected > count\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               rc = i2c_nuvoton_recv_data(client, chip, &buf[size],
+                                          expected - size);
+               size += rc;
+               if (rc < 0 || size < expected) {
+                       dev_err(dev, "%s() fail to read remainder of result\n",
+                               __func__);
+                       size = -EIO;
+                       continue;
+               }
+               if (i2c_nuvoton_wait_for_stat(
+                           chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL,
+                           TPM_STS_VALID, chip->vendor.timeout_c,
+                           NULL)) {
+                       dev_err(dev, "%s() error left over data\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       i2c_nuvoton_ready(chip);
+       dev_dbg(chip->dev, "%s() -> %d\n", __func__, size);
+       return size;
+}
+
+/*
+ * Send TPM command.
+ *
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ */
+static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       u32 ordinal;
+       size_t count = 0;
+       int burst_count, bytes2write, retries, rc = -EIO;
+
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               i2c_nuvoton_ready(chip);
+               if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY,
+                                             TPM_STS_COMMAND_READY,
+                                             chip->vendor.timeout_b, NULL)) {
+                       dev_err(dev, "%s() timeout on commandReady\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               rc = 0;
+               while (count < len - 1) {
+                       burst_count = i2c_nuvoton_get_burstcount(client,
+                                                                chip);
+                       if (burst_count < 0) {
+                               dev_err(dev, "%s() fail get burstCount\n",
+                                       __func__);
+                               rc = -EIO;
+                               break;
+                       }
+                       bytes2write = min_t(size_t, burst_count,
+                                           len - 1 - count);
+                       rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W,
+                                                  bytes2write, &buf[count]);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() fail i2cWriteBuf\n",
+                                       __func__);
+                               break;
+                       }
+                       dev_dbg(dev, "%s(%d):", __func__, bytes2write);
+                       count += bytes2write;
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      chip->vendor.timeout_c,
+                                                      NULL);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() timeout on Expect\n",
+                                       __func__);
+                               rc = -ETIMEDOUT;
+                               break;
+                       }
+               }
+               if (rc < 0)
+                       continue;
+
+               /* write last byte */
+               rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1,
+                                          &buf[count]);
+               if (rc < 0) {
+                       dev_err(dev, "%s() fail to write last byte\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               dev_dbg(dev, "%s(last): %02x", __func__, buf[count]);
+               rc = i2c_nuvoton_wait_for_stat(chip,
+                                              TPM_STS_VALID | TPM_STS_EXPECT,
+                                              TPM_STS_VALID,
+                                              chip->vendor.timeout_c, NULL);
+               if (rc) {
+                       dev_err(dev, "%s() timeout on Expect to clear\n",
+                               __func__);
+                       rc = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       if (rc < 0) {
+               /* retries == TPM_RETRY */
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       /* execute the TPM command */
+       rc = i2c_nuvoton_write_status(client, TPM_STS_GO);
+       if (rc < 0) {
+               dev_err(dev, "%s() fail to write Go\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+       rc = i2c_nuvoton_wait_for_data_avail(chip,
+                                            tpm_calc_ordinal_duration(chip,
+                                                                      ordinal),
+                                            &chip->vendor.read_queue);
+       if (rc) {
+               dev_err(dev, "%s() timeout command duration\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+
+       dev_dbg(dev, "%s() -> %zd\n", __func__, len);
+       return len;
+}
+
+static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       return (status == TPM_STS_COMMAND_READY);
+}
+
+static const struct file_operations i2c_nuvoton_ops = {
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_nuvoton_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,
+};
+
+static struct attribute_group i2c_nuvoton_attr_grp = {
+       .attrs = i2c_nuvoton_attrs
+};
+
+static const struct tpm_vendor_specific tpm_i2c = {
+       .status = i2c_nuvoton_read_status,
+       .recv = i2c_nuvoton_recv,
+       .send = i2c_nuvoton_send,
+       .cancel = i2c_nuvoton_ready,
+       .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_canceled = i2c_nuvoton_req_canceled,
+       .attr_group = &i2c_nuvoton_attr_grp,
+       .miscdev.fops = &i2c_nuvoton_ops,
+};
+
+/* The only purpose for the handler is to signal to any waiting threads that
+ * the interrupt is currently being asserted. The driver does not do any
+ * processing triggered by interrupts, and the chip provides no way to mask at
+ * the source (plus that would be slow over I2C). Run the IRQ as a one-shot,
+ * this means it cannot be shared. */
+static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id)
+{
+       struct tpm_chip *chip = dev_id;
+       struct priv_data *priv = chip->vendor.priv;
+
+       priv->intrs++;
+       wake_up(&chip->vendor.read_queue);
+       disable_irq_nosync(chip->vendor.irq);
+       return IRQ_HANDLED;
+}
+
+static int get_vid(struct i2c_client *client, u32 *res)
+{
+       static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe };
+       u32 temp;
+       s32 rc;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+               return -ENODEV;
+       rc = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&temp);
+       if (rc < 0)
+               return rc;
+
+       /* check WPCT301 values - ignore RID */
+       if (memcmp(&temp, vid_did_rid_value, sizeof(vid_did_rid_value))) {
+               /*
+                * f/w rev 2.81 has an issue where the VID_DID_RID is not
+                * reporting the right value. so give it another chance at
+                * offset 0x20 (FIFO_W).
+                */
+               rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4,
+                                         (u8 *) (&temp));
+               if (rc < 0)
+                       return rc;
+
+               /* check WPCT301 values - ignore RID */
+               if (memcmp(&temp, vid_did_rid_value,
+                          sizeof(vid_did_rid_value)))
+                       return -ENODEV;
+       }
+
+       *res = temp;
+       return 0;
+}
+
+static int i2c_nuvoton_probe(struct i2c_client *client,
+                            const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct device *dev = &client->dev;
+       u32 vid = 0;
+
+       rc = get_vid(client, &vid);
+       if (rc)
+               return rc;
+
+       dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid,
+                (u8) (vid >> 16), (u8) (vid >> 24));
+
+       chip = tpm_register_hardware(dev, &tpm_i2c);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+                                        GFP_KERNEL);
+       init_waitqueue_head(&chip->vendor.read_queue);
+       init_waitqueue_head(&chip->vendor.int_queue);
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+
+       /*
+        * I2C intfcaps (interrupt capabilitieis) in the chip are hard coded to:
+        *   TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT
+        * The IRQ should be set in the i2c_board_info (which is done
+        * automatically in of_i2c_register_devices, for device tree users */
+       chip->vendor.irq = client->irq;
+
+       if (chip->vendor.irq) {
+               dev_dbg(dev, "%s() chip-vendor.irq\n", __func__);
+               rc = devm_request_irq(dev, chip->vendor.irq,
+                                     i2c_nuvoton_int_handler,
+                                     IRQF_TRIGGER_LOW,
+                                     chip->vendor.miscdev.name,
+                                     chip);
+               if (rc) {
+                       dev_err(dev, "%s() Unable to request irq: %d for use\n",
+                               __func__, chip->vendor.irq);
+                       chip->vendor.irq = 0;
+               } else {
+                       /* Clear any pending interrupt */
+                       i2c_nuvoton_ready(chip);
+                       /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_COMMAND_READY,
+                                                      TPM_STS_COMMAND_READY,
+                                                      chip->vendor.timeout_b,
+                                                      NULL);
+                       if (rc == 0) {
+                               /*
+                                * TIS is in ready state
+                                * write dummy byte to enter reception state
+                                * TPM_DATA_FIFO_W <- rc (0)
+                                */
+                               rc = i2c_nuvoton_write_buf(client,
+                                                          TPM_DATA_FIFO_W,
+                                                          1, (u8 *) (&rc));
+                               if (rc < 0)
+                                       goto out_err;
+                               /* TPM_STS <- 0x40 (commandReady) */
+                               i2c_nuvoton_ready(chip);
+                       } else {
+                               /*
+                                * timeout_b reached - command was
+                                * aborted. TIS should now be in idle state -
+                                * only TPM_STS_VALID should be set
+                                */
+                               if (i2c_nuvoton_read_status(chip) !=
+                                   TPM_STS_VALID) {
+                                       rc = -EIO;
+                                       goto out_err;
+                               }
+                       }
+               }
+       }
+
+       if (tpm_get_timeouts(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       if (tpm_do_selftest(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+static int i2c_nuvoton_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       kfree(chip);
+       return 0;
+}
+
+
+static const struct i2c_device_id i2c_nuvoton_id[] = {
+       {I2C_DRIVER_NAME, 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_nuvoton_of_match[] = {
+       {.compatible = "nuvoton,npct501"},
+       {.compatible = "winbond,wpct301"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_nuvoton_driver = {
+       .id_table = i2c_nuvoton_id,
+       .probe = i2c_nuvoton_probe,
+       .remove = i2c_nuvoton_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_nuvoton_pm_ops,
+               .of_match_table = of_match_ptr(i2c_nuvoton_of_match),
+       },
+};
+
+module_i2c_driver(i2c_nuvoton_driver);
+
+MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)");
+MODULE_DESCRIPTION("Nuvoton TPM I2C Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c

index 5bb8e2ddd3b3b9b3a51557a605e8675a8f5c3e64..a0d6ceb5d00551b2c10f4341d1b5b5bd2d812e0b 100644 (file)
--- a/drivers/char/tpm/tpm_i2c_stm_st33.c
+++ b/drivers/char/tpm/tpm_i2c_stm_st33.c
@@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
  static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
  static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
  static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
  static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
  
  static struct attribute *stm_tpm_attrs[] = {
@@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
  
         tpm_get_timeouts(chip);
  
-       i2c_set_clientdata(client, chip);
-
         dev_info(chip->dev, "TPM I2C Initialized\n");
         return 0;
  _irq_set:
@@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client)
  #ifdef CONFIG_PM_SLEEP
  /*
   * tpm_st33_i2c_pm_suspend suspend the TPM device
- * Added: Work around when suspend and no tpm application is running, suspend
- * may fail because chip->data_buffer is not set (only set in tpm_open in Linux
- * TPM core)
   * @param: client, the i2c_client drescription (TPM I2C description).
   * @param: mesg, the power management message.
   * @return: 0 in case of success.
   */
  static int tpm_st33_i2c_pm_suspend(struct device *dev)
  {
-       struct tpm_chip *chip = dev_get_drvdata(dev);
         struct st33zp24_platform_data *pin_infos = dev->platform_data;
         int ret = 0;
  
         if (power_mgt) {
                 gpio_set_value(pin_infos->io_lpcpd, 0);
         } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                 ret = tpm_pm_suspend(dev);
         }
         return ret;
@@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev)
                                           TPM_STS_VALID) == TPM_STS_VALID,
                                           chip->vendor.timeout_b);
         } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                 ret = tpm_pm_resume(dev);
                 if (!ret)
                         tpm_do_selftest(chip);
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c

index 56b07c35a13e173bf1cc5d10321d30d5a11f259e..2783a42aa73295406bee2ff845f5ae08a2d9c738 100644 (file)
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
  
         if (count < len) {
                 dev_err(ibmvtpm->dev,
-                       "Invalid size in recv: count=%ld, crq_size=%d\n",
+                       "Invalid size in recv: count=%zd, crq_size=%d\n",
                         count, len);
                 return -EIO;
         }
@@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
  
         if (count > ibmvtpm->rtce_size) {
                 dev_err(ibmvtpm->dev,
-                       "Invalid size in send: count=%ld, rtce_size=%d\n",
+                       "Invalid size in send: count=%zd, rtce_size=%d\n",
                         count, ibmvtpm->rtce_size);
                 return -EIO;
         }
@@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
  static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
  static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                    NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
  static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
  static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
  static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c

index 2168d15bc728e22c99a61976de5050c26dc33cbe..8e562dc656016cd9c4dbeaaa3dac0a96a02d0f26 100644 (file)
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent)
  {
         return sysfs_create_group(parent, &ppi_attr_grp);
  }
-EXPORT_SYMBOL_GPL(tpm_add_ppi);
  
  void tpm_remove_ppi(struct kobject *parent)
  {
         sysfs_remove_group(parent, &ppi_attr_grp);
  }
-EXPORT_SYMBOL_GPL(tpm_remove_ppi);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c

index 5796d0157ce0c3bbd82c662bf61f8035e9d9daf8..1b74459c072399109d1730a6d60471e64ffcc48b 100644 (file)
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
  static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
  static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                    NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
  static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
  static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
  static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c

index 94c280d36e8b3bfaea00ee36c4fe3215818b3c86..c8ff4df81779f3b66a3e62abab055b4e404b81e7 100644 (file)
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev,
  
         tpm_get_timeouts(priv->chip);
  
-       dev_set_drvdata(&dev->dev, priv->chip);
-
         return rv;
  }
  
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c

index 218460fcd2e4f9294d24b84680f173f838c11332..25a70d06c5bf243efe85ff3ed2dfbd2a7cc590df 100644 (file)
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -68,6 +68,9 @@ static void cs_check_cpu(int cpu, unsigned int load)
  
                 dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
  
+               if (dbs_info->requested_freq > policy->max)
+                       dbs_info->requested_freq = policy->max;
+
                 __cpufreq_driver_target(policy, dbs_info->requested_freq,
                         CPUFREQ_RELATION_H);
                 return;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c

index 0806c31e57645a287490c4d851d2760cd9484ca0..e6be63561fa699a28053c7528056dcd08ffb2285 100644 (file)
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -328,10 +328,6 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                              dbs_data->cdata->gov_dbs_timer);
                 }
  
-               /*
-                * conservative does not implement micro like ondemand
-                * governor, thus we are bound to jiffes/HZ
-                */
                 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
                         cs_dbs_info->down_skip = 0;
                         cs_dbs_info->enable = 1;
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c

index be6d14307aa81a7fa3e3626953b5a43b514933e2..a0acd0bfba40a361f3ea9eaafc42db928968dea7 100644 (file)
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -53,6 +53,7 @@ static unsigned int omap_getspeed(unsigned int cpu)
  
  static int omap_target(struct cpufreq_policy *policy, unsigned int index)
  {
+       int r, ret;
         struct dev_pm_opp *opp;
         unsigned long freq, volt = 0, volt_old = 0, tol = 0;
         unsigned int old_freq, new_freq;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig

index dd2874ec1927a6dae0e97defce4dfc05aef38d87..446687cc2334ed2f60a0b6f0170382fe733aae01 100644 (file)
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -89,14 +89,15 @@ config AT_HDMAC
           Support the Atmel AHB DMA controller.
  
  config FSL_DMA
-       tristate "Freescale Elo and Elo Plus DMA support"
+       tristate "Freescale Elo series DMA support"
         depends on FSL_SOC
         select DMA_ENGINE
         select ASYNC_TX_ENABLE_CHANNEL_SWITCH
         ---help---
-         Enable support for the Freescale Elo and Elo Plus DMA controllers.
-         The Elo is the DMA controller on some 82xx and 83xx parts, and the
-         Elo Plus is the DMA controller on 85xx and 86xx parts.
+         Enable support for the Freescale Elo series DMA controllers.
+         The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+         EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+         some Txxx and Bxxx parts.
  
  config MPC512X_DMA
         tristate "Freescale MPC512x built-in DMA engine support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c

index e51a9832ef0d06801fd151b376824a7ac3d697b7..16a2aa28f85672689f66c37039a8e72230e71b02 100644 (file)
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1164,42 +1164,12 @@ static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
         kfree(txd);
  }
  
-static void pl08x_unmap_buffers(struct pl08x_txd *txd)
-{
-       struct device *dev = txd->vd.tx.chan->device->dev;
-       struct pl08x_sg *dsg;
-
-       if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_single(dev, dsg->src_addr, dsg->len,
-                                               DMA_TO_DEVICE);
-               else {
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_page(dev, dsg->src_addr, dsg->len,
-                                               DMA_TO_DEVICE);
-               }
-       }
-       if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_single(dev, dsg->dst_addr, dsg->len,
-                                               DMA_FROM_DEVICE);
-               else
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_page(dev, dsg->dst_addr, dsg->len,
-                                               DMA_FROM_DEVICE);
-       }
-}
-
  static void pl08x_desc_free(struct virt_dma_desc *vd)
  {
         struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
         struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
  
-       if (!plchan->slave)
-               pl08x_unmap_buffers(txd);
-
+       dma_descriptor_unmap(txd);
         if (!txd->done)
                 pl08x_release_mux(plchan);
  
@@ -1252,7 +1222,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
         size_t bytes = 0;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         /*
@@ -1267,7 +1237,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
  
         spin_lock_irqsave(&plchan->vc.lock, flags);
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS) {
+       if (ret != DMA_COMPLETE) {
                 vd = vchan_find_desc(&plchan->vc, cookie);
                 if (vd) {
                         /* On the issued list, so hasn't been processed yet */
@@ -2138,8 +2108,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
         writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
         writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
  
-       ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
-                         DRIVER_NAME, pl08x);
+       ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
         if (ret) {
                 dev_err(&adev->dev, "%s failed to request interrupt %d\n",
                         __func__, adev->irq[0]);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c

index c787f38a186a008a6cf8fa4af1dc9d19cab8f836..e2c04dc81e2a903ea13956e872150c40ed8b00e8 100644 (file)
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -344,31 +344,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
         /* move myself to free_list */
         list_move(&desc->desc_node, &atchan->free_list);
  
-       /* unmap dma addresses (not on slave channels) */
-       if (!atchan->chan_common.private) {
-               struct device *parent = chan2parent(&atchan->chan_common);
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(parent,
-                                               desc->lli.daddr,
-                                               desc->len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(parent,
-                                               desc->lli.daddr,
-                                               desc->len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(parent,
-                                               desc->lli.saddr,
-                                               desc->len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(parent,
-                                               desc->lli.saddr,
-                                               desc->len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
         /* for cyclic transfers,
          * no need to replay callback function while stopping */
         if (!atc_chan_is_cyclic(atchan)) {
@@ -1102,7 +1078,7 @@ atc_tx_status(struct dma_chan *chan,
         int bytes = 0;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
         /*
          * There's no point calculating the residue if there's
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c

index 31011d2a26fcfa0510b64c1114c1f87c05762ec3..3c6716e0b78eee2592a8c977550ce036ab4318c5 100644 (file)
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -2369,7 +2369,7 @@ coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
         enum dma_status ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         dma_set_residue(txstate, coh901318_get_bytes_left(chan));
@@ -2694,7 +2694,7 @@ static int __init coh901318_probe(struct platform_device *pdev)
         if (irq < 0)
                 return irq;
  
-       err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, IRQF_DISABLED,
+       err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
                                "coh901318", base);
         if (err)
                 return err;
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c

index 7c82b92f9b16f18bf1ce4e6bedb988f5d5b70e22..c29dacff66fa951f136657536a628b6743ccda30 100644 (file)
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -141,6 +141,9 @@ struct cppi41_dd {
         const struct chan_queues *queues_rx;
         const struct chan_queues *queues_tx;
         struct chan_queues td_queue;
+
+       /* context for suspend/resume */
+       unsigned int dma_tdfdq;
  };
  
  #define FIST_COMPLETION_QUEUE  93
@@ -263,6 +266,15 @@ static u32 pd_trans_len(u32 val)
         return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
  }
  
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+       u32 desc;
+
+       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+       desc &= ~0x1f;
+       return desc;
+}
+
  static irqreturn_t cppi41_irq(int irq, void *data)
  {
         struct cppi41_dd *cdd = data;
@@ -300,8 +312,7 @@ static irqreturn_t cppi41_irq(int irq, void *data)
                         q_num = __fls(val);
                         val &= ~(1 << q_num);
                         q_num += 32 * i;
-                       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num));
-                       desc &= ~0x1f;
+                       desc = cppi41_pop_desc(cdd, q_num);
                         c = desc_to_chan(cdd, desc);
                         if (WARN_ON(!c)) {
                                 pr_err("%s() q %d desc %08x\n", __func__,
@@ -353,7 +364,7 @@ static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
  
         /* lock */
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (txstate && ret == DMA_SUCCESS)
+       if (txstate && ret == DMA_COMPLETE)
                 txstate->residue = c->residue;
         /* unlock */
  
@@ -517,15 +528,6 @@ static void cppi41_compute_td_desc(struct cppi41_desc *d)
         d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
  }
  
-static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
-{
-       u32 desc;
-
-       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
-       desc &= ~0x1f;
-       return desc;
-}
-
  static int cppi41_tear_down_chan(struct cppi41_channel *c)
  {
         struct cppi41_dd *cdd = c->cdd;
@@ -561,36 +563,26 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
                 c->td_retry = 100;
         }
  
-       if (!c->td_seen) {
-               unsigned td_comp_queue;
+       if (!c->td_seen || !c->td_desc_seen) {
  
-               if (c->is_tx)
-                       td_comp_queue =  cdd->td_queue.complete;
-               else
-                       td_comp_queue =  c->q_comp_num;
+               desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+               if (!desc_phys)
+                       desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
  
-               desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
-               if (desc_phys) {
-                       __iormb();
+               if (desc_phys == c->desc_phys) {
+                       c->td_desc_seen = 1;
+
+               } else if (desc_phys == td_desc_phys) {
+                       u32 pd0;
  
-                       if (desc_phys == td_desc_phys) {
-                               u32 pd0;
-                               pd0 = td->pd0;
-                               WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
-                               WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
-                               WARN_ON((pd0 & 0x1f) != c->port_num);
-                       } else {
-                               WARN_ON_ONCE(1);
-                       }
-                       c->td_seen = 1;
-               }
-       }
-       if (!c->td_desc_seen) {
-               desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
-               if (desc_phys) {
                         __iormb();
-                       WARN_ON(c->desc_phys != desc_phys);
-                       c->td_desc_seen = 1;
+                       pd0 = td->pd0;
+                       WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+                       WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+                       WARN_ON((pd0 & 0x1f) != c->port_num);
+                       c->td_seen = 1;
+               } else if (desc_phys) {
+                       WARN_ON_ONCE(1);
                 }
         }
         c->td_retry--;
@@ -609,7 +601,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
  
         WARN_ON(!c->td_retry);
         if (!c->td_desc_seen) {
-               desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+               desc_phys = cppi41_pop_desc(cdd, c->q_num);
                 WARN_ON(!desc_phys);
         }
  
@@ -674,14 +666,14 @@ static void cleanup_chans(struct cppi41_dd *cdd)
         }
  }
  
-static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
  {
         struct cppi41_channel *cchan;
         int i;
         int ret;
         u32 n_chans;
  
-       ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
+       ret = of_property_read_u32(dev->of_node, "#dma-channels",
                         &n_chans);
         if (ret)
                 return ret;
@@ -719,7 +711,7 @@ err:
         return -ENOMEM;
  }
  
-static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
  {
         unsigned int mem_decs;
         int i;
@@ -731,7 +723,7 @@ static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
                 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
                 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
  
-               dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
+               dma_free_coherent(dev, mem_decs, cdd->cd,
                                 cdd->descs_phys);
         }
  }
@@ -741,19 +733,19 @@ static void disable_sched(struct cppi41_dd *cdd)
         cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
  }
  
-static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
  {
         disable_sched(cdd);
  
-       purge_descs(pdev, cdd);
+       purge_descs(dev, cdd);
  
         cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
         cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
-       dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+       dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
                         cdd->scratch_phys);
  }
  
-static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
  {
         unsigned int desc_size;
         unsigned int mem_decs;
@@ -777,7 +769,7 @@ static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
                 reg |= ilog2(ALLOC_DECS_NUM) - 5;
  
                 BUILD_BUG_ON(DESCS_AREAS != 1);
-               cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
+               cdd->cd = dma_alloc_coherent(dev, mem_decs,
                                 &cdd->descs_phys, GFP_KERNEL);
                 if (!cdd->cd)
                         return -ENOMEM;
@@ -813,12 +805,12 @@ static void init_sched(struct cppi41_dd *cdd)
         cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
  }
  
-static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
  {
         int ret;
  
         BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
-       cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
+       cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
                         &cdd->scratch_phys, GFP_KERNEL);
         if (!cdd->qmgr_scratch)
                 return -ENOMEM;
@@ -827,7 +819,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
         cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
         cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
  
-       ret = init_descs(pdev, cdd);
+       ret = init_descs(dev, cdd);
         if (ret)
                 goto err_td;
  
@@ -835,7 +827,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
         init_sched(cdd);
         return 0;
  err_td:
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(dev, cdd);
         return ret;
  }
  
@@ -914,11 +906,11 @@ static const struct of_device_id cppi41_dma_ids[] = {
  };
  MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
  
-static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
  {
         const struct of_device_id *of_id;
  
-       of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node);
+       of_id = of_match_node(cppi41_dma_ids, dev->of_node);
         if (!of_id)
                 return NULL;
         return of_id->data;
@@ -927,11 +919,12 @@ static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
  static int cppi41_dma_probe(struct platform_device *pdev)
  {
         struct cppi41_dd *cdd;
+       struct device *dev = &pdev->dev;
         const struct cppi_glue_infos *glue_info;
         int irq;
         int ret;
  
-       glue_info = get_glue_info(pdev);
+       glue_info = get_glue_info(dev);
         if (!glue_info)
                 return -EINVAL;
  
@@ -946,14 +939,14 @@ static int cppi41_dma_probe(struct platform_device *pdev)
         cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
         cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
         cdd->ddev.device_control = cppi41_dma_control;
-       cdd->ddev.dev = &pdev->dev;
+       cdd->ddev.dev = dev;
         INIT_LIST_HEAD(&cdd->ddev.channels);
         cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
  
-       cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0);
-       cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1);
-       cdd->sched_mem = of_iomap(pdev->dev.of_node, 2);
-       cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3);
+       cdd->usbss_mem = of_iomap(dev->of_node, 0);
+       cdd->ctrl_mem = of_iomap(dev->of_node, 1);
+       cdd->sched_mem = of_iomap(dev->of_node, 2);
+       cdd->qmgr_mem = of_iomap(dev->of_node, 3);
  
         if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
                         !cdd->qmgr_mem) {
@@ -961,31 +954,31 @@ static int cppi41_dma_probe(struct platform_device *pdev)
                 goto err_remap;
         }
  
-       pm_runtime_enable(&pdev->dev);
-       ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret)
+       pm_runtime_enable(dev);
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0)
                 goto err_get_sync;
  
         cdd->queues_rx = glue_info->queues_rx;
         cdd->queues_tx = glue_info->queues_tx;
         cdd->td_queue = glue_info->td_queue;
  
-       ret = init_cppi41(pdev, cdd);
+       ret = init_cppi41(dev, cdd);
         if (ret)
                 goto err_init_cppi;
  
-       ret = cppi41_add_chans(pdev, cdd);
+       ret = cppi41_add_chans(dev, cdd);
         if (ret)
                 goto err_chans;
  
-       irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+       irq = irq_of_parse_and_map(dev->of_node, 0);
         if (!irq)
                 goto err_irq;
  
         cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
  
         ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
-                       dev_name(&pdev->dev), cdd);
+                       dev_name(dev), cdd);
         if (ret)
                 goto err_irq;
         cdd->irq = irq;
@@ -994,7 +987,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
         if (ret)
                 goto err_dma_reg;
  
-       ret = of_dma_controller_register(pdev->dev.of_node,
+       ret = of_dma_controller_register(dev->of_node,
                         cppi41_dma_xlate, &cpp41_dma_info);
         if (ret)
                 goto err_of;
@@ -1009,11 +1002,11 @@ err_irq:
         cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
         cleanup_chans(cdd);
  err_chans:
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(dev, cdd);
  err_init_cppi:
-       pm_runtime_put(&pdev->dev);
+       pm_runtime_put(dev);
  err_get_sync:
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_disable(dev);
         iounmap(cdd->usbss_mem);
         iounmap(cdd->ctrl_mem);
         iounmap(cdd->sched_mem);
@@ -1033,7 +1026,7 @@ static int cppi41_dma_remove(struct platform_device *pdev)
         cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
         free_irq(cdd->irq, cdd);
         cleanup_chans(cdd);
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(&pdev->dev, cdd);
         iounmap(cdd->usbss_mem);
         iounmap(cdd->ctrl_mem);
         iounmap(cdd->sched_mem);
@@ -1044,12 +1037,53 @@ static int cppi41_dma_remove(struct platform_device *pdev)
         return 0;
  }
  
+#ifdef CONFIG_PM_SLEEP
+static int cppi41_suspend(struct device *dev)
+{
+       struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+       cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+       cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
+       disable_sched(cdd);
+
+       return 0;
+}
+
+static int cppi41_resume(struct device *dev)
+{
+       struct cppi41_dd *cdd = dev_get_drvdata(dev);
+       struct cppi41_channel *c;
+       int i;
+
+       for (i = 0; i < DESCS_AREAS; i++)
+               cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+       list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+               if (!c->is_tx)
+                       cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+       init_sched(cdd);
+
+       cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+       cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+       cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+       cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+       cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cppi41_pm_ops, cppi41_suspend, cppi41_resume);
+
  static struct platform_driver cpp41_dma_driver = {
         .probe  = cppi41_dma_probe,
         .remove = cppi41_dma_remove,
         .driver = {
                 .name = "cppi41-dma-engine",
                 .owner = THIS_MODULE,
+               .pm = &cppi41_pm_ops,
                 .of_match_table = of_match_ptr(cppi41_dma_ids),
         },
  };
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c

index b0c0c8268d42bb023ac94ca8f420e22a5a197c37..94c380f0753860c4c4002c19f4217332fa2460f0 100644 (file)
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -491,7 +491,7 @@ static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
         unsigned long flags;
  
         status = dma_cookie_status(c, cookie, state);
-       if (status == DMA_SUCCESS || !state)
+       if (status == DMA_COMPLETE || !state)
                 return status;
  
         spin_lock_irqsave(&chan->vchan.lock, flags);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c

index 9162ac80c18f303ac9a509eb97298eba33d4753b..ea806bdc12ef92418c528be0b950758de59c3ee7 100644 (file)
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -65,6 +65,7 @@
  #include <linux/acpi.h>
  #include <linux/acpi_dma.h>
  #include <linux/of_dma.h>
+#include <linux/mempool.h>
  
  static DEFINE_MUTEX(dma_list_mutex);
  static DEFINE_IDR(dma_idr);
@@ -901,98 +902,132 @@ void dma_async_device_unregister(struct dma_device *device)
  }
  EXPORT_SYMBOL(dma_async_device_unregister);
  
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
-                       void *src, size_t len)
-{
-       struct dma_device *dev = chan->device;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       unsigned long flags;
+struct dmaengine_unmap_pool {
+       struct kmem_cache *cache;
+       const char *name;
+       mempool_t *pool;
+       size_t size;
+};
  
-       dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-       flags = DMA_CTRL_ACK |
-               DMA_COMPL_SRC_UNMAP_SINGLE |
-               DMA_COMPL_DEST_UNMAP_SINGLE;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+       __UNMAP_POOL(2),
+       #if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+       __UNMAP_POOL(16),
+       __UNMAP_POOL(128),
+       __UNMAP_POOL(256),
+       #endif
+};
  
-       if (!tx) {
-               dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-               return -ENOMEM;
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+       int order = get_count_order(nr);
+
+       switch (order) {
+       case 0 ... 1:
+               return &unmap_pool[0];
+       case 2 ... 4:
+               return &unmap_pool[1];
+       case 5 ... 7:
+               return &unmap_pool[2];
+       case 8:
+               return &unmap_pool[3];
+       default:
+               BUG();
+               return NULL;
         }
+}
  
-       tx->callback = NULL;
-       cookie = tx->tx_submit(tx);
+static void dmaengine_unmap(struct kref *kref)
+{
+       struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+       struct device *dev = unmap->dev;
+       int cnt, i;
+
+       cnt = unmap->to_cnt;
+       for (i = 0; i < cnt; i++)
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_TO_DEVICE);
+       cnt += unmap->from_cnt;
+       for (; i < cnt; i++)
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_FROM_DEVICE);
+       cnt += unmap->bidi_cnt;
+       for (; i < cnt; i++) {
+               if (unmap->addr[i] == 0)
+                       continue;
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_BIDIRECTIONAL);
+       }
+       mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
  
-       preempt_disable();
-       __this_cpu_add(chan->local->bytes_transferred, len);
-       __this_cpu_inc(chan->local->memcpy_count);
-       preempt_enable();
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+       if (unmap)
+               kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
  
-       return cookie;
+static void dmaengine_destroy_unmap_pool(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+               struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+               if (p->pool)
+                       mempool_destroy(p->pool);
+               p->pool = NULL;
+               if (p->cache)
+                       kmem_cache_destroy(p->cache);
+               p->cache = NULL;
+       }
  }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
  
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
-                       unsigned int offset, void *kdata, size_t len)
+static int __init dmaengine_init_unmap_pool(void)
  {
-       struct dma_device *dev = chan->device;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       unsigned long flags;
+       int i;
  
-       dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-       flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+       for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+               struct dmaengine_unmap_pool *p = &unmap_pool[i];
+               size_t size;
  
-       if (!tx) {
-               dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-               return -ENOMEM;
+               size = sizeof(struct dmaengine_unmap_data) +
+                      sizeof(dma_addr_t) * p->size;
+
+               p->cache = kmem_cache_create(p->name, size, 0,
+                                            SLAB_HWCACHE_ALIGN, NULL);
+               if (!p->cache)
+                       break;
+               p->pool = mempool_create_slab_pool(1, p->cache);
+               if (!p->pool)
+                       break;
         }
  
-       tx->callback = NULL;
-       cookie = tx->tx_submit(tx);
+       if (i == ARRAY_SIZE(unmap_pool))
+               return 0;
  
-       preempt_disable();
-       __this_cpu_add(chan->local->bytes_transferred, len);
-       __this_cpu_inc(chan->local->memcpy_count);
-       preempt_enable();
+       dmaengine_destroy_unmap_pool();
+       return -ENOMEM;
+}
  
-       return cookie;
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+       struct dmaengine_unmap_data *unmap;
+
+       unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+       if (!unmap)
+               return NULL;
+
+       memset(unmap, 0, sizeof(*unmap));
+       kref_init(&unmap->kref);
+       unmap->dev = dev;
+
+       return unmap;
  }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
  
  /**
   * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
@@ -1015,24 +1050,33 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
  {
         struct dma_device *dev = chan->device;
         struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
+       struct dmaengine_unmap_data *unmap;
         dma_cookie_t cookie;
         unsigned long flags;
  
-       dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
-                               DMA_FROM_DEVICE);
+       unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->to_cnt = 1;
+       unmap->from_cnt = 1;
+       unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+                                     DMA_TO_DEVICE);
+       unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+                                     DMA_FROM_DEVICE);
+       unmap->len = len;
         flags = DMA_CTRL_ACK;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+       tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+                                        len, flags);
  
         if (!tx) {
-               dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+               dmaengine_unmap_put(unmap);
                 return -ENOMEM;
         }
  
-       tx->callback = NULL;
+       dma_set_unmap(tx, unmap);
         cookie = tx->tx_submit(tx);
+       dmaengine_unmap_put(unmap);
  
         preempt_disable();
         __this_cpu_add(chan->local->bytes_transferred, len);
@@ -1043,6 +1087,52 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
  }
  EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
  
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+                           void *src, size_t len)
+{
+       return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
+                                        (unsigned long) dest & ~PAGE_MASK,
+                                        virt_to_page(src),
+                                        (unsigned long) src & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+                          unsigned int offset, void *kdata, size_t len)
+{
+       return dma_async_memcpy_pg_to_pg(chan, page, offset,
+                                        virt_to_page(kdata),
+                                        (unsigned long) kdata & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
  void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
         struct dma_chan *chan)
  {
@@ -1062,7 +1152,7 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
         unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
  
         if (!tx)
-               return DMA_SUCCESS;
+               return DMA_COMPLETE;
  
         while (tx->cookie == -EBUSY) {
                 if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
@@ -1116,6 +1206,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
  
  static int __init dma_bus_init(void)
  {
+       int err = dmaengine_init_unmap_pool();
+
+       if (err)
+               return err;
         return class_register(&dma_devclass);
  }
  arch_initcall(dma_bus_init);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c

index 92f796cdc6ab1dc12c6b895fb2d6e16e5dd92264..20f9a3aaf9266ea6daa71a18f08d258afa8a1a1e 100644 (file)
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -8,6 +8,8 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
  #include <linux/delay.h>
  #include <linux/dma-mapping.h>
  #include <linux/dmaengine.h>
@@ -19,10 +21,6 @@
  #include <linux/random.h>
  #include <linux/slab.h>
  #include <linux/wait.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
  
  static unsigned int test_buf_size = 16384;
  module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
@@ -68,92 +66,13 @@ module_param(timeout, uint, S_IRUGO | S_IWUSR);
  MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
                  "Pass -1 for infinite timeout");
  
-/* Maximum amount of mismatched bytes in buffer to print */
-#define MAX_ERROR_COUNT                32
-
-/*
- * Initialization patterns. All bytes in the source buffer has bit 7
- * set, all bytes in the destination buffer has bit 7 cleared.
- *
- * Bit 6 is set for all bytes which are to be copied by the DMA
- * engine. Bit 5 is set for all bytes which are to be overwritten by
- * the DMA engine.
- *
- * The remaining bits are the inverse of a counter which increments by
- * one for each byte address.
- */
-#define PATTERN_SRC            0x80
-#define PATTERN_DST            0x00
-#define PATTERN_COPY           0x40
-#define PATTERN_OVERWRITE      0x20
-#define PATTERN_COUNT_MASK     0x1f
-
-enum dmatest_error_type {
-       DMATEST_ET_OK,
-       DMATEST_ET_MAP_SRC,
-       DMATEST_ET_MAP_DST,
-       DMATEST_ET_PREP,
-       DMATEST_ET_SUBMIT,
-       DMATEST_ET_TIMEOUT,
-       DMATEST_ET_DMA_ERROR,
-       DMATEST_ET_DMA_IN_PROGRESS,
-       DMATEST_ET_VERIFY,
-       DMATEST_ET_VERIFY_BUF,
-};
-
-struct dmatest_verify_buffer {
-       unsigned int    index;
-       u8              expected;
-       u8              actual;
-};
-
-struct dmatest_verify_result {
-       unsigned int                    error_count;
-       struct dmatest_verify_buffer    data[MAX_ERROR_COUNT];
-       u8                              pattern;
-       bool                            is_srcbuf;
-};
-
-struct dmatest_thread_result {
-       struct list_head        node;
-       unsigned int            n;
-       unsigned int            src_off;
-       unsigned int            dst_off;
-       unsigned int            len;
-       enum dmatest_error_type type;
-       union {
-               unsigned long                   data;
-               dma_cookie_t                    cookie;
-               enum dma_status                 status;
-               int                             error;
-               struct dmatest_verify_result    *vr;
-       };
-};
-
-struct dmatest_result {
-       struct list_head        node;
-       char                    *name;
-       struct list_head        results;
-};
-
-struct dmatest_info;
-
-struct dmatest_thread {
-       struct list_head        node;
-       struct dmatest_info     *info;
-       struct task_struct      *task;
-       struct dma_chan         *chan;
-       u8                      **srcs;
-       u8                      **dsts;
-       enum dma_transaction_type type;
-       bool                    done;
-};
+static bool noverify;
+module_param(noverify, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
  
-struct dmatest_chan {
-       struct list_head        node;
-       struct dma_chan         *chan;
-       struct list_head        threads;
-};
+static bool verbose;
+module_param(verbose, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
  
  /**
   * struct dmatest_params - test parameters.
@@ -177,6 +96,7 @@ struct dmatest_params {
         unsigned int    xor_sources;
         unsigned int    pq_sources;
         int             timeout;
+       bool            noverify;
  };
  
  /**
@@ -184,7 +104,7 @@ struct dmatest_params {
   * @params:            test parameters
   * @lock:              access protection to the fields of this structure
   */
-struct dmatest_info {
+static struct dmatest_info {
         /* Test parameters */
         struct dmatest_params   params;
  
@@ -192,16 +112,95 @@ struct dmatest_info {
         struct list_head        channels;
         unsigned int            nr_channels;
         struct mutex            lock;
+       bool                    did_init;
+} test_info = {
+       .channels = LIST_HEAD_INIT(test_info.channels),
+       .lock = __MUTEX_INITIALIZER(test_info.lock),
+};
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static struct kernel_param_ops run_ops = {
+       .set = dmatest_run_set,
+       .get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT                32
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC            0x80
+#define PATTERN_DST            0x00
+#define PATTERN_COPY           0x40
+#define PATTERN_OVERWRITE      0x20
+#define PATTERN_COUNT_MASK     0x1f
  
-       /* debugfs related stuff */
-       struct dentry           *root;
+struct dmatest_thread {
+       struct list_head        node;
+       struct dmatest_info     *info;
+       struct task_struct      *task;
+       struct dma_chan         *chan;
+       u8                      **srcs;
+       u8                      **dsts;
+       enum dma_transaction_type type;
+       bool                    done;
+};
  
-       /* Test results */
-       struct list_head        results;
-       struct mutex            results_lock;
+struct dmatest_chan {
+       struct list_head        node;
+       struct dma_chan         *chan;
+       struct list_head        threads;
  };
  
-static struct dmatest_info test_info;
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+       struct dmatest_chan *dtc;
+
+       list_for_each_entry(dtc, &info->channels, node) {
+               struct dmatest_thread *thread;
+
+               list_for_each_entry(thread, &dtc->threads, node) {
+                       if (!thread->done)
+                               return true;
+               }
+       }
+
+       return false;
+}
+
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+       struct dmatest_info *info = &test_info;
+       struct dmatest_params *params = &info->params;
+
+       if (params->iterations)
+               wait_event(thread_wait, !is_threaded_test_run(info));
+       wait = true;
+       return param_get_bool(val, kp);
+}
+
+static struct kernel_param_ops wait_ops = {
+       .get = dmatest_wait_get,
+       .set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, S_IRUGO);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
  
  static bool dmatest_match_channel(struct dmatest_params *params,
                 struct dma_chan *chan)
@@ -223,7 +222,7 @@ static unsigned long dmatest_random(void)
  {
         unsigned long buf;
  
-       get_random_bytes(&buf, sizeof(buf));
+       prandom_bytes(&buf, sizeof(buf));
         return buf;
  }
  
@@ -262,9 +261,31 @@ static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
         }
  }
  
-static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
-               unsigned int start, unsigned int end, unsigned int counter,
-               u8 pattern, bool is_srcbuf)
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+               unsigned int counter, bool is_srcbuf)
+{
+       u8              diff = actual ^ pattern;
+       u8              expected = pattern | (~counter & PATTERN_COUNT_MASK);
+       const char      *thread_name = current->comm;
+
+       if (is_srcbuf)
+               pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+                       thread_name, index, expected, actual);
+       else if ((pattern & PATTERN_COPY)
+                       && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+               pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+                       thread_name, index, expected, actual);
+       else if (diff & PATTERN_SRC)
+               pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+                       thread_name, index, expected, actual);
+       else
+               pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+                       thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+               unsigned int end, unsigned int counter, u8 pattern,
+               bool is_srcbuf)
  {
         unsigned int i;
         unsigned int error_count = 0;
@@ -272,7 +293,6 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
         u8 expected;
         u8 *buf;
         unsigned int counter_orig = counter;
-       struct dmatest_verify_buffer *vb;
  
         for (; (buf = *bufs); bufs++) {
                 counter = counter_orig;
@@ -280,12 +300,9 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
                         actual = buf[i];
                         expected = pattern | (~counter & PATTERN_COUNT_MASK);
                         if (actual != expected) {
-                               if (error_count < MAX_ERROR_COUNT && vr) {
-                                       vb = &vr->data[error_count];
-                                       vb->index = i;
-                                       vb->expected = expected;
-                                       vb->actual = actual;
-                               }
+                               if (error_count < MAX_ERROR_COUNT)
+                                       dmatest_mismatch(actual, pattern, i,
+                                                        counter, is_srcbuf);
                                 error_count++;
                         }
                         counter++;
@@ -293,7 +310,7 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
         }
  
         if (error_count > MAX_ERROR_COUNT)
-               pr_warning("%s: %u errors suppressed\n",
+               pr_warn("%s: %u errors suppressed\n",
                         current->comm, error_count - MAX_ERROR_COUNT);
  
         return error_count;
@@ -313,20 +330,6 @@ static void dmatest_callback(void *arg)
         wake_up_all(done->wait);
  }
  
-static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
-                            unsigned int count)
-{
-       while (count--)
-               dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
-}
-
-static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
-                            unsigned int count)
-{
-       while (count--)
-               dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
-}
-
  static unsigned int min_odd(unsigned int x, unsigned int y)
  {
         unsigned int val = min(x, y);
@@ -334,172 +337,49 @@ static unsigned int min_odd(unsigned int x, unsigned int y)
         return val % 2 ? val : val - 1;
  }
  
-static char *verify_result_get_one(struct dmatest_verify_result *vr,
-               unsigned int i)
+static void result(const char *err, unsigned int n, unsigned int src_off,
+                  unsigned int dst_off, unsigned int len, unsigned long data)
  {
-       struct dmatest_verify_buffer *vb = &vr->data[i];
-       u8 diff = vb->actual ^ vr->pattern;
-       static char buf[512];
-       char *msg;
-
-       if (vr->is_srcbuf)
-               msg = "srcbuf overwritten!";
-       else if ((vr->pattern & PATTERN_COPY)
-                       && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
-               msg = "dstbuf not copied!";
-       else if (diff & PATTERN_SRC)
-               msg = "dstbuf was copied!";
-       else
-               msg = "dstbuf mismatch!";
-
-       snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg,
-                vb->index, vb->expected, vb->actual);
-
-       return buf;
+       pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+               current->comm, n, err, src_off, dst_off, len, data);
  }
  
-static char *thread_result_get(const char *name,
-               struct dmatest_thread_result *tr)
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+                      unsigned int dst_off, unsigned int len,
+                      unsigned long data)
  {
-       static const char * const messages[] = {
-               [DMATEST_ET_OK]                 = "No errors",
-               [DMATEST_ET_MAP_SRC]            = "src mapping error",
-               [DMATEST_ET_MAP_DST]            = "dst mapping error",
-               [DMATEST_ET_PREP]               = "prep error",
-               [DMATEST_ET_SUBMIT]             = "submit error",
-               [DMATEST_ET_TIMEOUT]            = "test timed out",
-               [DMATEST_ET_DMA_ERROR]          =
-                       "got completion callback (DMA_ERROR)",
-               [DMATEST_ET_DMA_IN_PROGRESS]    =
-                       "got completion callback (DMA_IN_PROGRESS)",
-               [DMATEST_ET_VERIFY]             = "errors",
-               [DMATEST_ET_VERIFY_BUF]         = "verify errors",
-       };
-       static char buf[512];
-
-       snprintf(buf, sizeof(buf) - 1,
-                "%s: #%u: %s with src_off=0x%x ""dst_off=0x%x len=0x%x (%lu)",
-                name, tr->n, messages[tr->type], tr->src_off, tr->dst_off,
-                tr->len, tr->data);
-
-       return buf;
+       pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+                  current->comm, n, err, src_off, dst_off, len, data);
  }
  
-static int thread_result_add(struct dmatest_info *info,
-               struct dmatest_result *r, enum dmatest_error_type type,
-               unsigned int n, unsigned int src_off, unsigned int dst_off,
-               unsigned int len, unsigned long data)
-{
-       struct dmatest_thread_result *tr;
-
-       tr = kzalloc(sizeof(*tr), GFP_KERNEL);
-       if (!tr)
-               return -ENOMEM;
-
-       tr->type = type;
-       tr->n = n;
-       tr->src_off = src_off;
-       tr->dst_off = dst_off;
-       tr->len = len;
-       tr->data = data;
+#define verbose_result(err, n, src_off, dst_off, len, data) ({ \
+       if (verbose) \
+               result(err, n, src_off, dst_off, len, data); \
+       else \
+               dbg_result(err, n, src_off, dst_off, len, data); \
+})
  
-       mutex_lock(&info->results_lock);
-       list_add_tail(&tr->node, &r->results);
-       mutex_unlock(&info->results_lock);
-
-       if (tr->type == DMATEST_ET_OK)
-               pr_debug("%s\n", thread_result_get(r->name, tr));
-       else
-               pr_warn("%s\n", thread_result_get(r->name, tr));
-
-       return 0;
-}
-
-static unsigned int verify_result_add(struct dmatest_info *info,
-               struct dmatest_result *r, unsigned int n,
-               unsigned int src_off, unsigned int dst_off, unsigned int len,
-               u8 **bufs, int whence, unsigned int counter, u8 pattern,
-               bool is_srcbuf)
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
  {
-       struct dmatest_verify_result *vr;
-       unsigned int error_count;
-       unsigned int buf_off = is_srcbuf ? src_off : dst_off;
-       unsigned int start, end;
-
-       if (whence < 0) {
-               start = 0;
-               end = buf_off;
-       } else if (whence > 0) {
-               start = buf_off + len;
-               end = info->params.buf_size;
-       } else {
-               start = buf_off;
-               end = buf_off + len;
-       }
+       unsigned long long per_sec = 1000000;
  
-       vr = kmalloc(sizeof(*vr), GFP_KERNEL);
-       if (!vr) {
-               pr_warn("dmatest: No memory to store verify result\n");
-               return dmatest_verify(NULL, bufs, start, end, counter, pattern,
-                                     is_srcbuf);
-       }
-
-       vr->pattern = pattern;
-       vr->is_srcbuf = is_srcbuf;
-
-       error_count = dmatest_verify(vr, bufs, start, end, counter, pattern,
-                                    is_srcbuf);
-       if (error_count) {
-               vr->error_count = error_count;
-               thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off,
-                                 dst_off, len, (unsigned long)vr);
-               return error_count;
-       }
-
-       kfree(vr);
-       return 0;
-}
-
-static void result_free(struct dmatest_info *info, const char *name)
-{
-       struct dmatest_result *r, *_r;
-
-       mutex_lock(&info->results_lock);
-       list_for_each_entry_safe(r, _r, &info->results, node) {
-               struct dmatest_thread_result *tr, *_tr;
-
-               if (name && strcmp(r->name, name))
-                       continue;
-
-               list_for_each_entry_safe(tr, _tr, &r->results, node) {
-                       if (tr->type == DMATEST_ET_VERIFY_BUF)
-                               kfree(tr->vr);
-                       list_del(&tr->node);
-                       kfree(tr);
-               }
+       if (runtime <= 0)
+               return 0;
  
-               kfree(r->name);
-               list_del(&r->node);
-               kfree(r);
+       /* drop precision until runtime is 32-bits */
+       while (runtime > UINT_MAX) {
+               runtime >>= 1;
+               per_sec <<= 1;
         }
  
-       mutex_unlock(&info->results_lock);
+       per_sec *= val;
+       do_div(per_sec, runtime);
+       return per_sec;
  }
  
-static struct dmatest_result *result_init(struct dmatest_info *info,
-               const char *name)
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
  {
-       struct dmatest_result *r;
-
-       r = kzalloc(sizeof(*r), GFP_KERNEL);
-       if (r) {
-               r->name = kstrdup(name, GFP_KERNEL);
-               INIT_LIST_HEAD(&r->results);
-               mutex_lock(&info->results_lock);
-               list_add_tail(&r->node, &info->results);
-               mutex_unlock(&info->results_lock);
-       }
-       return r;
+       return dmatest_persec(runtime, len >> 10);
  }
  
  /*
@@ -525,7 +405,6 @@ static int dmatest_func(void *data)
         struct dmatest_params   *params;
         struct dma_chan         *chan;
         struct dma_device       *dev;
-       const char              *thread_name;
         unsigned int            src_off, dst_off, len;
         unsigned int            error_count;
         unsigned int            failed_tests = 0;
@@ -538,9 +417,10 @@ static int dmatest_func(void *data)
         int                     src_cnt;
         int                     dst_cnt;
         int                     i;
-       struct dmatest_result   *result;
+       ktime_t                 ktime;
+       s64                     runtime = 0;
+       unsigned long long      total_len = 0;
  
-       thread_name = current->comm;
         set_freezable();
  
         ret = -ENOMEM;
@@ -570,10 +450,6 @@ static int dmatest_func(void *data)
         } else
                 goto err_thread_type;
  
-       result = result_init(info, thread_name);
-       if (!result)
-               goto err_srcs;
-
         thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
         if (!thread->srcs)
                 goto err_srcs;
@@ -597,17 +473,17 @@ static int dmatest_func(void *data)
         set_user_nice(current, 10);
  
         /*
-        * src buffers are freed by the DMAEngine code with dma_unmap_single()
-        * dst buffers are freed by ourselves below
+        * src and dst buffers are freed by ourselves below
          */
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
-             | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
  
+       ktime = ktime_get();
         while (!kthread_should_stop()
                && !(params->iterations && total_tests >= params->iterations)) {
                 struct dma_async_tx_descriptor *tx = NULL;
-               dma_addr_t dma_srcs[src_cnt];
-               dma_addr_t dma_dsts[dst_cnt];
+               struct dmaengine_unmap_data *um;
+               dma_addr_t srcs[src_cnt];
+               dma_addr_t *dsts;
                 u8 align = 0;
  
                 total_tests++;
@@ -626,81 +502,103 @@ static int dmatest_func(void *data)
                         break;
                 }
  
-               len = dmatest_random() % params->buf_size + 1;
+               if (params->noverify) {
+                       len = params->buf_size;
+                       src_off = 0;
+                       dst_off = 0;
+               } else {
+                       len = dmatest_random() % params->buf_size + 1;
+                       len = (len >> align) << align;
+                       if (!len)
+                               len = 1 << align;
+                       src_off = dmatest_random() % (params->buf_size - len + 1);
+                       dst_off = dmatest_random() % (params->buf_size - len + 1);
+
+                       src_off = (src_off >> align) << align;
+                       dst_off = (dst_off >> align) << align;
+
+                       dmatest_init_srcs(thread->srcs, src_off, len,
+                                         params->buf_size);
+                       dmatest_init_dsts(thread->dsts, dst_off, len,
+                                         params->buf_size);
+               }
+
                 len = (len >> align) << align;
                 if (!len)
                         len = 1 << align;
-               src_off = dmatest_random() % (params->buf_size - len + 1);
-               dst_off = dmatest_random() % (params->buf_size - len + 1);
+               total_len += len;
  
-               src_off = (src_off >> align) << align;
-               dst_off = (dst_off >> align) << align;
-
-               dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size);
-               dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size);
+               um = dmaengine_get_unmap_data(dev->dev, src_cnt+dst_cnt,
+                                             GFP_KERNEL);
+               if (!um) {
+                       failed_tests++;
+                       result("unmap data NULL", total_tests,
+                              src_off, dst_off, len, ret);
+                       continue;
+               }
  
+               um->len = params->buf_size;
                 for (i = 0; i < src_cnt; i++) {
-                       u8 *buf = thread->srcs[i] + src_off;
-
-                       dma_srcs[i] = dma_map_single(dev->dev, buf, len,
-                                                    DMA_TO_DEVICE);
-                       ret = dma_mapping_error(dev->dev, dma_srcs[i]);
+                       unsigned long buf = (unsigned long) thread->srcs[i];
+                       struct page *pg = virt_to_page(buf);
+                       unsigned pg_off = buf & ~PAGE_MASK;
+
+                       um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
+                                                  um->len, DMA_TO_DEVICE);
+                       srcs[i] = um->addr[i] + src_off;
+                       ret = dma_mapping_error(dev->dev, um->addr[i]);
                         if (ret) {
-                               unmap_src(dev->dev, dma_srcs, len, i);
-                               thread_result_add(info, result,
-                                                 DMATEST_ET_MAP_SRC,
-                                                 total_tests, src_off, dst_off,
-                                                 len, ret);
+                               dmaengine_unmap_put(um);
+                               result("src mapping error", total_tests,
+                                      src_off, dst_off, len, ret);
                                 failed_tests++;
                                 continue;
                         }
+                       um->to_cnt++;
                 }
                 /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+               dsts = &um->addr[src_cnt];
                 for (i = 0; i < dst_cnt; i++) {
-                       dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
-                                                    params->buf_size,
-                                                    DMA_BIDIRECTIONAL);
-                       ret = dma_mapping_error(dev->dev, dma_dsts[i]);
+                       unsigned long buf = (unsigned long) thread->dsts[i];
+                       struct page *pg = virt_to_page(buf);
+                       unsigned pg_off = buf & ~PAGE_MASK;
+
+                       dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
+                                              DMA_BIDIRECTIONAL);
+                       ret = dma_mapping_error(dev->dev, dsts[i]);
                         if (ret) {
-                               unmap_src(dev->dev, dma_srcs, len, src_cnt);
-                               unmap_dst(dev->dev, dma_dsts, params->buf_size,
-                                         i);
-                               thread_result_add(info, result,
-                                                 DMATEST_ET_MAP_DST,
-                                                 total_tests, src_off, dst_off,
-                                                 len, ret);
+                               dmaengine_unmap_put(um);
+                               result("dst mapping error", total_tests,
+                                      src_off, dst_off, len, ret);
                                 failed_tests++;
                                 continue;
                         }
+                       um->bidi_cnt++;
                 }
  
                 if (thread->type == DMA_MEMCPY)
                         tx = dev->device_prep_dma_memcpy(chan,
-                                                        dma_dsts[0] + dst_off,
-                                                        dma_srcs[0], len,
-                                                        flags);
+                                                        dsts[0] + dst_off,
+                                                        srcs[0], len, flags);
                 else if (thread->type == DMA_XOR)
                         tx = dev->device_prep_dma_xor(chan,
-                                                     dma_dsts[0] + dst_off,
-                                                     dma_srcs, src_cnt,
+                                                     dsts[0] + dst_off,
+                                                     srcs, src_cnt,
                                                       len, flags);
                 else if (thread->type == DMA_PQ) {
                         dma_addr_t dma_pq[dst_cnt];
  
                         for (i = 0; i < dst_cnt; i++)
-                               dma_pq[i] = dma_dsts[i] + dst_off;
-                       tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+                               dma_pq[i] = dsts[i] + dst_off;
+                       tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
                                                      src_cnt, pq_coefs,
                                                      len, flags);
                 }
  
                 if (!tx) {
-                       unmap_src(dev->dev, dma_srcs, len, src_cnt);
-                       unmap_dst(dev->dev, dma_dsts, params->buf_size,
-                                 dst_cnt);
-                       thread_result_add(info, result, DMATEST_ET_PREP,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       dmaengine_unmap_put(um);
+                       result("prep error", total_tests, src_off,
+                              dst_off, len, ret);
                         msleep(100);
                         failed_tests++;
                         continue;
@@ -712,9 +610,9 @@ static int dmatest_func(void *data)
                 cookie = tx->tx_submit(tx);
  
                 if (dma_submit_error(cookie)) {
-                       thread_result_add(info, result, DMATEST_ET_SUBMIT,
-                                         total_tests, src_off, dst_off,
-                                         len, cookie);
+                       dmaengine_unmap_put(um);
+                       result("submit error", total_tests, src_off,
+                              dst_off, len, ret);
                         msleep(100);
                         failed_tests++;
                         continue;
@@ -735,59 +633,59 @@ static int dmatest_func(void *data)
                          * free it this time?" dancing.  For now, just
                          * leave it dangling.
                          */
-                       thread_result_add(info, result, DMATEST_ET_TIMEOUT,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       dmaengine_unmap_put(um);
+                       result("test timed out", total_tests, src_off, dst_off,
+                              len, 0);
                         failed_tests++;
                         continue;
-               } else if (status != DMA_SUCCESS) {
-                       enum dmatest_error_type type = (status == DMA_ERROR) ?
-                               DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS;
-                       thread_result_add(info, result, type,
-                                         total_tests, src_off, dst_off,
-                                         len, status);
+               } else if (status != DMA_COMPLETE) {
+                       dmaengine_unmap_put(um);
+                       result(status == DMA_ERROR ?
+                              "completion error status" :
+                              "completion busy status", total_tests, src_off,
+                              dst_off, len, ret);
                         failed_tests++;
                         continue;
                 }
  
-               /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
-               unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt);
+               dmaengine_unmap_put(um);
  
-               error_count = 0;
+               if (params->noverify) {
+                       verbose_result("test passed", total_tests, src_off,
+                                      dst_off, len, 0);
+                       continue;
+               }
  
-               pr_debug("%s: verifying source buffer...\n", thread_name);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, -1,
+               pr_debug("%s: verifying source buffer...\n", current->comm);
+               error_count = dmatest_verify(thread->srcs, 0, src_off,
                                 0, PATTERN_SRC, true);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, 0,
-                               src_off, PATTERN_SRC | PATTERN_COPY, true);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, 1,
-                               src_off + len, PATTERN_SRC, true);
-
-               pr_debug("%s: verifying dest buffer...\n", thread_name);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, -1,
+               error_count += dmatest_verify(thread->srcs, src_off,
+                               src_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, true);
+               error_count += dmatest_verify(thread->srcs, src_off + len,
+                               params->buf_size, src_off + len,
+                               PATTERN_SRC, true);
+
+               pr_debug("%s: verifying dest buffer...\n", current->comm);
+               error_count += dmatest_verify(thread->dsts, 0, dst_off,
                                 0, PATTERN_DST, false);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, 0,
-                               src_off, PATTERN_SRC | PATTERN_COPY, false);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, 1,
-                               dst_off + len, PATTERN_DST, false);
+               error_count += dmatest_verify(thread->dsts, dst_off,
+                               dst_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, false);
+               error_count += dmatest_verify(thread->dsts, dst_off + len,
+                               params->buf_size, dst_off + len,
+                               PATTERN_DST, false);
  
                 if (error_count) {
-                       thread_result_add(info, result, DMATEST_ET_VERIFY,
-                                         total_tests, src_off, dst_off,
-                                         len, error_count);
+                       result("data error", total_tests, src_off, dst_off,
+                              len, error_count);
                         failed_tests++;
                 } else {
-                       thread_result_add(info, result, DMATEST_ET_OK,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       verbose_result("test passed", total_tests, src_off,
+                                      dst_off, len, 0);
                 }
         }
+       runtime = ktime_us_delta(ktime_get(), ktime);
  
         ret = 0;
         for (i = 0; thread->dsts[i]; i++)
@@ -802,20 +700,17 @@ err_srcbuf:
  err_srcs:
         kfree(pq_coefs);
  err_thread_type:
-       pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
-                       thread_name, total_tests, failed_tests, ret);
+       pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n",
+               current->comm, total_tests, failed_tests,
+               dmatest_persec(runtime, total_tests),
+               dmatest_KBs(runtime, total_len), ret);
  
         /* terminate all transfers on specified channels */
         if (ret)
                 dmaengine_terminate_all(chan);
  
         thread->done = true;
-
-       if (params->iterations > 0)
-               while (!kthread_should_stop()) {
-                       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
-                       interruptible_sleep_on(&wait_dmatest_exit);
-               }
+       wake_up(&thread_wait);
  
         return ret;
  }
@@ -828,9 +723,10 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
  
         list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
                 ret = kthread_stop(thread->task);
-               pr_debug("dmatest: thread %s exited with status %d\n",
-                               thread->task->comm, ret);
+               pr_debug("thread %s exited with status %d\n",
+                        thread->task->comm, ret);
                 list_del(&thread->node);
+               put_task_struct(thread->task);
                 kfree(thread);
         }
  
@@ -861,27 +757,27 @@ static int dmatest_add_threads(struct dmatest_info *info,
         for (i = 0; i < params->threads_per_chan; i++) {
                 thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
                 if (!thread) {
-                       pr_warning("dmatest: No memory for %s-%s%u\n",
-                                  dma_chan_name(chan), op, i);
-
+                       pr_warn("No memory for %s-%s%u\n",
+                               dma_chan_name(chan), op, i);
                         break;
                 }
                 thread->info = info;
                 thread->chan = dtc->chan;
                 thread->type = type;
                 smp_wmb();
-               thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+               thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
                                 dma_chan_name(chan), op, i);
                 if (IS_ERR(thread->task)) {
-                       pr_warning("dmatest: Failed to run thread %s-%s%u\n",
-                                       dma_chan_name(chan), op, i);
+                       pr_warn("Failed to create thread %s-%s%u\n",
+                               dma_chan_name(chan), op, i);
                         kfree(thread);
                         break;
                 }
  
                 /* srcbuf and dstbuf are allocated by the thread itself */
-
+               get_task_struct(thread->task);
                 list_add_tail(&thread->node, &dtc->threads);
+               wake_up_process(thread->task);
         }
  
         return i;
@@ -897,7 +793,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
  
         dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
         if (!dtc) {
-               pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+               pr_warn("No memory for %s\n", dma_chan_name(chan));
                 return -ENOMEM;
         }
  
@@ -917,7 +813,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
                 thread_count += cnt > 0 ? cnt : 0;
         }
  
-       pr_info("dmatest: Started %u threads using %s\n",
+       pr_info("Started %u threads using %s\n",
                 thread_count, dma_chan_name(chan));
  
         list_add_tail(&dtc->node, &info->channels);
@@ -937,20 +833,20 @@ static bool filter(struct dma_chan *chan, void *param)
                 return true;
  }
  
-static int __run_threaded_test(struct dmatest_info *info)
+static void request_channels(struct dmatest_info *info,
+                            enum dma_transaction_type type)
  {
         dma_cap_mask_t mask;
-       struct dma_chan *chan;
-       struct dmatest_params *params = &info->params;
-       int err = 0;
  
         dma_cap_zero(mask);
-       dma_cap_set(DMA_MEMCPY, mask);
+       dma_cap_set(type, mask);
         for (;;) {
+               struct dmatest_params *params = &info->params;
+               struct dma_chan *chan;
+
                 chan = dma_request_channel(mask, filter, params);
                 if (chan) {
-                       err = dmatest_add_channel(info, chan);
-                       if (err) {
+                       if (dmatest_add_channel(info, chan)) {
                                 dma_release_channel(chan);
                                 break; /* add_channel failed, punt */
                         }
@@ -960,22 +856,30 @@ static int __run_threaded_test(struct dmatest_info *info)
                     info->nr_channels >= params->max_channels)
                         break; /* we have all we need */
         }
-       return err;
  }
  
-#ifndef MODULE
-static int run_threaded_test(struct dmatest_info *info)
+static void run_threaded_test(struct dmatest_info *info)
  {
-       int ret;
+       struct dmatest_params *params = &info->params;
  
-       mutex_lock(&info->lock);
-       ret = __run_threaded_test(info);
-       mutex_unlock(&info->lock);
-       return ret;
+       /* Copy test parameters */
+       params->buf_size = test_buf_size;
+       strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
+       strlcpy(params->device, strim(test_device), sizeof(params->device));
+       params->threads_per_chan = threads_per_chan;
+       params->max_channels = max_channels;
+       params->iterations = iterations;
+       params->xor_sources = xor_sources;
+       params->pq_sources = pq_sources;
+       params->timeout = timeout;
+       params->noverify = noverify;
+
+       request_channels(info, DMA_MEMCPY);
+       request_channels(info, DMA_XOR);
+       request_channels(info, DMA_PQ);
  }
-#endif
  
-static void __stop_threaded_test(struct dmatest_info *info)
+static void stop_threaded_test(struct dmatest_info *info)
  {
         struct dmatest_chan *dtc, *_dtc;
         struct dma_chan *chan;
@@ -984,203 +888,86 @@ static void __stop_threaded_test(struct dmatest_info *info)
                 list_del(&dtc->node);
                 chan = dtc->chan;
                 dmatest_cleanup_channel(dtc);
-               pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan));
+               pr_debug("dropped channel %s\n", dma_chan_name(chan));
                 dma_release_channel(chan);
         }
  
         info->nr_channels = 0;
  }
  
-static void stop_threaded_test(struct dmatest_info *info)
+static void restart_threaded_test(struct dmatest_info *info, bool run)
  {
-       mutex_lock(&info->lock);
-       __stop_threaded_test(info);
-       mutex_unlock(&info->lock);
-}
-
-static int __restart_threaded_test(struct dmatest_info *info, bool run)
-{
-       struct dmatest_params *params = &info->params;
+       /* we might be called early to set run=, defer running until all
+        * parameters have been evaluated
+        */
+       if (!info->did_init)
+               return;
  
         /* Stop any running test first */
-       __stop_threaded_test(info);
-
-       if (run == false)
-               return 0;
-
-       /* Clear results from previous run */
-       result_free(info, NULL);
-
-       /* Copy test parameters */
-       params->buf_size = test_buf_size;
-       strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
-       strlcpy(params->device, strim(test_device), sizeof(params->device));
-       params->threads_per_chan = threads_per_chan;
-       params->max_channels = max_channels;
-       params->iterations = iterations;
-       params->xor_sources = xor_sources;
-       params->pq_sources = pq_sources;
-       params->timeout = timeout;
+       stop_threaded_test(info);
  
         /* Run test with new parameters */
-       return __run_threaded_test(info);
-}
-
-static bool __is_threaded_test_run(struct dmatest_info *info)
-{
-       struct dmatest_chan *dtc;
-
-       list_for_each_entry(dtc, &info->channels, node) {
-               struct dmatest_thread *thread;
-
-               list_for_each_entry(thread, &dtc->threads, node) {
-                       if (!thread->done)
-                               return true;
-               }
-       }
-
-       return false;
+       run_threaded_test(info);
  }
  
-static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
-               size_t count, loff_t *ppos)
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
  {
-       struct dmatest_info *info = file->private_data;
-       char buf[3];
+       struct dmatest_info *info = &test_info;
  
         mutex_lock(&info->lock);
-
-       if (__is_threaded_test_run(info)) {
-               buf[0] = 'Y';
+       if (is_threaded_test_run(info)) {
+               dmatest_run = true;
         } else {
-               __stop_threaded_test(info);
-               buf[0] = 'N';
+               stop_threaded_test(info);
+               dmatest_run = false;
         }
-
         mutex_unlock(&info->lock);
-       buf[1] = '\n';
-       buf[2] = 0x00;
-       return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static ssize_t dtf_write_run(struct file *file, const char __user *user_buf,
-               size_t count, loff_t *ppos)
-{
-       struct dmatest_info *info = file->private_data;
-       char buf[16];
-       bool bv;
-       int ret = 0;
  
-       if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1))))
-               return -EFAULT;
-
-       if (strtobool(buf, &bv) == 0) {
-               mutex_lock(&info->lock);
-
-               if (__is_threaded_test_run(info))
-                       ret = -EBUSY;
-               else
-                       ret = __restart_threaded_test(info, bv);
-
-               mutex_unlock(&info->lock);
-       }
-
-       return ret ? ret : count;
+       return param_get_bool(val, kp);
  }
  
-static const struct file_operations dtf_run_fops = {
-       .read   = dtf_read_run,
-       .write  = dtf_write_run,
-       .open   = simple_open,
-       .llseek = default_llseek,
-};
-
-static int dtf_results_show(struct seq_file *sf, void *data)
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
  {
-       struct dmatest_info *info = sf->private;
-       struct dmatest_result *result;
-       struct dmatest_thread_result *tr;
-       unsigned int i;
+       struct dmatest_info *info = &test_info;
+       int ret;
  
-       mutex_lock(&info->results_lock);
-       list_for_each_entry(result, &info->results, node) {
-               list_for_each_entry(tr, &result->results, node) {
-                       seq_printf(sf, "%s\n",
-                               thread_result_get(result->name, tr));
-                       if (tr->type == DMATEST_ET_VERIFY_BUF) {
-                               for (i = 0; i < tr->vr->error_count; i++) {
-                                       seq_printf(sf, "\t%s\n",
-                                               verify_result_get_one(tr->vr, i));
-                               }
-                       }
-               }
+       mutex_lock(&info->lock);
+       ret = param_set_bool(val, kp);
+       if (ret) {
+               mutex_unlock(&info->lock);
+               return ret;
         }
  
-       mutex_unlock(&info->results_lock);
-       return 0;
-}
-
-static int dtf_results_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, dtf_results_show, inode->i_private);
-}
-
-static const struct file_operations dtf_results_fops = {
-       .open           = dtf_results_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int dmatest_register_dbgfs(struct dmatest_info *info)
-{
-       struct dentry *d;
-
-       d = debugfs_create_dir("dmatest", NULL);
-       if (IS_ERR(d))
-               return PTR_ERR(d);
-       if (!d)
-               goto err_root;
+       if (is_threaded_test_run(info))
+               ret = -EBUSY;
+       else if (dmatest_run)
+               restart_threaded_test(info, dmatest_run);
  
-       info->root = d;
-
-       /* Run or stop threaded test */
-       debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info,
-                           &dtf_run_fops);
-
-       /* Results of test in progress */
-       debugfs_create_file("results", S_IRUGO, info->root, info,
-                           &dtf_results_fops);
-
-       return 0;
+       mutex_unlock(&info->lock);
  
-err_root:
-       pr_err("dmatest: Failed to initialize debugfs\n");
-       return -ENOMEM;
+       return ret;
  }
  
  static int __init dmatest_init(void)
  {
         struct dmatest_info *info = &test_info;
-       int ret;
-
-       memset(info, 0, sizeof(*info));
+       struct dmatest_params *params = &info->params;
  
-       mutex_init(&info->lock);
-       INIT_LIST_HEAD(&info->channels);
+       if (dmatest_run) {
+               mutex_lock(&info->lock);
+               run_threaded_test(info);
+               mutex_unlock(&info->lock);
+       }
  
-       mutex_init(&info->results_lock);
-       INIT_LIST_HEAD(&info->results);
+       if (params->iterations && wait)
+               wait_event(thread_wait, !is_threaded_test_run(info));
  
-       ret = dmatest_register_dbgfs(info);
-       if (ret)
-               return ret;
+       /* module parameters are stable, inittime tests are started,
+        * let userspace take over 'run' control
+        */
+       info->did_init = true;
  
-#ifdef MODULE
         return 0;
-#else
-       return run_threaded_test(info);
-#endif
  }
  /* when compiled-in wait for drivers to load first */
  late_initcall(dmatest_init);
@@ -1189,9 +976,9 @@ static void __exit dmatest_exit(void)
  {
         struct dmatest_info *info = &test_info;
  
-       debugfs_remove_recursive(info->root);
+       mutex_lock(&info->lock);
         stop_threaded_test(info);
-       result_free(info, NULL);
+       mutex_unlock(&info->lock);
  }
  module_exit(dmatest_exit);
  
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c

index 89eb89f222846e0ff5d20cfc5e14619fc05d6600..7516be4677cf7e778ba3bef2482cae9179ff41a3 100644 (file)
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -85,10 +85,6 @@ static struct device *chan2dev(struct dma_chan *chan)
  {
         return &chan->dev->device;
  }
-static struct device *chan2parent(struct dma_chan *chan)
-{
-       return chan->dev->device.parent;
-}
  
  static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
  {
@@ -311,26 +307,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
         list_splice_init(&desc->tx_list, &dwc->free_list);
         list_move(&desc->desc_node, &dwc->free_list);
  
-       if (!is_slave_direction(dwc->direction)) {
-               struct device *parent = chan2parent(&dwc->chan);
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(parent, desc->lli.dar,
-                                       desc->total_len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(parent, desc->lli.dar,
-                                       desc->total_len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(parent, desc->lli.sar,
-                                       desc->total_len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(parent, desc->lli.sar,
-                                       desc->total_len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
         spin_unlock_irqrestore(&dwc->lock, flags);
  
         if (callback)
@@ -1098,13 +1075,13 @@ dwc_tx_status(struct dma_chan *chan,
         enum dma_status         ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS)
+       if (ret != DMA_COMPLETE)
                 dma_set_residue(txstate, dwc_get_residue(dwc));
  
         if (dwc->paused && ret == DMA_IN_PROGRESS)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c

index bef8a368c8ddcac29f4bd7b4832bd3d0fa4cbdb6..2539ea0cbc6394f918fb849ffd3c6f6ca73f33a0 100644 (file)
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -46,14 +46,21 @@
  #define EDMA_CHANS     64
  #endif /* CONFIG_ARCH_DAVINCI_DA8XX */
  
-/* Max of 16 segments per channel to conserve PaRAM slots */
-#define MAX_NR_SG              16
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots
+ * Also note that MAX_NR_SG should be atleast the no.of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires atleast 17 slots, so we setup the default to 20.
+ */
+#define MAX_NR_SG              20
  #define EDMA_MAX_SLOTS         MAX_NR_SG
  #define EDMA_DESCRIPTORS       16
  
  struct edma_desc {
         struct virt_dma_desc            vdesc;
         struct list_head                node;
+       int                             cyclic;
         int                             absync;
         int                             pset_nr;
         int                             processed;
@@ -167,8 +174,13 @@ static void edma_execute(struct edma_chan *echan)
          * then setup a link to the dummy slot, this results in all future
          * events being absorbed and that's OK because we're done
          */
-       if (edesc->processed == edesc->pset_nr)
-               edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+       if (edesc->processed == edesc->pset_nr) {
+               if (edesc->cyclic)
+                       edma_link(echan->slot[nslots-1], echan->slot[1]);
+               else
+                       edma_link(echan->slot[nslots-1],
+                                 echan->ecc->dummy_slot);
+       }
  
         edma_resume(echan->ch_num);
  
@@ -250,6 +262,117 @@ static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
         return ret;
  }
  
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel who's PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: How much is the dev_width
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+       dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+       enum dma_slave_buswidth dev_width, unsigned int dma_length,
+       enum dma_transfer_direction direction)
+{
+       struct edma_chan *echan = to_edma_chan(chan);
+       struct device *dev = chan->device->dev;
+       int acnt, bcnt, ccnt, cidx;
+       int src_bidx, dst_bidx, src_cidx, dst_cidx;
+       int absync;
+
+       acnt = dev_width;
+       /*
+        * If the maxburst is equal to the fifo width, use
+        * A-synced transfers. This allows for large contiguous
+        * buffer transfers using only one PaRAM set.
+        */
+       if (burst == 1) {
+               /*
+                * For the A-sync case, bcnt and ccnt are the remainder
+                * and quotient respectively of the division of:
+                * (dma_length / acnt) by (SZ_64K -1). This is so
+                * that in case bcnt over flows, we have ccnt to use.
+                * Note: In A-sync tranfer only, bcntrld is used, but it
+                * only applies for sg_dma_len(sg) >= SZ_64K.
+                * In this case, the best way adopted is- bccnt for the
+                * first frame will be the remainder below. Then for
+                * every successive frame, bcnt will be SZ_64K-1. This
+                * is assured as bcntrld = 0xffff in end of function.
+                */
+               absync = false;
+               ccnt = dma_length / acnt / (SZ_64K - 1);
+               bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+               /*
+                * If bcnt is non-zero, we have a remainder and hence an
+                * extra frame to transfer, so increment ccnt.
+                */
+               if (bcnt)
+                       ccnt++;
+               else
+                       bcnt = SZ_64K - 1;
+               cidx = acnt;
+       } else {
+               /*
+                * If maxburst is greater than the fifo address_width,
+                * use AB-synced transfers where A count is the fifo
+                * address_width and B count is the maxburst. In this
+                * case, we are limited to transfers of C count frames
+                * of (address_width * maxburst) where C count is limited
+                * to SZ_64K-1. This places an upper bound on the length
+                * of an SG segment that can be handled.
+                */
+               absync = true;
+               bcnt = burst;
+               ccnt = dma_length / (acnt * bcnt);
+               if (ccnt > (SZ_64K - 1)) {
+                       dev_err(dev, "Exceeded max SG segment size\n");
+                       return -EINVAL;
+               }
+               cidx = acnt * bcnt;
+       }
+
+       if (direction == DMA_MEM_TO_DEV) {
+               src_bidx = acnt;
+               src_cidx = cidx;
+               dst_bidx = 0;
+               dst_cidx = 0;
+       } else if (direction == DMA_DEV_TO_MEM)  {
+               src_bidx = 0;
+               src_cidx = 0;
+               dst_bidx = acnt;
+               dst_cidx = cidx;
+       } else {
+               dev_err(dev, "%s: direction not implemented yet\n", __func__);
+               return -EINVAL;
+       }
+
+       pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+       /* Configure A or AB synchronized transfers */
+       if (absync)
+               pset->opt |= SYNCDIM;
+
+       pset->src = src_addr;
+       pset->dst = dst_addr;
+
+       pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+       pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+       pset->a_b_cnt = bcnt << 16 | acnt;
+       pset->ccnt = ccnt;
+       /*
+        * Only time when (bcntrld) auto reload is required is for
+        * A-sync case, and in this case, a requirement of reload value
+        * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+        * and then later will be populated by edma_execute.
+        */
+       pset->link_bcntrld = 0xffffffff;
+       return absync;
+}
+
  static struct dma_async_tx_descriptor *edma_prep_slave_sg(
         struct dma_chan *chan, struct scatterlist *sgl,
         unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
         struct edma_chan *echan = to_edma_chan(chan);
         struct device *dev = chan->device->dev;
         struct edma_desc *edesc;
-       dma_addr_t dev_addr;
+       dma_addr_t src_addr = 0, dst_addr = 0;
         enum dma_slave_buswidth dev_width;
         u32 burst;
         struct scatterlist *sg;
-       int acnt, bcnt, ccnt, src, dst, cidx;
-       int src_bidx, dst_bidx, src_cidx, dst_cidx;
-       int i, nslots;
+       int i, nslots, ret;
  
         if (unlikely(!echan || !sgl || !sg_len))
                 return NULL;
  
         if (direction == DMA_DEV_TO_MEM) {
-               dev_addr = echan->cfg.src_addr;
+               src_addr = echan->cfg.src_addr;
                 dev_width = echan->cfg.src_addr_width;
                 burst = echan->cfg.src_maxburst;
         } else if (direction == DMA_MEM_TO_DEV) {
-               dev_addr = echan->cfg.dst_addr;
+               dst_addr = echan->cfg.dst_addr;
                 dev_width = echan->cfg.dst_addr_width;
                 burst = echan->cfg.dst_maxburst;
         } else {
@@ -307,7 +428,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
                         if (echan->slot[i] < 0) {
                                 kfree(edesc);
                                 dev_err(dev, "Failed to allocate slot\n");
-                               kfree(edesc);
                                 return NULL;
                         }
                 }
@@ -315,64 +435,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
  
         /* Configure PaRAM sets for each SG */
         for_each_sg(sgl, sg, sg_len, i) {
-
-               acnt = dev_width;
-
-               /*
-                * If the maxburst is equal to the fifo width, use
-                * A-synced transfers. This allows for large contiguous
-                * buffer transfers using only one PaRAM set.
-                */
-               if (burst == 1) {
-                       edesc->absync = false;
-                       ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
-                       bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
-                       if (bcnt)
-                               ccnt++;
-                       else
-                               bcnt = SZ_64K - 1;
-                       cidx = acnt;
-               /*
-                * If maxburst is greater than the fifo address_width,
-                * use AB-synced transfers where A count is the fifo
-                * address_width and B count is the maxburst. In this
-                * case, we are limited to transfers of C count frames
-                * of (address_width * maxburst) where C count is limited
-                * to SZ_64K-1. This places an upper bound on the length
-                * of an SG segment that can be handled.
-                */
-               } else {
-                       edesc->absync = true;
-                       bcnt = burst;
-                       ccnt = sg_dma_len(sg) / (acnt * bcnt);
-                       if (ccnt > (SZ_64K - 1)) {
-                               dev_err(dev, "Exceeded max SG segment size\n");
-                               kfree(edesc);
-                               return NULL;
-                       }
-                       cidx = acnt * bcnt;
+               /* Get address for each SG */
+               if (direction == DMA_DEV_TO_MEM)
+                       dst_addr = sg_dma_address(sg);
+               else
+                       src_addr = sg_dma_address(sg);
+
+               ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+                                      dst_addr, burst, dev_width,
+                                      sg_dma_len(sg), direction);
+               if (ret < 0) {
+                       kfree(edesc);
+                       return NULL;
                 }
  
-               if (direction == DMA_MEM_TO_DEV) {
-                       src = sg_dma_address(sg);
-                       dst = dev_addr;
-                       src_bidx = acnt;
-                       src_cidx = cidx;
-                       dst_bidx = 0;
-                       dst_cidx = 0;
-               } else {
-                       src = dev_addr;
-                       dst = sg_dma_address(sg);
-                       src_bidx = 0;
-                       src_cidx = 0;
-                       dst_bidx = acnt;
-                       dst_cidx = cidx;
-               }
-
-               edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
-               /* Configure A or AB synchronized transfers */
-               if (edesc->absync)
-                       edesc->pset[i].opt |= SYNCDIM;
+               edesc->absync = ret;
  
                 /* If this is the last in a current SG set of transactions,
                    enable interrupts so that next set is processed */
@@ -382,17 +459,138 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
                 /* If this is the last set, enable completion interrupt flag */
                 if (i == sg_len - 1)
                         edesc->pset[i].opt |= TCINTEN;
+       }
  
-               edesc->pset[i].src = src;
-               edesc->pset[i].dst = dst;
+       return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
  
-               edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
-               edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+       struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+       size_t period_len, enum dma_transfer_direction direction,
+       unsigned long tx_flags, void *context)
+{
+       struct edma_chan *echan = to_edma_chan(chan);
+       struct device *dev = chan->device->dev;
+       struct edma_desc *edesc;
+       dma_addr_t src_addr, dst_addr;
+       enum dma_slave_buswidth dev_width;
+       u32 burst;
+       int i, ret, nslots;
+
+       if (unlikely(!echan || !buf_len || !period_len))
+               return NULL;
+
+       if (direction == DMA_DEV_TO_MEM) {
+               src_addr = echan->cfg.src_addr;
+               dst_addr = buf_addr;
+               dev_width = echan->cfg.src_addr_width;
+               burst = echan->cfg.src_maxburst;
+       } else if (direction == DMA_MEM_TO_DEV) {
+               src_addr = buf_addr;
+               dst_addr = echan->cfg.dst_addr;
+               dev_width = echan->cfg.dst_addr_width;
+               burst = echan->cfg.dst_maxburst;
+       } else {
+               dev_err(dev, "%s: bad direction?\n", __func__);
+               return NULL;
+       }
+
+       if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+               dev_err(dev, "Undefined slave buswidth\n");
+               return NULL;
+       }
+
+       if (unlikely(buf_len % period_len)) {
+               dev_err(dev, "Period should be multiple of Buffer length\n");
+               return NULL;
+       }
+
+       nslots = (buf_len / period_len) + 1;
+
+       /*
+        * Cyclic DMA users such as audio cannot tolerate delays introduced
+        * by cases where the number of periods is more than the maximum
+        * number of SGs the EDMA driver can handle at a time. For DMA types
+        * such as Slave SGs, such delays are tolerable and synchronized,
+        * but the synchronization is difficult to achieve with Cyclic and
+        * cannot be guaranteed, so we error out early.
+        */
+       if (nslots > MAX_NR_SG)
+               return NULL;
+
+       edesc = kzalloc(sizeof(*edesc) + nslots *
+               sizeof(edesc->pset[0]), GFP_ATOMIC);
+       if (!edesc) {
+               dev_dbg(dev, "Failed to allocate a descriptor\n");
+               return NULL;
+       }
+
+       edesc->cyclic = 1;
+       edesc->pset_nr = nslots;
+
+       dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
+       dev_dbg(dev, "%s: period_len=%d\n", __func__, period_len);
+       dev_dbg(dev, "%s: buf_len=%d\n", __func__, buf_len);
+
+       for (i = 0; i < nslots; i++) {
+               /* Allocate a PaRAM slot, if needed */
+               if (echan->slot[i] < 0) {
+                       echan->slot[i] =
+                               edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+                                               EDMA_SLOT_ANY);
+                       if (echan->slot[i] < 0) {
+                               dev_err(dev, "Failed to allocate slot\n");
+                               return NULL;
+                       }
+               }
+
+               if (i == nslots - 1) {
+                       memcpy(&edesc->pset[i], &edesc->pset[0],
+                              sizeof(edesc->pset[0]));
+                       break;
+               }
+
+               ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+                                      dst_addr, burst, dev_width, period_len,
+                                      direction);
+               if (ret < 0)
+                       return NULL;
  
-               edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
-               edesc->pset[i].ccnt = ccnt;
-               edesc->pset[i].link_bcntrld = 0xffffffff;
+               if (direction == DMA_DEV_TO_MEM)
+                       dst_addr += period_len;
+               else
+                       src_addr += period_len;
  
+               dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+               dev_dbg(dev,
+                       "\n pset[%d]:\n"
+                       "  chnum\t%d\n"
+                       "  slot\t%d\n"
+                       "  opt\t%08x\n"
+                       "  src\t%08x\n"
+                       "  dst\t%08x\n"
+                       "  abcnt\t%08x\n"
+                       "  ccnt\t%08x\n"
+                       "  bidx\t%08x\n"
+                       "  cidx\t%08x\n"
+                       "  lkrld\t%08x\n",
+                       i, echan->ch_num, echan->slot[i],
+                       edesc->pset[i].opt,
+                       edesc->pset[i].src,
+                       edesc->pset[i].dst,
+                       edesc->pset[i].a_b_cnt,
+                       edesc->pset[i].ccnt,
+                       edesc->pset[i].src_dst_bidx,
+                       edesc->pset[i].src_dst_cidx,
+                       edesc->pset[i].link_bcntrld);
+
+               edesc->absync = ret;
+
+               /*
+                * Enable interrupts for every period because callback
+                * has to be called for every period.
+                */
+               edesc->pset[i].opt |= TCINTEN;
         }
  
         return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
@@ -406,30 +604,34 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
         unsigned long flags;
         struct edmacc_param p;
  
-       /* Pause the channel */
-       edma_pause(echan->ch_num);
+       edesc = echan->edesc;
+
+       /* Pause the channel for non-cyclic */
+       if (!edesc || (edesc && !edesc->cyclic))
+               edma_pause(echan->ch_num);
  
         switch (ch_status) {
-       case DMA_COMPLETE:
+       case EDMA_DMA_COMPLETE:
                 spin_lock_irqsave(&echan->vchan.lock, flags);
  
-               edesc = echan->edesc;
                 if (edesc) {
-                       if (edesc->processed == edesc->pset_nr) {
+                       if (edesc->cyclic) {
+                               vchan_cyclic_callback(&edesc->vdesc);
+                       } else if (edesc->processed == edesc->pset_nr) {
                                 dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
                                 edma_stop(echan->ch_num);
                                 vchan_cookie_complete(&edesc->vdesc);
+                               edma_execute(echan);
                         } else {
                                 dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+                               edma_execute(echan);
                         }
-
-                       edma_execute(echan);
                 }
  
                 spin_unlock_irqrestore(&echan->vchan.lock, flags);
  
                 break;
-       case DMA_CC_ERROR:
+       case EDMA_DMA_CC_ERROR:
                 spin_lock_irqsave(&echan->vchan.lock, flags);
  
                 edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
         unsigned long flags;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS || !txstate)
+       if (ret == DMA_COMPLETE || !txstate)
                 return ret;
  
         spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
                           struct device *dev)
  {
         dma->device_prep_slave_sg = edma_prep_slave_sg;
+       dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
         dma->device_alloc_chan_resources = edma_alloc_chan_resources;
         dma->device_free_chan_resources = edma_free_chan_resources;
         dma->device_issue_pending = edma_issue_pending;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c

index 591cd8c63abbcb081a4cd2ca264ed118f7f3d782..cb4bf682a70863e6253396eb717cc7e9ae86e453 100644 (file)
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -733,28 +733,6 @@ static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
         spin_unlock_irqrestore(&edmac->lock, flags);
  }
  
-static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
-{
-       struct device *dev = desc->txd.chan->device->dev;
-
-       if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(dev, desc->src_addr, desc->size,
-                                        DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(dev, desc->src_addr, desc->size,
-                                      DMA_TO_DEVICE);
-       }
-       if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(dev, desc->dst_addr, desc->size,
-                                        DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(dev, desc->dst_addr, desc->size,
-                                      DMA_FROM_DEVICE);
-       }
-}
-
  static void ep93xx_dma_tasklet(unsigned long data)
  {
         struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
@@ -787,13 +765,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
  
         /* Now we can release all the chained descriptors */
         list_for_each_entry_safe(desc, d, &list, node) {
-               /*
-                * For the memcpy channels the API requires us to unmap the
-                * buffers unless requested otherwise.
-                */
-               if (!edmac->chan.private)
-                       ep93xx_dma_unmap_buffers(desc);
-
+               dma_descriptor_unmap(&desc->txd);
                 ep93xx_dma_desc_put(edmac, desc);
         }
  
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c

index 61517dd0d0b73b7077be4acc5cf075792a00d86e..7086a16a55f2ed488573e475e615600c80be767e 100644 (file)
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -870,22 +870,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
         /* Run any dependencies */
         dma_run_dependencies(txd);
  
-       /* Unmap the dst buffer, if requested */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
-       }
-
-       /* Unmap the src buffer, if requested */
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
-       }
-
+       dma_descriptor_unmap(txd);
  #ifdef FSL_DMA_LD_DEBUG
         chan_dbg(chan, "LD %p free\n", desc);
  #endif
@@ -1255,7 +1240,9 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
         WARN_ON(fdev->feature != chan->feature);
  
         chan->dev = fdev->dev;
-       chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+       chan->id = (res.start & 0xfff) < 0x300 ?
+                  ((res.start - 0x100) & 0xfff) >> 7 :
+                  ((res.start - 0x200) & 0xfff) >> 7;
         if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
                 dev_err(fdev->dev, "too many channels for device\n");
                 err = -EINVAL;
@@ -1428,6 +1415,7 @@ static int fsldma_of_remove(struct platform_device *op)
  }
  
  static const struct of_device_id fsldma_of_ids[] = {
+       { .compatible = "fsl,elo3-dma", },
         { .compatible = "fsl,eloplus-dma", },
         { .compatible = "fsl,elo-dma", },
         {}
@@ -1449,7 +1437,7 @@ static struct platform_driver fsldma_of_driver = {
  
  static __init int fsldma_init(void)
  {
-       pr_info("Freescale Elo / Elo Plus DMA driver\n");
+       pr_info("Freescale Elo series DMA driver\n");
         return platform_driver_register(&fsldma_of_driver);
  }
  
@@ -1461,5 +1449,5 @@ static void __exit fsldma_exit(void)
  subsys_initcall(fsldma_init);
  module_exit(fsldma_exit);
  
-MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h

index f5c38791fc7466f5d683b2ee49e718d8c29d5ca6..1ffc24484d23cdb0edd1e6011605aff5c9a6eb07 100644 (file)
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -112,7 +112,7 @@ struct fsldma_chan_regs {
  };
  
  struct fsldma_chan;
-#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
  
  struct fsldma_device {
         void __iomem *regs;     /* DGSR register base */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c

index 55852c02679143f453286f189e68fd777ea1afaa..6f9ac2022abd8b3d23c739bc7face1cf662a39f3 100644 (file)
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -572,9 +572,11 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
  
                 imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
  
-               dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
-                       "dma_length=%d\n", __func__, imxdmac->channel,
-                       d->dest, d->src, d->len);
+               dev_dbg(imxdma->dev,
+                       "%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+                       __func__, imxdmac->channel,
+                       (unsigned long long)d->dest,
+                       (unsigned long long)d->src, d->len);
  
                 break;
         /* Cyclic transfer is the same as slave_sg with special sg configuration. */
@@ -586,20 +588,22 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
                         imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
                                          DMA_CCR(imxdmac->channel));
  
-                       dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-                               "total length=%d dev_addr=0x%08x (dev2mem)\n",
-                               __func__, imxdmac->channel, d->sg, d->sgcount,
-                               d->len, imxdmac->per_address);
+                       dev_dbg(imxdma->dev,
+                               "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+                               __func__, imxdmac->channel,
+                               d->sg, d->sgcount, d->len,
+                               (unsigned long long)imxdmac->per_address);
                 } else if (d->direction == DMA_MEM_TO_DEV) {
                         imx_dmav1_writel(imxdma, imxdmac->per_address,
                                          DMA_DAR(imxdmac->channel));
                         imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
                                          DMA_CCR(imxdmac->channel));
  
-                       dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-                               "total length=%d dev_addr=0x%08x (mem2dev)\n",
-                               __func__, imxdmac->channel, d->sg, d->sgcount,
-                               d->len, imxdmac->per_address);
+                       dev_dbg(imxdma->dev,
+                               "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+                               __func__, imxdmac->channel,
+                               d->sg, d->sgcount, d->len,
+                               (unsigned long long)imxdmac->per_address);
                 } else {
                         dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
                                 __func__, imxdmac->channel);
@@ -771,7 +775,7 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan)
                 desc->desc.tx_submit = imxdma_tx_submit;
                 /* txd.flags will be overwritten in prep funcs */
                 desc->desc.flags = DMA_CTRL_ACK;
-               desc->status = DMA_SUCCESS;
+               desc->status = DMA_COMPLETE;
  
                 list_add_tail(&desc->node, &imxdmac->ld_free);
                 imxdmac->descs_allocated++;
@@ -870,7 +874,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
         int i;
         unsigned int periods = buf_len / period_len;
  
-       dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+       dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
                         __func__, imxdmac->channel, buf_len, period_len);
  
         if (list_empty(&imxdmac->ld_free) ||
@@ -926,8 +930,9 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
         struct imxdma_engine *imxdma = imxdmac->imxdma;
         struct imxdma_desc *desc;
  
-       dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
-                       __func__, imxdmac->channel, src, dest, len);
+       dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+               __func__, imxdmac->channel, (unsigned long long)src,
+               (unsigned long long)dest, len);
  
         if (list_empty(&imxdmac->ld_free) ||
             imxdma_chan_is_doing_cyclic(imxdmac))
@@ -956,9 +961,10 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
         struct imxdma_engine *imxdma = imxdmac->imxdma;
         struct imxdma_desc *desc;
  
-       dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
-               "   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
-               imxdmac->channel, xt->src_start, xt->dst_start,
+       dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+               "   src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+               imxdmac->channel, (unsigned long long)xt->src_start,
+               (unsigned long long) xt->dst_start,
                 xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
                 xt->numf, xt->frame_size);
  
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c

index c1fd504cae282491969886b0d0336784229e422d..c75679d420286c522679cdb9c344549c97e7a0c4 100644 (file)
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -638,7 +638,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
         if (error)
                 sdmac->status = DMA_ERROR;
         else
-               sdmac->status = DMA_SUCCESS;
+               sdmac->status = DMA_COMPLETE;
  
         dma_cookie_complete(&sdmac->desc);
         if (sdmac->desc.callback)
@@ -1089,8 +1089,8 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
                         param &= ~BD_CONT;
                 }
  
-               dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-                               i, count, sg->dma_address,
+               dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+                               i, count, (u64)sg->dma_address,
                                 param & BD_WRAP ? "wrap" : "",
                                 param & BD_INTR ? " intr" : "");
  
@@ -1163,8 +1163,8 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
                 if (i + 1 == num_periods)
                         param |= BD_WRAP;
  
-               dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-                               i, period_len, dma_addr,
+               dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+                               i, period_len, (u64)dma_addr,
                                 param & BD_WRAP ? "wrap" : "",
                                 param & BD_INTR ? " intr" : "");
  
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c

index a975ebebea8aaf9b8950497eefdcaf3793d930d2..1aab8130efa1c75ae51906938447c91d12a66ddc 100644 (file)
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -309,7 +309,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
                 callback_txd(param_txd);
         }
         if (midc->raw_tfr) {
-               desc->status = DMA_SUCCESS;
+               desc->status = DMA_COMPLETE;
                 if (desc->lli != NULL) {
                         pci_pool_free(desc->lli_pool, desc->lli,
                                                 desc->lli_phys);
@@ -481,7 +481,7 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
         enum dma_status ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS) {
+       if (ret != DMA_COMPLETE) {
                 spin_lock_bh(&midc->lock);
                 midc_scan_descriptors(to_middma_device(chan->device), midc);
                 spin_unlock_bh(&midc->lock);
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c

index 5ff6fc1819dc6a2e90c035956b23e23c56f9bb5d..1a49c777607c50d313482f3ead21c19572a1cf8d 100644 (file)
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -531,21 +531,6 @@ static void ioat1_cleanup_event(unsigned long data)
         writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
  }
  
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-                   size_t len, struct ioat_dma_descriptor *hw)
-{
-       struct pci_dev *pdev = chan->device->pdev;
-       size_t offset = len - hw->size;
-
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-               ioat_unmap(pdev, hw->dst_addr - offset, len,
-                          PCI_DMA_FROMDEVICE, flags, 1);
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               ioat_unmap(pdev, hw->src_addr - offset, len,
-                          PCI_DMA_TODEVICE, flags, 0);
-}
-
  dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
  {
         dma_addr_t phys_complete;
@@ -602,7 +587,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
                 dump_desc_dbg(ioat, desc);
                 if (tx->cookie) {
                         dma_cookie_complete(tx);
-                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       dma_descriptor_unmap(tx);
                         ioat->active -= desc->hw->tx_cnt;
                         if (tx->callback) {
                                 tx->callback(tx->callback_param);
@@ -733,7 +718,7 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
         enum dma_status ret;
  
         ret = dma_cookie_status(c, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         device->cleanup_fn((unsigned long) c);
@@ -833,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
  
         dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
         dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
-       flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-               DMA_PREP_INTERRUPT;
+       flags = DMA_PREP_INTERRUPT;
         tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
                                                    IOAT_TEST_SIZE, flags);
         if (!tx) {
@@ -859,7 +843,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
  
         if (tmo == 0 ||
             dma->device_tx_status(dma_chan, cookie, NULL)
-                                       != DMA_SUCCESS) {
+                                       != DMA_COMPLETE) {
                 dev_err(dev, "Self-test copy timed out, disabling\n");
                 err = -ENODEV;
                 goto unmap_dma;
@@ -885,8 +869,7 @@ static char ioat_interrupt_style[32] = "msix";
  module_param_string(ioat_interrupt_style, ioat_interrupt_style,
                     sizeof(ioat_interrupt_style), 0644);
  MODULE_PARM_DESC(ioat_interrupt_style,
-                "set ioat interrupt style: msix (default), "
-                "msix-single-vector, msi, intx)");
+                "set ioat interrupt style: msix (default), msi, intx");
  
  /**
   * ioat_dma_setup_interrupts - setup interrupt handler
@@ -904,8 +887,6 @@ int ioat_dma_setup_interrupts(struct ioatdma_device *device)
  
         if (!strcmp(ioat_interrupt_style, "msix"))
                 goto msix;
-       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-               goto msix_single_vector;
         if (!strcmp(ioat_interrupt_style, "msi"))
                 goto msi;
         if (!strcmp(ioat_interrupt_style, "intx"))
@@ -920,10 +901,8 @@ msix:
                 device->msix_entries[i].entry = i;
  
         err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
-       if (err < 0)
+       if (err)
                 goto msi;
-       if (err > 0)
-               goto msix_single_vector;
  
         for (i = 0; i < msixcnt; i++) {
                 msix = &device->msix_entries[i];
@@ -937,29 +916,13 @@ msix:
                                 chan = ioat_chan_by_index(device, j);
                                 devm_free_irq(dev, msix->vector, chan);
                         }
-                       goto msix_single_vector;
+                       goto msi;
                 }
         }
         intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
         device->irq_mode = IOAT_MSIX;
         goto done;
  
-msix_single_vector:
-       msix = &device->msix_entries[0];
-       msix->entry = 0;
-       err = pci_enable_msix(pdev, device->msix_entries, 1);
-       if (err)
-               goto msi;
-
-       err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
-                              "ioat-msix", device);
-       if (err) {
-               pci_disable_msix(pdev);
-               goto msi;
-       }
-       device->irq_mode = IOAT_MSIX_SINGLE;
-       goto done;
-
  msi:
         err = pci_enable_msi(pdev);
         if (err)
@@ -971,7 +934,7 @@ msi:
                 pci_disable_msi(pdev);
                 goto intx;
         }
-       device->irq_mode = IOAT_MSIX;
+       device->irq_mode = IOAT_MSI;
         goto done;
  
  intx:
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h

index 54fb7b9ff9aaa4afb88c823b3a129a22440a9320..11fb877ddca9a9b0888d23952dea8fb48245b617 100644 (file)
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -52,7 +52,6 @@
  enum ioat_irq_mode {
         IOAT_NOIRQ = 0,
         IOAT_MSIX,
-       IOAT_MSIX_SINGLE,
         IOAT_MSI,
         IOAT_INTX
  };
@@ -83,7 +82,6 @@ struct ioatdma_device {
         struct pci_pool *completion_pool;
  #define MAX_SED_POOLS  5
         struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
-       struct kmem_cache *sed_pool;
         struct dma_device common;
         u8 version;
         struct msix_entry msix_entries[4];
@@ -342,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err)
         return !!err;
  }
  
-static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
-                             int direction, enum dma_ctrl_flags flags, bool dst)
-{
-       if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
-           (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
-               pci_unmap_single(pdev, addr, len, direction);
-       else
-               pci_unmap_page(pdev, addr, len, direction);
-}
-
  int ioat_probe(struct ioatdma_device *device);
  int ioat_register(struct ioatdma_device *device);
  int ioat1_dma_probe(struct ioatdma_device *dev, int dca);
@@ -363,8 +351,6 @@ void ioat_init_channel(struct ioatdma_device *device,
                        struct ioat_chan_common *chan, int idx);
  enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
                                    struct dma_tx_state *txstate);
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-                   size_t len, struct ioat_dma_descriptor *hw);
  bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
                            dma_addr_t *phys_complete);
  void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c

index b925e1b1d139bddbc6edf86f34d4b943ebfb086c..5d3affe7e976165ec5576ac8c6551dd438af7786 100644 (file)
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
                 tx = &desc->txd;
                 dump_desc_dbg(ioat, desc);
                 if (tx->cookie) {
-                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       dma_descriptor_unmap(tx);
                         dma_cookie_complete(tx);
                         if (tx->callback) {
                                 tx->callback(tx->callback_param);
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h

index 212d584fe4272a37d947cc72b95e836d801b77b1..470292767e68e81e390e1b9065954fd2546e04e6 100644 (file)
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -157,7 +157,6 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
  
  int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
  int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
-void ioat3_dma_remove(struct ioatdma_device *dev);
  struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
  struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
  int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c

index d8ececaf1b57082cc5709aca68714542657b5566..820817e97e626a498561a9e5f0f3a61f22ffc9fa 100644 (file)
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -67,6 +67,8 @@
  #include "dma.h"
  #include "dma_v2.h"
  
+extern struct kmem_cache *ioat3_sed_cache;
+
  /* ioat hardware assumes at least two sources for raid operations */
  #define src_cnt_to_sw(x) ((x) + 2)
  #define src_cnt_to_hw(x) ((x) - 2)
@@ -87,22 +89,8 @@ static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
  static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6 };
  
-/*
- * technically sources 1 and 2 do not require SED, but the op will have
- * at least 9 descriptors so that's irrelevant.
- */
-static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                                     1, 1, 1, 1, 1, 1, 1 };
-
  static void ioat3_eh(struct ioat2_dma_chan *ioat);
  
-static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
-{
-       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
-
-       return raw->field[xor_idx_to_field[idx]];
-}
-
  static void xor_set_src(struct ioat_raw_descriptor *descs[2],
                         dma_addr_t addr, u32 offset, int idx)
  {
@@ -135,12 +123,6 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
         pq->coef[idx] = coef;
  }
  
-static int sed_get_pq16_pool_idx(int src_cnt)
-{
-
-       return pq16_idx_to_sed[src_cnt];
-}
-
  static bool is_jf_ioat(struct pci_dev *pdev)
  {
         switch (pdev->device) {
@@ -272,7 +254,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
         struct ioat_sed_ent *sed;
         gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
  
-       sed = kmem_cache_alloc(device->sed_pool, flags);
+       sed = kmem_cache_alloc(ioat3_sed_cache, flags);
         if (!sed)
                 return NULL;
  
@@ -280,7 +262,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
         sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
                                  flags, &sed->dma);
         if (!sed->hw) {
-               kmem_cache_free(device->sed_pool, sed);
+               kmem_cache_free(ioat3_sed_cache, sed);
                 return NULL;
         }
  
@@ -293,165 +275,7 @@ static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *s
                 return;
  
         dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
-       kmem_cache_free(device->sed_pool, sed);
-}
-
-static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
-                           struct ioat_ring_ent *desc, int idx)
-{
-       struct ioat_chan_common *chan = &ioat->base;
-       struct pci_dev *pdev = chan->device->pdev;
-       size_t len = desc->len;
-       size_t offset = len - desc->hw->size;
-       struct dma_async_tx_descriptor *tx = &desc->txd;
-       enum dma_ctrl_flags flags = tx->flags;
-
-       switch (desc->hw->ctl_f.op) {
-       case IOAT_OP_COPY:
-               if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
-                       ioat_dma_unmap(chan, flags, len, desc->hw);
-               break;
-       case IOAT_OP_XOR_VAL:
-       case IOAT_OP_XOR: {
-               struct ioat_xor_descriptor *xor = desc->xor;
-               struct ioat_ring_ent *ext;
-               struct ioat_xor_ext_descriptor *xor_ex = NULL;
-               int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[2];
-               int i;
-
-               if (src_cnt > 5) {
-                       ext = ioat2_get_ring_ent(ioat, idx + 1);
-                       xor_ex = ext->xor_ex;
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *) xor;
-                       descs[1] = (struct ioat_raw_descriptor *) xor_ex;
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = xor_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* dest is a source in xor validate operations */
-                       if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               ioat_unmap(pdev, xor->dst_addr - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 1);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-                       ioat_unmap(pdev, xor->dst_addr - offset, len,
-                                  PCI_DMA_FROMDEVICE, flags, 1);
-               break;
-       }
-       case IOAT_OP_PQ_VAL:
-       case IOAT_OP_PQ: {
-               struct ioat_pq_descriptor *pq = desc->pq;
-               struct ioat_ring_ent *ext;
-               struct ioat_pq_ext_descriptor *pq_ex = NULL;
-               int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[2];
-               int i;
-
-               if (src_cnt > 3) {
-                       ext = ioat2_get_ring_ent(ioat, idx + 1);
-                       pq_ex = ext->pq_ex;
-               }
-
-               /* in the 'continue' case don't unmap the dests as sources */
-               if (dmaf_p_disabled_continue(flags))
-                       src_cnt--;
-               else if (dmaf_continue(flags))
-                       src_cnt -= 3;
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *) pq;
-                       descs[1] = (struct ioat_raw_descriptor *) pq_ex;
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = pq_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* the dests are sources in pq validate operations */
-                       if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                                       ioat_unmap(pdev, pq->p_addr - offset,
-                                                  len, PCI_DMA_TODEVICE, flags, 0);
-                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                                       ioat_unmap(pdev, pq->q_addr - offset,
-                                                  len, PCI_DMA_TODEVICE, flags, 0);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                               ioat_unmap(pdev, pq->p_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                               ioat_unmap(pdev, pq->q_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-               }
-               break;
-       }
-       case IOAT_OP_PQ_16S:
-       case IOAT_OP_PQ_VAL_16S: {
-               struct ioat_pq_descriptor *pq = desc->pq;
-               int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[4];
-               int i;
-
-               /* in the 'continue' case don't unmap the dests as sources */
-               if (dmaf_p_disabled_continue(flags))
-                       src_cnt--;
-               else if (dmaf_continue(flags))
-                       src_cnt -= 3;
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *)pq;
-                       descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
-                       descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = pq16_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* the dests are sources in pq validate operations */
-                       if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                                       ioat_unmap(pdev, pq->p_addr - offset,
-                                                  len, PCI_DMA_TODEVICE,
-                                                  flags, 0);
-                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                                       ioat_unmap(pdev, pq->q_addr - offset,
-                                                  len, PCI_DMA_TODEVICE,
-                                                  flags, 0);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                               ioat_unmap(pdev, pq->p_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                               ioat_unmap(pdev, pq->q_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-               }
-               break;
-       }
-       default:
-               dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
-                       __func__, desc->hw->ctl_f.op);
-       }
+       kmem_cache_free(ioat3_sed_cache, sed);
  }
  
  static bool desc_has_ext(struct ioat_ring_ent *desc)
@@ -577,7 +401,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
                 tx = &desc->txd;
                 if (tx->cookie) {
                         dma_cookie_complete(tx);
-                       ioat3_dma_unmap(ioat, desc, idx + i);
+                       dma_descriptor_unmap(tx);
                         if (tx->callback) {
                                 tx->callback(tx->callback_param);
                                 tx->callback = NULL;
@@ -807,7 +631,7 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
         enum dma_status ret;
  
         ret = dma_cookie_status(c, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         ioat3_cleanup(ioat);
@@ -1129,9 +953,6 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
         u8 op;
         int i, s, idx, num_descs;
  
-       /* this function only handles src_cnt 9 - 16 */
-       BUG_ON(src_cnt < 9);
-
         /* this function is only called with 9-16 sources */
         op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
  
@@ -1159,8 +980,7 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
  
                 descs[0] = (struct ioat_raw_descriptor *) pq;
  
-               desc->sed = ioat3_alloc_sed(device,
-                                           sed_get_pq16_pool_idx(src_cnt));
+               desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
                 if (!desc->sed) {
                         dev_err(to_dev(chan),
                                 "%s: no free sed entries\n", __func__);
@@ -1218,13 +1038,21 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
         return &desc->txd;
  }
  
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+       if (dmaf_p_disabled_continue(flags))
+               return src_cnt + 1;
+       else if (dmaf_continue(flags))
+               return src_cnt + 3;
+       else
+               return src_cnt;
+}
+
  static struct dma_async_tx_descriptor *
  ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
               unsigned int src_cnt, const unsigned char *scf, size_t len,
               unsigned long flags)
  {
-       struct dma_device *dma = chan->device;
-
         /* specify valid address for disabled result */
         if (flags & DMA_PREP_PQ_DISABLE_P)
                 dst[0] = dst[1];
@@ -1244,7 +1072,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                 single_source_coef[0] = scf[0];
                 single_source_coef[1] = 0;
  
-               return (src_cnt > 8) && (dma->max_pq > 8) ?
+               return src_cnt_flags(src_cnt, flags) > 8 ?
                         __ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
                                                2, single_source_coef, len,
                                                flags) :
@@ -1252,7 +1080,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                                              single_source_coef, len, flags);
  
         } else {
-               return (src_cnt > 8) && (dma->max_pq > 8) ?
+               return src_cnt_flags(src_cnt, flags) > 8 ?
                         __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
                                                scf, len, flags) :
                         __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
@@ -1265,8 +1093,6 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
                   unsigned int src_cnt, const unsigned char *scf, size_t len,
                   enum sum_check_flags *pqres, unsigned long flags)
  {
-       struct dma_device *dma = chan->device;
-
         /* specify valid address for disabled result */
         if (flags & DMA_PREP_PQ_DISABLE_P)
                 pq[0] = pq[1];
@@ -1278,7 +1104,7 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
          */
         *pqres = 0;
  
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                 __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
                                        flags) :
                 __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
@@ -1289,7 +1115,6 @@ static struct dma_async_tx_descriptor *
  ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
                  unsigned int src_cnt, size_t len, unsigned long flags)
  {
-       struct dma_device *dma = chan->device;
         unsigned char scf[src_cnt];
         dma_addr_t pq[2];
  
@@ -1298,7 +1123,7 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
         flags |= DMA_PREP_PQ_DISABLE_Q;
         pq[1] = dst; /* specify valid address for disabled result */
  
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                 __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
                                        flags) :
                 __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
@@ -1310,7 +1135,6 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
                      unsigned int src_cnt, size_t len,
                      enum sum_check_flags *result, unsigned long flags)
  {
-       struct dma_device *dma = chan->device;
         unsigned char scf[src_cnt];
         dma_addr_t pq[2];
  
@@ -1324,8 +1148,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
         flags |= DMA_PREP_PQ_DISABLE_Q;
         pq[1] = pq[0]; /* specify valid address for disabled result */
  
-
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                 __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
                                        scf, len, flags) :
                 __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
@@ -1444,9 +1267,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                            DMA_TO_DEVICE);
         tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
                                       IOAT_NUM_SRC_TEST, PAGE_SIZE,
-                                     DMA_PREP_INTERRUPT |
-                                     DMA_COMPL_SKIP_SRC_UNMAP |
-                                     DMA_COMPL_SKIP_DEST_UNMAP);
+                                     DMA_PREP_INTERRUPT);
  
         if (!tx) {
                 dev_err(dev, "Self-test xor prep failed\n");
@@ -1468,7 +1289,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
  
         tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
  
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                 dev_err(dev, "Self-test xor timed out\n");
                 err = -ENODEV;
                 goto dma_unmap;
@@ -1507,9 +1328,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                            DMA_TO_DEVICE);
         tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
                                           IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                         &xor_val_result, DMA_PREP_INTERRUPT |
-                                         DMA_COMPL_SKIP_SRC_UNMAP |
-                                         DMA_COMPL_SKIP_DEST_UNMAP);
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
         if (!tx) {
                 dev_err(dev, "Self-test zero prep failed\n");
                 err = -ENODEV;
@@ -1530,7 +1349,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
  
         tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
  
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                 dev_err(dev, "Self-test validate timed out\n");
                 err = -ENODEV;
                 goto dma_unmap;
@@ -1545,6 +1364,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                 goto free_resources;
         }
  
+       memset(page_address(dest), 0, PAGE_SIZE);
+
         /* test for non-zero parity sum */
         op = IOAT_OP_XOR_VAL;
  
@@ -1554,9 +1375,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                            DMA_TO_DEVICE);
         tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
                                           IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                         &xor_val_result, DMA_PREP_INTERRUPT |
-                                         DMA_COMPL_SKIP_SRC_UNMAP |
-                                         DMA_COMPL_SKIP_DEST_UNMAP);
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
         if (!tx) {
                 dev_err(dev, "Self-test 2nd zero prep failed\n");
                 err = -ENODEV;
@@ -1577,7 +1396,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
  
         tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
  
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                 dev_err(dev, "Self-test 2nd validate timed out\n");
                 err = -ENODEV;
                 goto dma_unmap;
@@ -1630,52 +1449,36 @@ static int ioat3_dma_self_test(struct ioatdma_device *device)
  
  static int ioat3_irq_reinit(struct ioatdma_device *device)
  {
-       int msixcnt = device->common.chancnt;
         struct pci_dev *pdev = device->pdev;
-       int i;
-       struct msix_entry *msix;
-       struct ioat_chan_common *chan;
-       int err = 0;
+       int irq = pdev->irq, i;
+
+       if (!is_bwd_ioat(pdev))
+               return 0;
  
         switch (device->irq_mode) {
         case IOAT_MSIX:
+               for (i = 0; i < device->common.chancnt; i++) {
+                       struct msix_entry *msix = &device->msix_entries[i];
+                       struct ioat_chan_common *chan;
  
-               for (i = 0; i < msixcnt; i++) {
-                       msix = &device->msix_entries[i];
                         chan = ioat_chan_by_index(device, i);
                         devm_free_irq(&pdev->dev, msix->vector, chan);
                 }
  
                 pci_disable_msix(pdev);
                 break;
-
-       case IOAT_MSIX_SINGLE:
-               msix = &device->msix_entries[0];
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, msix->vector, chan);
-               pci_disable_msix(pdev);
-               break;
-
         case IOAT_MSI:
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, pdev->irq, chan);
                 pci_disable_msi(pdev);
-               break;
-
+               /* fall through */
         case IOAT_INTX:
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, pdev->irq, chan);
+               devm_free_irq(&pdev->dev, irq, device);
                 break;
-
         default:
                 return 0;
         }
-
         device->irq_mode = IOAT_NOIRQ;
  
-       err = ioat_dma_setup_interrupts(device);
-
-       return err;
+       return ioat_dma_setup_interrupts(device);
  }
  
  static int ioat3_reset_hw(struct ioat_chan_common *chan)
@@ -1718,14 +1521,12 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan)
         }
  
         err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
-       if (err) {
-               dev_err(&pdev->dev, "Failed to reset!\n");
-               return err;
-       }
-
-       if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+       if (!err)
                 err = ioat3_irq_reinit(device);
  
+       if (err)
+               dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
         return err;
  }
  
@@ -1835,21 +1636,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
                 char pool_name[14];
                 int i;
  
-               /* allocate sw descriptor pool for SED */
-               device->sed_pool = kmem_cache_create("ioat_sed",
-                               sizeof(struct ioat_sed_ent), 0, 0, NULL);
-               if (!device->sed_pool)
-                       return -ENOMEM;
-
                 for (i = 0; i < MAX_SED_POOLS; i++) {
                         snprintf(pool_name, 14, "ioat_hw%d_sed", i);
  
                         /* allocate SED DMA pool */
-                       device->sed_hw_pool[i] = dma_pool_create(pool_name,
+                       device->sed_hw_pool[i] = dmam_pool_create(pool_name,
                                         &pdev->dev,
                                         SED_SIZE * (i + 1), 64, 0);
                         if (!device->sed_hw_pool[i])
-                               goto sed_pool_cleanup;
+                               return -ENOMEM;
  
                 }
         }
@@ -1875,28 +1670,4 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
                 device->dca = ioat3_dca_init(pdev, device->reg_base);
  
         return 0;
-
-sed_pool_cleanup:
-       if (device->sed_pool) {
-               int i;
-               kmem_cache_destroy(device->sed_pool);
-
-               for (i = 0; i < MAX_SED_POOLS; i++)
-                       if (device->sed_hw_pool[i])
-                               dma_pool_destroy(device->sed_hw_pool[i]);
-       }
-
-       return -ENOMEM;
-}
-
-void ioat3_dma_remove(struct ioatdma_device *device)
-{
-       if (device->sed_pool) {
-               int i;
-               kmem_cache_destroy(device->sed_pool);
-
-               for (i = 0; i < MAX_SED_POOLS; i++)
-                       if (device->sed_hw_pool[i])
-                               dma_pool_destroy(device->sed_hw_pool[i]);
-       }
  }
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c

index 2c8d560e6334123097627ab59ac166a47cb1f0d6..1d051cd045dbc43b5d4b53d42008660f62765ddd 100644 (file)
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -123,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644);
  MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
  
  struct kmem_cache *ioat2_cache;
+struct kmem_cache *ioat3_sed_cache;
  
  #define DRV_NAME "ioatdma"
  
@@ -207,9 +208,6 @@ static void ioat_remove(struct pci_dev *pdev)
         if (!device)
                 return;
  
-       if (device->version >= IOAT_VER_3_0)
-               ioat3_dma_remove(device);
-
         dev_err(&pdev->dev, "Removing dma and dca services\n");
         if (device->dca) {
                 unregister_dca_provider(device->dca, &pdev->dev);
@@ -221,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev)
  
  static int __init ioat_init_module(void)
  {
-       int err;
+       int err = -ENOMEM;
  
         pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
                 DRV_NAME, IOAT_DMA_VERSION);
@@ -231,9 +229,21 @@ static int __init ioat_init_module(void)
         if (!ioat2_cache)
                 return -ENOMEM;
  
+       ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+       if (!ioat3_sed_cache)
+               goto err_ioat2_cache;
+
         err = pci_register_driver(&ioat_pci_driver);
         if (err)
-               kmem_cache_destroy(ioat2_cache);
+               goto err_ioat3_cache;
+
+       return 0;
+
+ err_ioat3_cache:
+       kmem_cache_destroy(ioat3_sed_cache);
+
+ err_ioat2_cache:
+       kmem_cache_destroy(ioat2_cache);
  
         return err;
  }
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c

index dd8b44a56e5d0f7090b8dd65bce87a73a60c90ef..c56137bc3868da7cdc43bf38fab53a30ca410430 100644 (file)
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -61,80 +61,6 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
         }
  }
  
-static void
-iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-       struct dma_async_tx_descriptor *tx = &desc->async_tx;
-       struct iop_adma_desc_slot *unmap = desc->group_head;
-       struct device *dev = &iop_chan->device->pdev->dev;
-       u32 len = unmap->unmap_len;
-       enum dma_ctrl_flags flags = tx->flags;
-       u32 src_cnt;
-       dma_addr_t addr;
-       dma_addr_t dest;
-
-       src_cnt = unmap->unmap_src_cnt;
-       dest = iop_desc_get_dest_addr(unmap, iop_chan);
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               enum dma_data_direction dir;
-
-               if (src_cnt > 1) /* is xor? */
-                       dir = DMA_BIDIRECTIONAL;
-               else
-                       dir = DMA_FROM_DEVICE;
-
-               dma_unmap_page(dev, dest, len, dir);
-       }
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               while (src_cnt--) {
-                       addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
-                       if (addr == dest)
-                               continue;
-                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-               }
-       }
-       desc->group_head = NULL;
-}
-
-static void
-iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-       struct dma_async_tx_descriptor *tx = &desc->async_tx;
-       struct iop_adma_desc_slot *unmap = desc->group_head;
-       struct device *dev = &iop_chan->device->pdev->dev;
-       u32 len = unmap->unmap_len;
-       enum dma_ctrl_flags flags = tx->flags;
-       u32 src_cnt = unmap->unmap_src_cnt;
-       dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
-       dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
-       int i;
-
-       if (tx->flags & DMA_PREP_CONTINUE)
-               src_cnt -= 3;
-
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
-               dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
-       }
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               dma_addr_t addr;
-
-               for (i = 0; i < src_cnt; i++) {
-                       addr = iop_desc_get_src_addr(unmap, iop_chan, i);
-                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-               }
-               if (desc->pq_check_result) {
-                       dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
-                       dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
-               }
-       }
-
-       desc->group_head = NULL;
-}
-
-
  static dma_cookie_t
  iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
         struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
@@ -152,15 +78,9 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
                 if (tx->callback)
                         tx->callback(tx->callback_param);
  
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                */
-               if (desc->group_head && desc->unmap_len) {
-                       if (iop_desc_is_pq(desc))
-                               iop_desc_unmap_pq(iop_chan, desc);
-                       else
-                               iop_desc_unmap(iop_chan, desc);
-               }
+               dma_descriptor_unmap(tx);
+               if (desc->group_head)
+                       desc->group_head = NULL;
         }
  
         /* run dependent operations */
@@ -591,7 +511,6 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
         if (sw_desc) {
                 grp_start = sw_desc->group_head;
                 iop_desc_init_interrupt(grp_start, iop_chan);
-               grp_start->unmap_len = 0;
                 sw_desc->async_tx.flags = flags;
         }
         spin_unlock_bh(&iop_chan->lock);
@@ -623,8 +542,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
                 iop_desc_set_byte_count(grp_start, iop_chan, len);
                 iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
                 iop_desc_set_memcpy_src_addr(grp_start, dma_src);
-               sw_desc->unmap_src_cnt = 1;
-               sw_desc->unmap_len = len;
                 sw_desc->async_tx.flags = flags;
         }
         spin_unlock_bh(&iop_chan->lock);
@@ -657,8 +574,6 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
                 iop_desc_init_xor(grp_start, src_cnt, flags);
                 iop_desc_set_byte_count(grp_start, iop_chan, len);
                 iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                 sw_desc->async_tx.flags = flags;
                 while (src_cnt--)
                         iop_desc_set_xor_src_addr(grp_start, src_cnt,
@@ -694,8 +609,6 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
                 grp_start->xor_check_result = result;
                 pr_debug("\t%s: grp_start->xor_check_result: %p\n",
                         __func__, grp_start->xor_check_result);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                 sw_desc->async_tx.flags = flags;
                 while (src_cnt--)
                         iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
@@ -748,8 +661,6 @@ iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                         dst[0] = dst[1] & 0x7;
  
                 iop_desc_set_pq_addr(g, dst);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                 sw_desc->async_tx.flags = flags;
                 for (i = 0; i < src_cnt; i++)
                         iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
@@ -804,8 +715,6 @@ iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
                 g->pq_check_result = pqres;
                 pr_debug("\t%s: g->pq_check_result: %p\n",
                         __func__, g->pq_check_result);
-               sw_desc->unmap_src_cnt = src_cnt+2;
-               sw_desc->unmap_len = len;
                 sw_desc->async_tx.flags = flags;
                 while (src_cnt--)
                         iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
@@ -864,7 +773,7 @@ static enum dma_status iop_adma_status(struct dma_chan *chan,
         int ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         iop_adma_slot_cleanup(iop_chan);
@@ -983,7 +892,7 @@ static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
         msleep(1);
  
         if (iop_adma_status(dma_chan, cookie, NULL) !=
-                       DMA_SUCCESS) {
+                       DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test copy timed out, disabling\n");
                 err = -ENODEV;
@@ -1083,7 +992,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
         msleep(8);
  
         if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test xor timed out, disabling\n");
                 err = -ENODEV;
@@ -1129,7 +1038,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
         iop_adma_issue_pending(dma_chan);
         msleep(8);
  
-       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test zero sum timed out, disabling\n");
                 err = -ENODEV;
@@ -1158,7 +1067,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
         iop_adma_issue_pending(dma_chan);
         msleep(8);
  
-       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test non-zero sum timed out, disabling\n");
                 err = -ENODEV;
@@ -1254,7 +1163,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
         msleep(8);
  
         if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                 dev_err(dev, "Self-test pq timed out, disabling\n");
                 err = -ENODEV;
                 goto free_resources;
@@ -1291,7 +1200,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
         msleep(8);
  
         if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                 dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
                 err = -ENODEV;
                 goto free_resources;
@@ -1323,7 +1232,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
         msleep(8);
  
         if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                 dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
                 err = -ENODEV;
                 goto free_resources;
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c

index cb9c0bc317e89ed6acebba276ed1be2788cc901e..128ca143486d1b59c0106cb8a177b5a2e1b803fd 100644 (file)
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1232,8 +1232,10 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
         desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
         descnew = desc;
  
-       dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
-               irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+       dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+               irq, (u64)sg_dma_address(*sg),
+               sgnext ? (u64)sg_dma_address(sgnext) : 0,
+               ichan->active_buffer, curbuf);
  
         /* Find the descriptor of sgnext */
         sgnew = idmac_sg_next(ichan, &descnew, *sg);
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c

index a2c330f5f9521302ed04998dfb38eecb60b2f894..e26075408e9b95a365dfd188cad786593604412f 100644 (file)
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -344,7 +344,7 @@ static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
         size_t bytes = 0;
  
         ret = dma_cookie_status(&c->vc.chan, cookie, state);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         spin_lock_irqsave(&c->vc.lock, flags);
@@ -693,7 +693,7 @@ static int k3_dma_probe(struct platform_device *op)
  
         irq = platform_get_irq(op, 0);
         ret = devm_request_irq(&op->dev, irq,
-                       k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+                       k3_dma_int_handler, 0, DRIVER_NAME, d);
         if (ret)
                 return ret;
  
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c

index ff8d7827f8cbe80e2c66d78db1f50ad6fdd91b5a..dcb1e05149a7664c6e65a214d783080d540aaafd 100644 (file)
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -798,8 +798,7 @@ static void dma_do_tasklet(unsigned long data)
                  * move the descriptors to a temporary list so we can drop
                  * the lock during the entire cleanup operation
                  */
-               list_del(&desc->node);
-               list_add(&desc->node, &chain_cleanup);
+               list_move(&desc->node, &chain_cleanup);
  
                 /*
                  * Look for the first list entry which has the ENDIRQEN flag
@@ -863,7 +862,7 @@ static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev,
  
         if (irq) {
                 ret = devm_request_irq(pdev->dev, irq,
-                       mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+                       mmp_pdma_chan_handler, 0, "pdma", phy);
                 if (ret) {
                         dev_err(pdev->dev, "channel request irq fail!\n");
                         return ret;
@@ -970,7 +969,7 @@ static int mmp_pdma_probe(struct platform_device *op)
                 /* all chan share one irq, demux inside */
                 irq = platform_get_irq(op, 0);
                 ret = devm_request_irq(pdev->dev, irq,
-                       mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+                       mmp_pdma_int_handler, 0, "pdma", pdev);
                 if (ret)
                         return ret;
         }
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c

index d3b6358e5a27037281f6da2efb6dad3e926b9fb1..3ddacc14a7366611ffb089d21c70fb4ba3c896c5 100644 (file)
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -62,6 +62,11 @@
  #define TDCR_BURSTSZ_16B       (0x3 << 6)
  #define TDCR_BURSTSZ_32B       (0x6 << 6)
  #define TDCR_BURSTSZ_64B       (0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B            (0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B            (0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B            (0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B            (0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B   (0x3 << 6)
  #define TDCR_BURSTSZ_SQU_32B   (0x7 << 6)
  #define TDCR_BURSTSZ_128B      (0x5 << 6)
  #define TDCR_DSTDIR_MSK                (0x3 << 4)      /* Dst Direction */
@@ -158,7 +163,7 @@ static void mmp_tdma_disable_chan(struct mmp_tdma_chan *tdmac)
         /* disable irq */
         writel(0, tdmac->reg_base + TDIMR);
  
-       tdmac->status = DMA_SUCCESS;
+       tdmac->status = DMA_COMPLETE;
  }
  
  static void mmp_tdma_resume_chan(struct mmp_tdma_chan *tdmac)
@@ -228,8 +233,31 @@ static int mmp_tdma_config_chan(struct mmp_tdma_chan *tdmac)
                         return -EINVAL;
                 }
         } else if (tdmac->type == PXA910_SQU) {
-               tdcr |= TDCR_BURSTSZ_SQU_32B;
                 tdcr |= TDCR_SSPMOD;
+
+               switch (tdmac->burst_sz) {
+               case 1:
+                       tdcr |= TDCR_BURSTSZ_SQU_1B;
+                       break;
+               case 2:
+                       tdcr |= TDCR_BURSTSZ_SQU_2B;
+                       break;
+               case 4:
+                       tdcr |= TDCR_BURSTSZ_SQU_4B;
+                       break;
+               case 8:
+                       tdcr |= TDCR_BURSTSZ_SQU_8B;
+                       break;
+               case 16:
+                       tdcr |= TDCR_BURSTSZ_SQU_16B;
+                       break;
+               case 32:
+                       tdcr |= TDCR_BURSTSZ_SQU_32B;
+                       break;
+               default:
+                       dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+                       return -EINVAL;
+               }
         }
  
         writel(tdcr, tdmac->reg_base + TDCR);
@@ -324,7 +352,7 @@ static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
  
         if (tdmac->irq) {
                 ret = devm_request_irq(tdmac->dev, tdmac->irq,
-                       mmp_tdma_chan_handler, IRQF_DISABLED, "tdma", tdmac);
+                       mmp_tdma_chan_handler, 0, "tdma", tdmac);
                 if (ret)
                         return ret;
         }
@@ -365,7 +393,7 @@ static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
         int num_periods = buf_len / period_len;
         int i = 0, buf = 0;
  
-       if (tdmac->status != DMA_SUCCESS)
+       if (tdmac->status != DMA_COMPLETE)
                 return NULL;
  
         if (period_len > TDMA_MAX_XFER_BYTES) {
@@ -499,7 +527,7 @@ static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
         tdmac->idx         = idx;
         tdmac->type        = type;
         tdmac->reg_base    = (unsigned long)tdev->base + idx * 4;
-       tdmac->status = DMA_SUCCESS;
+       tdmac->status = DMA_COMPLETE;
         tdev->tdmac[tdmac->idx] = tdmac;
         tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
  
@@ -554,7 +582,7 @@ static int mmp_tdma_probe(struct platform_device *pdev)
         if (irq_num != chan_num) {
                 irq = platform_get_irq(pdev, 0);
                 ret = devm_request_irq(&pdev->dev, irq,
-                       mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
+                       mmp_tdma_int_handler, 0, "tdma", tdev);
                 if (ret)
                         return ret;
         }
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c

index 536dcb8ba5fdfe69ed5f726fc6b5897f00266698..7807f0ef4e209c25ad90db9d32f7bc13955391cf 100644 (file)
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -60,14 +60,6 @@ static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
         return hw_desc->phy_dest_addr;
  }
  
-static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
-                               int src_idx)
-{
-       struct mv_xor_desc *hw_desc = desc->hw_desc;
-       return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
-}
-
-
  static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
                                    u32 byte_count)
  {
@@ -278,42 +270,9 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
                         desc->async_tx.callback(
                                 desc->async_tx.callback_param);
  
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                */
-               if (desc->group_head && desc->unmap_len) {
-                       struct mv_xor_desc_slot *unmap = desc->group_head;
-                       struct device *dev = mv_chan_to_devp(mv_chan);
-                       u32 len = unmap->unmap_len;
-                       enum dma_ctrl_flags flags = desc->async_tx.flags;
-                       u32 src_cnt;
-                       dma_addr_t addr;
-                       dma_addr_t dest;
-
-                       src_cnt = unmap->unmap_src_cnt;
-                       dest = mv_desc_get_dest_addr(unmap);
-                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                               enum dma_data_direction dir;
-
-                               if (src_cnt > 1) /* is xor ? */
-                                       dir = DMA_BIDIRECTIONAL;
-                               else
-                                       dir = DMA_FROM_DEVICE;
-                               dma_unmap_page(dev, dest, len, dir);
-                       }
-
-                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                               while (src_cnt--) {
-                                       addr = mv_desc_get_src_addr(unmap,
-                                                                   src_cnt);
-                                       if (addr == dest)
-                                               continue;
-                                       dma_unmap_page(dev, addr, len,
-                                                      DMA_TO_DEVICE);
-                               }
-                       }
+               dma_descriptor_unmap(&desc->async_tx);
+               if (desc->group_head)
                         desc->group_head = NULL;
-               }
         }
  
         /* run dependent operations */
@@ -749,7 +708,7 @@ static enum dma_status mv_xor_status(struct dma_chan *chan,
         enum dma_status ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS) {
+       if (ret == DMA_COMPLETE) {
                 mv_xor_clean_completed_slots(mv_chan);
                 return ret;
         }
@@ -874,7 +833,7 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan)
         msleep(1);
  
         if (mv_xor_status(dma_chan, cookie, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test copy timed out, disabling\n");
                 err = -ENODEV;
@@ -968,7 +927,7 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan)
         msleep(8);
  
         if (mv_xor_status(dma_chan, cookie, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                 dev_err(dma_chan->device->dev,
                         "Self-test xor timed out, disabling\n");
                 err = -ENODEV;
@@ -1076,10 +1035,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
         }
  
         mv_chan->mmr_base = xordev->xor_base;
-       if (!mv_chan->mmr_base) {
-               ret = -ENOMEM;
-               goto err_free_dma;
-       }
+       mv_chan->mmr_high_base = xordev->xor_high_base;
         tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
                      mv_chan);
  
@@ -1138,7 +1094,7 @@ static void
  mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
                          const struct mbus_dram_target_info *dram)
  {
-       void __iomem *base = xordev->xor_base;
+       void __iomem *base = xordev->xor_high_base;
         u32 win_enable = 0;
         int i;
  
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h

index 06b067f24c9b33f7592d65a4ece98bf99c3cb776..d0749229c875187a454c498964155c37e765d314 100644 (file)
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -34,13 +34,13 @@
  #define XOR_OPERATION_MODE_MEMCPY      2
  #define XOR_DESCRIPTOR_SWAP            BIT(14)
  
-#define XOR_CURR_DESC(chan)    (chan->mmr_base + 0x210 + (chan->idx * 4))
-#define XOR_NEXT_DESC(chan)    (chan->mmr_base + 0x200 + (chan->idx * 4))
-#define XOR_BYTE_COUNT(chan)   (chan->mmr_base + 0x220 + (chan->idx * 4))
-#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
-#define XOR_BLOCK_SIZE(chan)   (chan->mmr_base + 0x2C0 + (chan->idx * 4))
-#define XOR_INIT_VALUE_LOW(chan)       (chan->mmr_base + 0x2E0)
-#define XOR_INIT_VALUE_HIGH(chan)      (chan->mmr_base + 0x2E4)
+#define XOR_CURR_DESC(chan)    (chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)    (chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)   (chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)   (chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)       (chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan)      (chan->mmr_high_base + 0xE4)
  
  #define XOR_CONFIG(chan)       (chan->mmr_base + 0x10 + (chan->idx * 4))
  #define XOR_ACTIVATION(chan)   (chan->mmr_base + 0x20 + (chan->idx * 4))
@@ -50,11 +50,11 @@
  #define XOR_ERROR_ADDR(chan)   (chan->mmr_base + 0x60)
  #define XOR_INTR_MASK_VALUE    0x3F5
  
-#define WINDOW_BASE(w)         (0x250 + ((w) << 2))
-#define WINDOW_SIZE(w)         (0x270 + ((w) << 2))
-#define WINDOW_REMAP_HIGH(w)   (0x290 + ((w) << 2))
-#define WINDOW_BAR_ENABLE(chan)        (0x240 + ((chan) << 2))
-#define WINDOW_OVERRIDE_CTRL(chan)     (0x2A0 + ((chan) << 2))
+#define WINDOW_BASE(w)         (0x50 + ((w) << 2))
+#define WINDOW_SIZE(w)         (0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)   (0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)        (0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan)     (0xA0 + ((chan) << 2))
  
  struct mv_xor_device {
         void __iomem         *xor_base;
@@ -82,6 +82,7 @@ struct mv_xor_chan {
         int                     pending;
         spinlock_t              lock; /* protects the descriptor slot pool */
         void __iomem            *mmr_base;
+       void __iomem            *mmr_high_base;
         unsigned int            idx;
         int                     irq;
         enum dma_transaction_type       current_type;
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c

index ccd13df841db790ff9eabc9cbc9df79f5f8bb9af..ead491346da70183a3a235687de975f85f89e677 100644 (file)
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -27,6 +27,7 @@
  #include <linux/of.h>
  #include <linux/of_device.h>
  #include <linux/of_dma.h>
+#include <linux/list.h>
  
  #include <asm/irq.h>
  
@@ -57,6 +58,9 @@
         (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
  #define HW_APBHX_CHn_SEMA(d, n) \
         (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
+#define HW_APBHX_CHn_BAR(d, n) \
+       (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
  
  /*
   * ccw bits definitions
@@ -115,7 +119,9 @@ struct mxs_dma_chan {
         int                             desc_count;
         enum dma_status                 status;
         unsigned int                    flags;
+       bool                            reset;
  #define MXS_DMA_SG_LOOP                        (1 << 0)
+#define MXS_DMA_USE_SEMAPHORE          (1 << 1)
  };
  
  #define MXS_DMA_CHANNELS               16
@@ -201,12 +207,47 @@ static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
         struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
         int chan_id = mxs_chan->chan.chan_id;
  
-       if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+       /*
+        * mxs dma channel resets can cause a channel stall. To recover from a
+        * channel stall, we have to reset the whole DMA engine. To avoid this,
+        * we use cyclic DMA with semaphores, that are enhanced in
+        * mxs_dma_int_handler. To reset the channel, we can simply stop writing
+        * into the semaphore counter.
+        */
+       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               mxs_chan->reset = true;
+       } else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
                 writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
                         mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
-       else
+       } else {
+               unsigned long elapsed = 0;
+               const unsigned long max_wait = 50000; /* 50ms */
+               void __iomem *reg_dbg1 = mxs_dma->base +
+                               HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+               /*
+                * On i.MX28 APBX, the DMA channel can stop working if we reset
+                * the channel while it is in READ_FLUSH (0x08) state.
+                * We wait here until we leave the state. Then we trigger the
+                * reset. Waiting a maximum of 50ms, the kernel shouldn't crash
+                * because of this.
+                */
+               while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+                       udelay(100);
+                       elapsed += 100;
+               }
+
+               if (elapsed >= max_wait)
+                       dev_err(&mxs_chan->mxs_dma->pdev->dev,
+                                       "Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n",
+                                       chan_id);
+
                 writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
                         mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+       }
+
+       mxs_chan->status = DMA_COMPLETE;
  }
  
  static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
@@ -219,12 +260,21 @@ static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
                 mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
  
         /* write 1 to SEMA to kick off the channel */
-       writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               /* A cyclic DMA consists of at least 2 segments, so initialize
+                * the semaphore with 2 so we have enough time to add 1 to the
+                * semaphore if we need to */
+               writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       } else {
+               writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       }
+       mxs_chan->reset = false;
  }
  
  static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
  {
-       mxs_chan->status = DMA_SUCCESS;
+       mxs_chan->status = DMA_COMPLETE;
  }
  
  static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
@@ -272,58 +322,88 @@ static void mxs_dma_tasklet(unsigned long data)
                 mxs_chan->desc.callback(mxs_chan->desc.callback_param);
  }
  
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+       int i;
+
+       for (i = 0; i != mxs_dma->nr_channels; ++i)
+               if (mxs_dma->mxs_chans[i].chan_irq == irq)
+                       return i;
+
+       return -EINVAL;
+}
+
  static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
  {
         struct mxs_dma_engine *mxs_dma = dev_id;
-       u32 stat1, stat2;
+       struct mxs_dma_chan *mxs_chan;
+       u32 completed;
+       u32 err;
+       int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+       if (chan < 0)
+               return IRQ_NONE;
  
         /* completion status */
-       stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
-       stat1 &= MXS_DMA_CHANNELS_MASK;
-       writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+       completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+       completed = (completed >> chan) & 0x1;
+
+       /* Clear interrupt */
+       writel((1 << chan),
+                       mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
  
         /* error status */
-       stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
-       writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+       err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+       err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+       /*
+        * error status bit is in the upper 16 bits, error irq bit in the lower
+        * 16 bits. We transform it into a simpler error code:
+        * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+        */
+       err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+       /* Clear error irq */
+       writel((1 << chan),
+                       mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
  
         /*
          * When both completion and error of termination bits set at the
          * same time, we do not take it as an error.  IOW, it only becomes
-        * an error we need to handle here in case of either it's (1) a bus
-        * error or (2) a termination error with no completion.
+        * an error we need to handle here in case of either it's a bus
+        * error or a termination error with no completion. 0x01 is termination
+        * error, so we can subtract err & completed to get the real error case.
          */
-       stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
-               (~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
-
-       /* combine error and completion status for checking */
-       stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
-       while (stat1) {
-               int channel = fls(stat1) - 1;
-               struct mxs_dma_chan *mxs_chan =
-                       &mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
-
-               if (channel >= MXS_DMA_CHANNELS) {
-                       dev_dbg(mxs_dma->dma_device.dev,
-                               "%s: error in channel %d\n", __func__,
-                               channel - MXS_DMA_CHANNELS);
-                       mxs_chan->status = DMA_ERROR;
-                       mxs_dma_reset_chan(mxs_chan);
-               } else {
-                       if (mxs_chan->flags & MXS_DMA_SG_LOOP)
-                               mxs_chan->status = DMA_IN_PROGRESS;
-                       else
-                               mxs_chan->status = DMA_SUCCESS;
-               }
+       err -= err & completed;
  
-               stat1 &= ~(1 << channel);
+       mxs_chan = &mxs_dma->mxs_chans[chan];
  
-               if (mxs_chan->status == DMA_SUCCESS)
-                       dma_cookie_complete(&mxs_chan->desc);
+       if (err) {
+               dev_dbg(mxs_dma->dma_device.dev,
+                       "%s: error in channel %d\n", __func__,
+                       chan);
+               mxs_chan->status = DMA_ERROR;
+               mxs_dma_reset_chan(mxs_chan);
+       } else if (mxs_chan->status != DMA_COMPLETE) {
+               if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+                       mxs_chan->status = DMA_IN_PROGRESS;
+                       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+                               writel(1, mxs_dma->base +
+                                       HW_APBHX_CHn_SEMA(mxs_dma, chan));
+               } else {
+                       mxs_chan->status = DMA_COMPLETE;
+               }
+       }
  
-               /* schedule tasklet on this channel */
-               tasklet_schedule(&mxs_chan->tasklet);
+       if (mxs_chan->status == DMA_COMPLETE) {
+               if (mxs_chan->reset)
+                       return IRQ_HANDLED;
+               dma_cookie_complete(&mxs_chan->desc);
         }
  
+       /* schedule tasklet on this channel */
+       tasklet_schedule(&mxs_chan->tasklet);
+
         return IRQ_HANDLED;
  }
  
@@ -523,6 +603,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
  
         mxs_chan->status = DMA_IN_PROGRESS;
         mxs_chan->flags |= MXS_DMA_SG_LOOP;
+       mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
  
         if (num_periods > NUM_CCW) {
                 dev_err(mxs_dma->dma_device.dev,
@@ -554,6 +635,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
                 ccw->bits |= CCW_IRQ;
                 ccw->bits |= CCW_HALT_ON_TERM;
                 ccw->bits |= CCW_TERM_FLUSH;
+               ccw->bits |= CCW_DEC_SEM;
                 ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
                                 MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
  
@@ -599,8 +681,24 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
                         dma_cookie_t cookie, struct dma_tx_state *txstate)
  {
         struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+       struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+       u32 residue = 0;
+
+       if (mxs_chan->status == DMA_IN_PROGRESS &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               struct mxs_dma_ccw *last_ccw;
+               u32 bar;
+
+               last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+               residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+               bar = readl(mxs_dma->base +
+                               HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+               residue -= bar;
+       }
  
-       dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
+       dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+                       residue);
  
         return mxs_chan->status;
  }
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c

index ec3fc4fd9160e8aeddf16054cd405f35b43bfef7..2f66cf4e54fe367754378c3c8be213fe20bb8f64 100644 (file)
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -248,7 +248,7 @@ static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
         unsigned long flags;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS || !txstate)
+       if (ret == DMA_COMPLETE || !txstate)
                 return ret;
  
         spin_lock_irqsave(&c->vc.lock, flags);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c

index df8b10fd1726ed466d2b05e814a8feb6c711dfe3..cdf0483b8f2dfb8f746b786fd6bf84b0d5f6657b 100644 (file)
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data)
                         list_move_tail(&desc->node, &pch->dmac->desc_pool);
                 }
  
+               dma_descriptor_unmap(&desc->txd);
+
                 if (callback) {
                         spin_unlock_irqrestore(&pch->lock, flags);
                         callback(callback_param);
@@ -2314,7 +2316,7 @@ bool pl330_filter(struct dma_chan *chan, void *param)
                 return false;
  
         peri_id = chan->private;
-       return *peri_id == (unsigned)param;
+       return *peri_id == (unsigned long)param;
  }
  EXPORT_SYMBOL(pl330_filter);
  
@@ -2926,16 +2928,23 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
  
         amba_set_drvdata(adev, pdmac);
  
-       irq = adev->irq[0];
-       ret = request_irq(irq, pl330_irq_handler, 0,
-                       dev_name(&adev->dev), pi);
-       if (ret)
-               return ret;
+       for (i = 0; i < AMBA_NR_IRQS; i++) {
+               irq = adev->irq[i];
+               if (irq) {
+                       ret = devm_request_irq(&adev->dev, irq,
+                                              pl330_irq_handler, 0,
+                                              dev_name(&adev->dev), pi);
+                       if (ret)
+                               return ret;
+               } else {
+                       break;
+               }
+       }
  
         pi->pcfg.periph_id = adev->periphid;
         ret = pl330_add(pi);
         if (ret)
-               goto probe_err1;
+               return ret;
  
         INIT_LIST_HEAD(&pdmac->desc_pool);
         spin_lock_init(&pdmac->pool_lock);
@@ -3033,8 +3042,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
  
         return 0;
  probe_err3:
-       amba_set_drvdata(adev, NULL);
-
         /* Idle the DMAC */
         list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
                         chan.device_node) {
@@ -3048,8 +3055,6 @@ probe_err3:
         }
  probe_err2:
         pl330_del(pi);
-probe_err1:
-       free_irq(irq, pi);
  
         return ret;
  }
@@ -3059,7 +3064,6 @@ static int pl330_remove(struct amba_device *adev)
         struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
         struct dma_pl330_chan *pch, *_p;
         struct pl330_info *pi;
-       int irq;
  
         if (!pdmac)
                 return 0;
@@ -3068,7 +3072,6 @@ static int pl330_remove(struct amba_device *adev)
                 of_dma_controller_free(adev->dev.of_node);
  
         dma_async_device_unregister(&pdmac->ddma);
-       amba_set_drvdata(adev, NULL);
  
         /* Idle the DMAC */
         list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
@@ -3086,9 +3089,6 @@ static int pl330_remove(struct amba_device *adev)
  
         pl330_del(pi);
  
-       irq = adev->irq[0];
-       free_irq(irq, pi);
-
         return 0;
  }
  
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c

index e24b5ef486b50819957d02c03f475561bccac4df..8da48c6b2a38ccd76f3e127d53e4be5ab7be425c 100644 (file)
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -803,218 +803,6 @@ static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
         local_irq_restore(flags);
  }
  
-/**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan, int src_idx)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-               /* May have 0, 1, 2, or 3 sources */
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DFILL128:
-                       return 0;
-               case DMA_CDB_OPC_DCHECK128:
-                       if (unlikely(src_idx)) {
-                               printk(KERN_ERR "%s: try to get %d source for"
-                                   " DCHECK128\n", __func__, src_idx);
-                               BUG();
-                       }
-                       return le32_to_cpu(dma_hw_desc->sg1l);
-               case DMA_CDB_OPC_MULTICAST:
-               case DMA_CDB_OPC_MV_SG1_SG2:
-                       if (unlikely(src_idx > 2)) {
-                               printk(KERN_ERR "%s: try to get %d source from"
-                                   " DMA descr\n", __func__, src_idx);
-                               BUG();
-                       }
-                       if (src_idx) {
-                               if (le32_to_cpu(dma_hw_desc->sg1u) &
-                                   DMA_CUED_XOR_WIN_MSK) {
-                                       u8 region;
-
-                                       if (src_idx == 1)
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       desc->unmap_len;
-
-                                       region = (le32_to_cpu(
-                                           dma_hw_desc->sg1u)) >>
-                                               DMA_CUED_REGION_OFF;
-
-                                       region &= DMA_CUED_REGION_MSK;
-                                       switch (region) {
-                                       case DMA_RXOR123:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len << 1);
-                                       case DMA_RXOR124:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len * 3);
-                                       case DMA_RXOR125:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len << 2);
-                                       default:
-                                               printk(KERN_ERR
-                                                   "%s: try to"
-                                                   " get src3 for region %02x"
-                                                   "PPC440SPE_DESC_RXOR12?\n",
-                                                   __func__, region);
-                                               BUG();
-                                       }
-                               } else {
-                                       printk(KERN_ERR
-                                               "%s: try to get %d"
-                                               " source for non-cued descr\n",
-                                               __func__, src_idx);
-                                       BUG();
-                               }
-                       }
-                       return le32_to_cpu(dma_hw_desc->sg1l);
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-               return le32_to_cpu(dma_hw_desc->sg1l);
-       case PPC440SPE_XOR_ID:
-               /* May have up to 16 sources */
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->ops[src_idx].l;
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan, int idx)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-
-               if (likely(!idx))
-                       return le32_to_cpu(dma_hw_desc->sg2l);
-               return le32_to_cpu(dma_hw_desc->sg3l);
-       case PPC440SPE_XOR_ID:
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->cbtal;
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DFILL128:
-                       return 0;
-               case DMA_CDB_OPC_DCHECK128:
-                       return 1;
-               case DMA_CDB_OPC_MV_SG1_SG2:
-               case DMA_CDB_OPC_MULTICAST:
-                       /*
-                        * Only for RXOR operations we have more than
-                        * one source
-                        */
-                       if (le32_to_cpu(dma_hw_desc->sg1u) &
-                           DMA_CUED_XOR_WIN_MSK) {
-                               /* RXOR op, there are 2 or 3 sources */
-                               if (((le32_to_cpu(dma_hw_desc->sg1u) >>
-                                   DMA_CUED_REGION_OFF) &
-                                     DMA_CUED_REGION_MSK) == DMA_RXOR12) {
-                                       /* RXOR 1-2 */
-                                       return 2;
-                               } else {
-                                       /* RXOR 1-2-3/1-2-4/1-2-5 */
-                                       return 3;
-                               }
-                       }
-                       return 1;
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-       case PPC440SPE_XOR_ID:
-               /* up to 16 sources */
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan)
-{
-       struct dma_cdb *dma_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               /* May be 1 or 2 destinations */
-               dma_hw_desc = desc->hw_desc;
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DCHECK128:
-                       return 0;
-               case DMA_CDB_OPC_MV_SG1_SG2:
-               case DMA_CDB_OPC_DFILL128:
-                       return 1;
-               case DMA_CDB_OPC_MULTICAST:
-                       if (desc->dst_cnt == 2)
-                               return 2;
-                       else
-                               return 1;
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-       case PPC440SPE_XOR_ID:
-               /* Always only 1 destination */
-               return 1;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
  /**
   * ppc440spe_desc_get_link - get the address of the descriptor that
   * follows this one
@@ -1707,43 +1495,6 @@ static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
         }
  }
  
-static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
-                                struct ppc440spe_adma_desc_slot *desc)
-{
-       u32 src_cnt, dst_cnt;
-       dma_addr_t addr;
-
-       /*
-        * get the number of sources & destination
-        * included in this descriptor and unmap
-        * them all
-        */
-       src_cnt = ppc440spe_desc_get_src_num(desc, chan);
-       dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
-
-       /* unmap destinations */
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               while (dst_cnt--) {
-                       addr = ppc440spe_desc_get_dest_addr(
-                               desc, chan, dst_cnt);
-                       dma_unmap_page(chan->device->dev,
-                                       addr, desc->unmap_len,
-                                       DMA_FROM_DEVICE);
-               }
-       }
-
-       /* unmap sources */
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               while (src_cnt--) {
-                       addr = ppc440spe_desc_get_src_addr(
-                               desc, chan, src_cnt);
-                       dma_unmap_page(chan->device->dev,
-                                       addr, desc->unmap_len,
-                                       DMA_TO_DEVICE);
-               }
-       }
-}
-
  /**
   * ppc440spe_adma_run_tx_complete_actions - call functions to be called
   * upon completion
@@ -1767,26 +1518,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
                         desc->async_tx.callback(
                                 desc->async_tx.callback_param);
  
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                *
-                * actually, ppc's dma_unmap_page() functions are empty, so
-                * the following code is just for the sake of completeness
-                */
-               if (chan && chan->needs_unmap && desc->group_head &&
-                    desc->unmap_len) {
-                       struct ppc440spe_adma_desc_slot *unmap =
-                                                       desc->group_head;
-                       /* assume 1 slot per op always */
-                       u32 slot_count = unmap->slot_cnt;
-
-                       /* Run through the group list and unmap addresses */
-                       for (i = 0; i < slot_count; i++) {
-                               BUG_ON(!unmap);
-                               ppc440spe_adma_unmap(chan, unmap);
-                               unmap = unmap->hw_next;
-                       }
-               }
+               dma_descriptor_unmap(&desc->async_tx);
         }
  
         /* run dependent operations */
@@ -3893,7 +3625,7 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
  
         ppc440spe_chan = to_ppc440spe_adma_chan(chan);
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         ppc440spe_adma_slot_cleanup(ppc440spe_chan);
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c

index 461a91ab70bb4feca82cd27c1582f81c2905bcde..ab26d46bbe1598434625979abeb488d5199992d9 100644 (file)
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -436,7 +436,7 @@ static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
         enum dma_status ret;
  
         ret = dma_cookie_status(&c->vc.chan, cookie, state);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         if (!state)
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c

index d94ab592cc1bb21b92c851debe381609b599fa48..2e7b394def8058e4d1216ad8a07c785db4d87048 100644 (file)
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -724,7 +724,7 @@ static enum dma_status shdma_tx_status(struct dma_chan *chan,
          * If we don't find cookie on the queue, it has been aborted and we have
          * to report error
          */
-       if (status != DMA_SUCCESS) {
+       if (status != DMA_COMPLETE) {
                 struct shdma_desc *sdesc;
                 status = DMA_ERROR;
                 list_for_each_entry(sdesc, &schan->ld_queue, node)
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c

index 1069e8869f20762928ecbbe509b2ed294f82ae35..0d765c0e21ec9de8cb4e25ff66d5fd53ee78d95d 100644 (file)
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -685,7 +685,7 @@ MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
  static int sh_dmae_probe(struct platform_device *pdev)
  {
         const struct sh_dmae_pdata *pdata;
-       unsigned long irqflags = IRQF_DISABLED,
+       unsigned long irqflags = 0,
                 chan_flag[SH_DMAE_MAX_CHANNELS] = {};
         int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
         int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
@@ -838,7 +838,7 @@ static int sh_dmae_probe(struct platform_device *pdev)
                                     IORESOURCE_IRQ_SHAREABLE)
                                         chan_flag[irq_cnt] = IRQF_SHARED;
                                 else
-                                       chan_flag[irq_cnt] = IRQF_DISABLED;
+                                       chan_flag[irq_cnt] = 0;
                                 dev_dbg(&pdev->dev,
                                         "Found IRQ %d for channel %d\n",
                                         i, irq_cnt);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c

index 82d2b97ad942f96f2064c0ac58b11141fb85b54c..b8c031b7de4e045d22cfa0e7d495380bc94ed75f 100644 (file)
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -14,6 +14,7 @@
  #include <linux/platform_device.h>
  #include <linux/clk.h>
  #include <linux/delay.h>
+#include <linux/log2.h>
  #include <linux/pm.h>
  #include <linux/pm_runtime.h>
  #include <linux/err.h>
@@ -2626,7 +2627,7 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
         }
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS)
+       if (ret != DMA_COMPLETE)
                 dma_set_residue(txstate, stedma40_residue(chan));
  
         if (d40_is_paused(d40c))
@@ -2796,8 +2797,8 @@ static int d40_set_runtime_config(struct dma_chan *chan,
             src_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
             dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
             dst_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
-           ((src_addr_width > 1) && (src_addr_width & 1)) ||
-           ((dst_addr_width > 1) && (dst_addr_width & 1)))
+           !is_power_of_2(src_addr_width) ||
+           !is_power_of_2(dst_addr_width))
                 return -EINVAL;
  
         cfg->src_info.data_width = src_addr_width;
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c

index 5d4986e5f5fa6b21423084b688bd0a8afbba0c2e..73654e33f13b98c66ebce532646056ecdce79c61 100644 (file)
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -570,7 +570,7 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
  
         list_del(&sgreq->node);
         if (sgreq->last_sg) {
-               dma_desc->dma_status = DMA_SUCCESS;
+               dma_desc->dma_status = DMA_COMPLETE;
                 dma_cookie_complete(&dma_desc->txd);
                 if (!dma_desc->cb_count)
                         list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
@@ -768,7 +768,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
         unsigned int residual;
  
         ret = dma_cookie_status(dc, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                 return ret;
  
         spin_lock_irqsave(&tdc->lock, flags);
@@ -1018,7 +1018,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
         return &dma_desc->txd;
  }
  
-struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
         struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
         size_t period_len, enum dma_transfer_direction direction,
         unsigned long flags, void *context)
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c

index 28af214fce049db85fc02fb903a748fdbef6e0a1..4506a7b4f972319c761ff4ee459a0a38bb7139dd 100644 (file)
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -154,38 +154,6 @@ static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
         return done;
  }
  
-static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
-       bool single)
-{
-       dma_addr_t addr;
-       int len;
-
-       addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
-               dma_desc[4];
-
-       len = (dma_desc[3] << 8) | dma_desc[2];
-
-       if (single)
-               dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
-                       DMA_TO_DEVICE);
-       else
-               dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
-                       DMA_TO_DEVICE);
-}
-
-static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
-{
-       struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
-               struct timb_dma_chan, chan);
-       u8 *descs;
-
-       for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
-               __td_unmap_desc(td_chan, descs, single);
-               if (descs[0] & 0x02)
-                       break;
-       }
-}
-
  static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
         struct scatterlist *sg, bool last)
  {
@@ -293,10 +261,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
  
         list_move(&td_desc->desc_node, &td_chan->free_list);
  
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               __td_unmap_descs(td_desc,
-                       txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
-
+       dma_descriptor_unmap(txd);
         /*
          * The API requires that no submissions are done from a
          * callback, so we don't need to drop the lock here
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c

index 71e8e775189e0df5568d474ea00157a2675f9260..bae6c29f5502ab951f7926bdf621977703bc96b1 100644 (file)
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -419,30 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
         list_splice_init(&desc->tx_list, &dc->free_list);
         list_move(&desc->desc_node, &dc->free_list);
  
-       if (!ds) {
-               dma_addr_t dmaaddr;
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       dmaaddr = is_dmac64(dc) ?
-                               desc->hwdesc.DAR : desc->hwdesc32.DAR;
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       dmaaddr = is_dmac64(dc) ?
-                               desc->hwdesc.SAR : desc->hwdesc32.SAR;
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
         /*
          * The API requires that no submissions are done from a
          * callback, so we don't need to drop the lock here
@@ -962,8 +939,8 @@ txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
         enum dma_status ret;
  
         ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
-               return DMA_SUCCESS;
+       if (ret == DMA_COMPLETE)
+               return DMA_COMPLETE;
  
         spin_lock_bh(&dc->lock);
         txx9dmac_scan_descriptors(dc);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c

index 7dd4461502940952ebf6323dbb04e1af7e6a5230..4e10b10d3ddde47332f525285e1c549bd9a2c22f 100644 (file)
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -13,6 +13,7 @@
  #include <linux/acpi_gpio.h>
  #include <linux/idr.h>
  #include <linux/slab.h>
+#include <linux/acpi.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/gpio.h>
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c

index 43959edd4291193a538449c1d4f75bba8d30ea8a..dfff0907f70e53643093c3f6aa8d9258ab8dc5d5 100644 (file)
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -196,7 +196,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
         acpi_handle dhandle;
         int ret;
  
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
         if (!dhandle)
                 return false;
  
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c

index 1b2f41c3f19122c731356aff91f3b490959bde60..6d69a9bad86545c6a8cfc8e8ff86480d462c2132 100644 (file)
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -638,7 +638,7 @@ static void intel_didl_outputs(struct drm_device *dev)
         u32 temp;
         int i = 0;
  
-       handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+       handle = ACPI_HANDLE(&dev->pdev->dev);
         if (!handle || acpi_bus_get_device(handle, &acpi_dev))
                 return;
  
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c

index e286e132c7e7d744d88d99522cf2ce733b4899a6..129120473f6c67bb0d90fcc366e3f140c558f79d 100644 (file)
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
@@ -116,7 +116,7 @@ mxm_shadow_dsm(struct nouveau_mxm *mxm, u8 version)
         acpi_handle handle;
         int ret;
  
-       handle = DEVICE_ACPI_HANDLE(&device->pdev->dev);
+       handle = ACPI_HANDLE(&device->pdev->dev);
         if (!handle)
                 return false;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c

index 07273a2ae62f2df8dd2b0f41916feeb4d89bd3cd..95c740454049ad1b4a4cf23c2bc63d7038c7a362 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -256,7 +256,7 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
         acpi_handle dhandle;
         int retval = 0;
  
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
         if (!dhandle)
                 return false;
  
@@ -414,7 +414,7 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev)
         if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
                 return false;
  
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
         if (!dhandle)
                 return false;
  
@@ -448,7 +448,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector)
                 return NULL;
         }
  
-       handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+       handle = ACPI_HANDLE(&dev->pdev->dev);
         if (!handle)
                 return NULL;
  
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c

index 10f98c7742d8c87289e8bbf677857c781fcebbee..98a9074b306b640644f576e928d4e28701495bb4 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -369,7 +369,7 @@ int radeon_atif_handler(struct radeon_device *rdev,
                 return NOTIFY_DONE;
  
         /* Check pending SBIOS requests */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
         count = radeon_atif_get_sbios_requests(handle, &req);
  
         if (count <= 0)
@@ -556,7 +556,7 @@ int radeon_acpi_pcie_notify_device_ready(struct radeon_device *rdev)
         struct radeon_atcs *atcs = &rdev->atcs;
  
         /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
         if (!handle)
                 return -EINVAL;
  
@@ -596,7 +596,7 @@ int radeon_acpi_pcie_performance_request(struct radeon_device *rdev,
         u32 retry = 3;
  
         /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
         if (!handle)
                 return -EINVAL;
  
@@ -699,7 +699,7 @@ int radeon_acpi_init(struct radeon_device *rdev)
         int ret;
  
         /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
  
         /* No need to proceed if we're sure that ATIF is not supported */
         if (!ASIC_IS_AVIVO(rdev) || !rdev->bios || !handle)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c

index 6153ec18943aabb7034010ff1c1abc2ba0a761fe..9d302eaeea1587b6fdb5439119b6f45c0de961db 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -8,8 +8,7 @@
   */
  #include <linux/vga_switcheroo.h>
  #include <linux/slab.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/acpi.h>
  #include <linux/pci.h>
  
  #include "radeon_acpi.h"
@@ -447,7 +446,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
         acpi_handle dhandle, atpx_handle;
         acpi_status status;
  
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
         if (!dhandle)
                 return false;
  
@@ -493,7 +492,7 @@ static int radeon_atpx_init(void)
   */
  static int radeon_atpx_get_client_id(struct pci_dev *pdev)
  {
-       if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+       if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
                 return VGA_SWITCHEROO_IGD;
         else
                 return VGA_SWITCHEROO_DIS;
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c

index c155d6f3fa68cad15102af67bfd348677c74d6fa..b3633d9a531703a1cd4189c061a0907e89ee1085 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -185,7 +185,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
                 return false;
  
         while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
-               dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+               dhandle = ACPI_HANDLE(&pdev->dev);
                 if (!dhandle)
                         continue;
  
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c

index ae48d18ee315819d2b8fa359fc4c0269665f6ec5..5f7e55f4b7f052e29f754f78cabe8dd6a4593fb4 100644 (file)
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1008,7 +1008,7 @@ static int i2c_hid_probe(struct i2c_client *client,
         hid->hid_get_raw_report = i2c_hid_get_raw_report;
         hid->hid_output_raw_report = i2c_hid_output_raw_report;
         hid->dev.parent = &client->dev;
-       ACPI_HANDLE_SET(&hid->dev, ACPI_HANDLE(&client->dev));
+       ACPI_COMPANION_SET(&hid->dev, ACPI_COMPANION(&client->dev));
         hid->bus = BUS_I2C;
         hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
         hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c

index 5923cfa390c86de6528559c229ace0a4b39b402c..d74c0b34248ea6c38472cc401571d8f519844c4d 100644 (file)
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -615,6 +615,22 @@ void i2c_unlock_adapter(struct i2c_adapter *adapter)
  }
  EXPORT_SYMBOL_GPL(i2c_unlock_adapter);
  
+static void i2c_dev_set_name(struct i2c_adapter *adap,
+                            struct i2c_client *client)
+{
+       struct acpi_device *adev = ACPI_COMPANION(&client->dev);
+
+       if (adev) {
+               dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(adev));
+               return;
+       }
+
+       /* For 10-bit clients, add an arbitrary offset to avoid collisions */
+       dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
+                    client->addr | ((client->flags & I2C_CLIENT_TEN)
+                                    ? 0xa000 : 0));
+}
+
  /**
   * i2c_new_device - instantiate an i2c device
   * @adap: the adapter managing the device
@@ -671,12 +687,9 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
         client->dev.bus = &i2c_bus_type;
         client->dev.type = &i2c_client_type;
         client->dev.of_node = info->of_node;
-       ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
+       ACPI_COMPANION_SET(&client->dev, info->acpi_node.companion);
  
-       /* For 10-bit clients, add an arbitrary offset to avoid collisions */
-       dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
-                    client->addr | ((client->flags & I2C_CLIENT_TEN)
-                                    ? 0xa000 : 0));
+       i2c_dev_set_name(adap, client);
         status = device_register(&client->dev);
         if (status)
                 goto out_err;
@@ -1100,7 +1113,7 @@ static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
                 return AE_OK;
  
         memset(&info, 0, sizeof(info));
-       info.acpi_node.handle = handle;
+       info.acpi_node.companion = adev;
         info.irq = -1;
  
         INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c

index 140c8ef505291129d6299d2b4d4931d3b728589e..d9e1f7ccfe6f086df549160a88ca191e52fca32f 100644 (file)
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -7,6 +7,7 @@
   * Copyright (C) 2006 Hannes Reinecke
   */
  
+#include <linux/acpi.h>
  #include <linux/ata.h>
  #include <linux/delay.h>
  #include <linux/device.h>
@@ -19,8 +20,6 @@
  #include <linux/dmi.h>
  #include <linux/module.h>
  
-#include <acpi/acpi_bus.h>
-
  #define REGS_PER_GTF           7
  
  struct GTM_buffer {
@@ -128,7 +127,7 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
  
         DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
  
-       dev_handle = DEVICE_ACPI_HANDLE(dev);
+       dev_handle = ACPI_HANDLE(dev);
         if (!dev_handle) {
                 DEBPRINT("no acpi handle for device\n");
                 goto err;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c

index 3226ce98fb184df9c0c5666129c1b699cbcc5027..cbd4e9abc47e8f47f512915d224f916166921110 100644 (file)
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1,7 +1,7 @@
  /*
   * intel_idle.c - native hardware idle loop for modern Intel processors
   *
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
   * Len Brown <len.brown@intel.com>
   *
   * This program is free software; you can redistribute it and/or modify it
@@ -329,6 +329,22 @@ static struct cpuidle_state atom_cstates[] __initdata = {
         {
                 .enter = NULL }
  };
+static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
+       {
+               .name = "C1-AVN",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 2,
+               .target_residency = 2,
+               .enter = &intel_idle },
+       {
+               .name = "C6-AVN",
+               .desc = "MWAIT 0x51",
+               .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 15,
+               .target_residency = 45,
+               .enter = &intel_idle },
+};
  
  /**
   * intel_idle
@@ -462,6 +478,11 @@ static const struct idle_cpu idle_cpu_hsw = {
         .disable_promotion_to_c1e = true,
  };
  
+static const struct idle_cpu idle_cpu_avn = {
+       .state_table = avn_cstates,
+       .disable_promotion_to_c1e = true,
+};
+
  #define ICPU(model, cpu) \
         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
  
@@ -483,6 +504,7 @@ static const struct x86_cpu_id intel_idle_ids[] = {
         ICPU(0x3f, idle_cpu_hsw),
         ICPU(0x45, idle_cpu_hsw),
         ICPU(0x46, idle_cpu_hsw),
+       ICPU(0x4D, idle_cpu_avn),
         {}
  };
  MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 8766eabb0014a075e5ace35860806fb3bf31be3f..b6b7a2866c9e99533afd59a0a142712f58febe63 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev)
  
  static struct ctl_table_header *raid_table_header;
  
-static ctl_table raid_table[] = {
+static struct ctl_table raid_table[] = {
         {
                 .procname       = "speed_limit_min",
                 .data           = &sysctl_speed_limit_min,
@@ -130,7 +130,7 @@ static ctl_table raid_table[] = {
         { }
  };
  
-static ctl_table raid_dir_table[] = {
+static struct ctl_table raid_dir_table[] = {
         {
                 .procname       = "raid",
                 .maxlen         = 0,
@@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = {
         { }
  };
  
-static ctl_table raid_root_table[] = {
+static struct ctl_table raid_root_table[] = {
         {
                 .procname       = "dev",
                 .maxlen         = 0,
@@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit)
         goto retry;
  }
  
-static inline int mddev_lock(struct mddev * mddev)
+static inline int __must_check mddev_lock(struct mddev * mddev)
  {
         return mutex_lock_interruptible(&mddev->reconfig_mutex);
  }
  
+/* Sometimes we need to take the lock in a situation where
+ * failure due to interrupts is not acceptable.
+ */
+static inline void mddev_lock_nointr(struct mddev * mddev)
+{
+       mutex_lock(&mddev->reconfig_mutex);
+}
+
  static inline int mddev_is_locked(struct mddev *mddev)
  {
         return mutex_is_locked(&mddev->reconfig_mutex);
@@ -2978,7 +2986,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
                 for_each_mddev(mddev, tmp) {
                         struct md_rdev *rdev2;
  
-                       mddev_lock(mddev);
+                       mddev_lock_nointr(mddev);
                         rdev_for_each(rdev2, mddev)
                                 if (rdev->bdev == rdev2->bdev &&
                                     rdev != rdev2 &&
@@ -2994,7 +3002,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
                                 break;
                         }
                 }
-               mddev_lock(my_mddev);
+               mddev_lock_nointr(my_mddev);
                 if (overlap) {
                         /* Someone else could have slipped in a size
                          * change here, but doing so is just silly.
@@ -3580,6 +3588,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
                 mddev->in_sync = 1;
                 del_timer_sync(&mddev->safemode_timer);
         }
+       blk_set_stacking_limits(&mddev->queue->limits);
         pers->run(mddev);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
         mddev_resume(mddev);
@@ -5258,7 +5267,7 @@ static void __md_stop_writes(struct mddev *mddev)
  
  void md_stop_writes(struct mddev *mddev)
  {
-       mddev_lock(mddev);
+       mddev_lock_nointr(mddev);
         __md_stop_writes(mddev);
         mddev_unlock(mddev);
  }
@@ -5291,20 +5300,35 @@ EXPORT_SYMBOL_GPL(md_stop);
  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
  {
         int err = 0;
+       int did_freeze = 0;
+
+       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+               did_freeze = 1;
+               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
+       if (mddev->sync_thread) {
+               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               /* Thread might be blocked waiting for metadata update
+                * which will now never happen */
+               wake_up_process(mddev->sync_thread->tsk);
+       }
+       mddev_unlock(mddev);
+       wait_event(resync_wait, mddev->sync_thread == NULL);
+       mddev_lock_nointr(mddev);
+
         mutex_lock(&mddev->open_mutex);
-       if (atomic_read(&mddev->openers) > !!bdev) {
+       if (atomic_read(&mddev->openers) > !!bdev ||
+           mddev->sync_thread ||
+           (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
                 printk("md: %s still in use.\n",mdname(mddev));
+               if (did_freeze) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       md_wakeup_thread(mddev->thread);
+               }
                 err = -EBUSY;
                 goto out;
         }
-       if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-               /* Someone opened the device since we flushed it
-                * so page cache could be dirty and it is too late
-                * to flush.  So abort
-                */
-               mutex_unlock(&mddev->open_mutex);
-               return -EBUSY;
-       }
         if (mddev->pers) {
                 __md_stop_writes(mddev);
  
@@ -5315,7 +5339,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
                 set_disk_ro(mddev->gendisk, 1);
                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                 sysfs_notify_dirent_safe(mddev->sysfs_state);
-               err = 0;        
+               err = 0;
         }
  out:
         mutex_unlock(&mddev->open_mutex);
@@ -5331,20 +5355,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
  {
         struct gendisk *disk = mddev->gendisk;
         struct md_rdev *rdev;
+       int did_freeze = 0;
+
+       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+               did_freeze = 1;
+               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
+       if (mddev->sync_thread) {
+               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               /* Thread might be blocked waiting for metadata update
+                * which will now never happen */
+               wake_up_process(mddev->sync_thread->tsk);
+       }
+       mddev_unlock(mddev);
+       wait_event(resync_wait, mddev->sync_thread == NULL);
+       mddev_lock_nointr(mddev);
  
         mutex_lock(&mddev->open_mutex);
         if (atomic_read(&mddev->openers) > !!bdev ||
-           mddev->sysfs_active) {
+           mddev->sysfs_active ||
+           mddev->sync_thread ||
+           (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
                 printk("md: %s still in use.\n",mdname(mddev));
                 mutex_unlock(&mddev->open_mutex);
-               return -EBUSY;
-       }
-       if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-               /* Someone opened the device since we flushed it
-                * so page cache could be dirty and it is too late
-                * to flush.  So abort
-                */
-               mutex_unlock(&mddev->open_mutex);
+               if (did_freeze) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       md_wakeup_thread(mddev->thread);
+               }
                 return -EBUSY;
         }
         if (mddev->pers) {
@@ -6551,7 +6589,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                                 wait_event(mddev->sb_wait,
                                            !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
                                            !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-                               mddev_lock(mddev);
+                               mddev_lock_nointr(mddev);
                         }
                 } else {
                         err = -EROFS;
@@ -7361,9 +7399,6 @@ void md_do_sync(struct md_thread *thread)
                 mddev->curr_resync = 2;
  
         try_again:
-               if (kthread_should_stop())
-                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
                 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                         goto skip;
                 for_each_mddev(mddev2, tmp) {
@@ -7388,7 +7423,7 @@ void md_do_sync(struct md_thread *thread)
                                  * be caught by 'softlockup'
                                  */
                                 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-                               if (!kthread_should_stop() &&
+                               if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                     mddev2->curr_resync >= mddev->curr_resync) {
                                         printk(KERN_INFO "md: delaying %s of %s"
                                                " until %s has finished (they"
@@ -7464,7 +7499,7 @@ void md_do_sync(struct md_thread *thread)
         last_check = 0;
  
         if (j>2) {
-               printk(KERN_INFO 
+               printk(KERN_INFO
                        "md: resuming %s of %s from checkpoint.\n",
                        desc, mdname(mddev));
                 mddev->curr_resync = j;
@@ -7501,7 +7536,8 @@ void md_do_sync(struct md_thread *thread)
                         sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                 }
  
-               while (j >= mddev->resync_max && !kthread_should_stop()) {
+               while (j >= mddev->resync_max &&
+                      !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                         /* As this condition is controlled by user-space,
                          * we can block indefinitely, so use '_interruptible'
                          * to avoid triggering warnings.
@@ -7509,17 +7545,18 @@ void md_do_sync(struct md_thread *thread)
                         flush_signals(current); /* just in case */
                         wait_event_interruptible(mddev->recovery_wait,
                                                  mddev->resync_max > j
-                                                || kthread_should_stop());
+                                                || test_bit(MD_RECOVERY_INTR,
+                                                            &mddev->recovery));
                 }
  
-               if (kthread_should_stop())
-                       goto interrupted;
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
  
                 sectors = mddev->pers->sync_request(mddev, j, &skipped,
                                                   currspeed < speed_min(mddev));
                 if (sectors == 0) {
                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       goto out;
+                       break;
                 }
  
                 if (!skipped) { /* actual IO requested */
@@ -7556,10 +7593,8 @@ void md_do_sync(struct md_thread *thread)
                         last_mark = next;
                 }
  
-
-               if (kthread_should_stop())
-                       goto interrupted;
-
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
  
                 /*
                  * this loop exits only if either when we are slower than
@@ -7582,11 +7617,12 @@ void md_do_sync(struct md_thread *thread)
                         }
                 }
         }
-       printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+       printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+              test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+              ? "interrupted" : "done");
         /*
          * this also signals 'finished resyncing' to md_stop
          */
- out:
         blk_finish_plug(&plug);
         wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
  
@@ -7640,16 +7676,6 @@ void md_do_sync(struct md_thread *thread)
         set_bit(MD_RECOVERY_DONE, &mddev->recovery);
         md_wakeup_thread(mddev->thread);
         return;
-
- interrupted:
-       /*
-        * got a signal, exit.
-        */
-       printk(KERN_INFO
-              "md: md_do_sync() got signal ... exiting\n");
-       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-       goto out;
-
  }
  EXPORT_SYMBOL_GPL(md_do_sync);
  
@@ -7894,6 +7920,7 @@ void md_reap_sync_thread(struct mddev *mddev)
  
         /* resync has finished, collect result */
         md_unregister_thread(&mddev->sync_thread);
+       wake_up(&resync_wait);
         if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
             !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
                 /* success...*/
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index af6681b19776d2f695030452a9d6ab42821ed0df..1e5a540995e932852df5ff484a96bfcc8636a432 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
   */
  static int max_queued_requests = 1024;
  
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector);
  static void lower_barrier(struct r1conf *conf);
  
  static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -84,10 +85,12 @@ static void r1bio_pool_free(void *r1_bio, void *data)
  }
  
  #define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_DEPTH 32
  #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
  #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
+#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
  
  static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
  {
@@ -225,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
         struct bio *bio = r1_bio->master_bio;
         int done;
         struct r1conf *conf = r1_bio->mddev->private;
+       sector_t start_next_window = r1_bio->start_next_window;
+       sector_t bi_sector = bio->bi_sector;
  
         if (bio->bi_phys_segments) {
                 unsigned long flags;
@@ -232,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 bio->bi_phys_segments--;
                 done = (bio->bi_phys_segments == 0);
                 spin_unlock_irqrestore(&conf->device_lock, flags);
+               /*
+                * make_request() might be waiting for
+                * bi_phys_segments to decrease
+                */
+               wake_up(&conf->wait_barrier);
         } else
                 done = 1;
  
@@ -243,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                  * Wake up any possible resync thread that waits for the device
                  * to go idle.
                  */
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bi_sector);
         }
  }
  
@@ -814,8 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
   *    there is no normal IO happeing.  It must arrange to call
   *    lower_barrier when the particular background IO completes.
   */
-#define RESYNC_DEPTH 32
-
  static void raise_barrier(struct r1conf *conf)
  {
         spin_lock_irq(&conf->resync_lock);
@@ -827,9 +835,19 @@ static void raise_barrier(struct r1conf *conf)
         /* block any new IO from starting */
         conf->barrier++;
  
-       /* Now wait for all pending IO to complete */
+       /* For these conditions we must wait:
+        * A: while the array is in frozen state
+        * B: while barrier >= RESYNC_DEPTH, meaning resync reach
+        *    the max count which allowed.
+        * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+        *    next resync will reach to the window which normal bios are
+        *    handling.
+        */
         wait_event_lock_irq(conf->wait_barrier,
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           !conf->array_frozen &&
+                           conf->barrier < RESYNC_DEPTH &&
+                           (conf->start_next_window >=
+                            conf->next_resync + RESYNC_SECTORS),
                             conf->resync_lock);
  
         spin_unlock_irq(&conf->resync_lock);
@@ -845,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
         wake_up(&conf->wait_barrier);
  }
  
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
  {
+       bool wait = false;
+
+       if (conf->array_frozen || !bio)
+               wait = true;
+       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                       wait = true;
+               else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                               >= bio_end_sector(bio)) ||
+                        (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                               <= bio->bi_sector))
+                       wait = false;
+               else
+                       wait = true;
+       }
+
+       return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+{
+       sector_t sector = 0;
+
         spin_lock_irq(&conf->resync_lock);
-       if (conf->barrier) {
+       if (need_to_wait_for_sync(conf, bio)) {
                 conf->nr_waiting++;
                 /* Wait for the barrier to drop.
                  * However if there are already pending
@@ -860,22 +901,67 @@ static void wait_barrier(struct r1conf *conf)
                  * count down.
                  */
                 wait_event_lock_irq(conf->wait_barrier,
-                                   !conf->barrier ||
-                                   (conf->nr_pending &&
+                                   !conf->array_frozen &&
+                                   (!conf->barrier ||
+                                   ((conf->start_next_window <
+                                     conf->next_resync + RESYNC_SECTORS) &&
                                      current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    !bio_list_empty(current->bio_list))),
                                     conf->resync_lock);
                 conf->nr_waiting--;
         }
+
+       if (bio && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                   <= bio->bi_sector) {
+                       if (conf->start_next_window == MaxSector)
+                               conf->start_next_window =
+                                       conf->next_resync +
+                                       NEXT_NORMALIO_DISTANCE;
+
+                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                           <= bio->bi_sector)
+                               conf->next_window_requests++;
+                       else
+                               conf->current_window_requests++;
+               }
+               if (bio->bi_sector >= conf->start_next_window)
+                       sector = conf->start_next_window;
+       }
+
         conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
+       return sector;
  }
  
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector)
  {
         unsigned long flags;
+
         spin_lock_irqsave(&conf->resync_lock, flags);
         conf->nr_pending--;
+       if (start_next_window) {
+               if (start_next_window == conf->start_next_window) {
+                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                           <= bi_sector)
+                               conf->next_window_requests--;
+                       else
+                               conf->current_window_requests--;
+               } else
+                       conf->current_window_requests--;
+
+               if (!conf->current_window_requests) {
+                       if (conf->next_window_requests) {
+                               conf->current_window_requests =
+                                       conf->next_window_requests;
+                               conf->next_window_requests = 0;
+                               conf->start_next_window +=
+                                       NEXT_NORMALIO_DISTANCE;
+                       } else
+                               conf->start_next_window = MaxSector;
+               }
+       }
         spin_unlock_irqrestore(&conf->resync_lock, flags);
         wake_up(&conf->wait_barrier);
  }
@@ -884,8 +970,7 @@ static void freeze_array(struct r1conf *conf, int extra)
  {
         /* stop syncio and normal IO and wait for everything to
          * go quite.
-        * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+extra
+        * We wait until nr_pending match nr_queued+extra
          * This is called in the context of one normal IO request
          * that has failed. Thus any sync request that might be pending
          * will be blocked by nr_pending, and we need to wait for
@@ -895,8 +980,7 @@ static void freeze_array(struct r1conf *conf, int extra)
          * we continue.
          */
         spin_lock_irq(&conf->resync_lock);
-       conf->barrier++;
-       conf->nr_waiting++;
+       conf->array_frozen = 1;
         wait_event_lock_irq_cmd(conf->wait_barrier,
                                 conf->nr_pending == conf->nr_queued+extra,
                                 conf->resync_lock,
@@ -907,8 +991,7 @@ static void unfreeze_array(struct r1conf *conf)
  {
         /* reverse the effect of the freeze */
         spin_lock_irq(&conf->resync_lock);
-       conf->barrier--;
-       conf->nr_waiting--;
+       conf->array_frozen = 0;
         wake_up(&conf->wait_barrier);
         spin_unlock_irq(&conf->resync_lock);
  }
@@ -1013,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         int first_clone;
         int sectors_handled;
         int max_sectors;
+       sector_t start_next_window;
  
         /*
          * Register the new request and wait if the reconstruction
@@ -1042,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                 finish_wait(&conf->wait_barrier, &w);
         }
  
-       wait_barrier(conf);
+       start_next_window = wait_barrier(conf, bio);
  
         bitmap = mddev->bitmap;
  
@@ -1163,6 +1247,7 @@ read_again:
  
         disks = conf->raid_disks * 2;
   retry_write:
+       r1_bio->start_next_window = start_next_window;
         blocked_rdev = NULL;
         rcu_read_lock();
         max_sectors = r1_bio->sectors;
@@ -1231,14 +1316,24 @@ read_again:
         if (unlikely(blocked_rdev)) {
                 /* Wait for this device to become unblocked */
                 int j;
+               sector_t old = start_next_window;
  
                 for (j = 0; j < i; j++)
                         if (r1_bio->bios[j])
                                 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                 r1_bio->state = 0;
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bio->bi_sector);
                 md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               start_next_window = wait_barrier(conf, bio);
+               /*
+                * We must make sure the multi r1bios of bio have
+                * the same value of bi_phys_segments
+                */
+               if (bio->bi_phys_segments && old &&
+                   old != start_next_window)
+                       /* Wait for the former r1bio(s) to complete */
+                       wait_event(conf->wait_barrier,
+                                  bio->bi_phys_segments == 1);
                 goto retry_write;
         }
  
@@ -1438,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
  
  static void close_sync(struct r1conf *conf)
  {
-       wait_barrier(conf);
-       allow_barrier(conf);
+       wait_barrier(conf, NULL);
+       allow_barrier(conf, 0, 0);
  
         mempool_destroy(conf->r1buf_pool);
         conf->r1buf_pool = NULL;
+
+       conf->next_resync = 0;
+       conf->start_next_window = MaxSector;
  }
  
  static int raid1_spare_active(struct mddev *mddev)
@@ -2714,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         conf->pending_count = 0;
         conf->recovery_disabled = mddev->recovery_disabled - 1;
  
+       conf->start_next_window = MaxSector;
+       conf->current_window_requests = conf->next_window_requests = 0;
+
         err = -EIO;
         for (i = 0; i < conf->raid_disks * 2; i++) {
  
@@ -2871,8 +2972,8 @@ static int stop(struct mddev *mddev)
                            atomic_read(&bitmap->behind_writes) == 0);
         }
  
-       raise_barrier(conf);
-       lower_barrier(conf);
+       freeze_array(conf, 0);
+       unfreeze_array(conf);
  
         md_unregister_thread(&mddev->thread);
         if (conf->r1bio_pool)
@@ -3031,10 +3132,10 @@ static void raid1_quiesce(struct mddev *mddev, int state)
                 wake_up(&conf->wait_barrier);
                 break;
         case 1:
-               raise_barrier(conf);
+               freeze_array(conf, 0);
                 break;
         case 0:
-               lower_barrier(conf);
+               unfreeze_array(conf);
                 break;
         }
  }
@@ -3051,7 +3152,8 @@ static void *raid1_takeover(struct mddev *mddev)
                 mddev->new_chunk_sectors = 0;
                 conf = setup_conf(mddev);
                 if (!IS_ERR(conf))
-                       conf->barrier = 1;
+                       /* Array must appear to be quiesced */
+                       conf->array_frozen = 1;
                 return conf;
         }
         return ERR_PTR(-EINVAL);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h

index 0ff3715fb7eba5ec4fed61a9922276b07b363aff..9bebca7bff2fbc4ec4780031190e6666f7abf56d 100644 (file)
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@ struct r1conf {
          */
         sector_t                next_resync;
  
+       /* When raid1 starts resync, we divide array into four partitions
+        * |---------|--------------|---------------------|-------------|
+        *        next_resync   start_next_window       end_window
+        * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+        * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+        * current_window_requests means the count of normalIO between
+        *   start_next_window and end_window.
+        * next_window_requests means the count of normalIO after end_window.
+        * */
+       sector_t                start_next_window;
+       int                     current_window_requests;
+       int                     next_window_requests;
+
         spinlock_t              device_lock;
  
         /* list of 'struct r1bio' that need to be processed by raid1d,
@@ -65,6 +78,7 @@ struct r1conf {
         int                     nr_waiting;
         int                     nr_queued;
         int                     barrier;
+       int                     array_frozen;
  
         /* Set to 1 if a full sync is needed, (fresh device added).
          * Cleared when a sync completes.
@@ -111,6 +125,7 @@ struct r1bio {
                                                  * in this BehindIO request
                                                  */
         sector_t                sector;
+       sector_t                start_next_window;
         int                     sectors;
         unsigned long           state;
         struct mddev            *mddev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 7c3508abb5e178fe310cae9d2c98352efb34f2af..c504e8389e69e3ab9ad717b9f83d0c19c2008d68 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4384,7 +4384,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
                 wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+                       allow_barrier(conf);
+                       return sectors_done;
+               }
                 conf->reshape_safe = mddev->reshape_position;
                 allow_barrier(conf);
         }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 7f0e17a27aebcd3b448dca34290ab1c735c89eb1..47da0af6322be1bd7930f902c96c57875799a358 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -85,6 +85,42 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
         return &conf->stripe_hashtbl[hash];
  }
  
+static inline int stripe_hash_locks_hash(sector_t sect)
+{
+       return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+}
+
+static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+{
+       spin_lock_irq(conf->hash_locks + hash);
+       spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+{
+       spin_unlock(&conf->device_lock);
+       spin_unlock_irq(conf->hash_locks + hash);
+}
+
+static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+       int i;
+       local_irq_disable();
+       spin_lock(conf->hash_locks);
+       for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+               spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
+       spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+       int i;
+       spin_unlock(&conf->device_lock);
+       for (i = NR_STRIPE_HASH_LOCKS; i; i--)
+               spin_unlock(conf->hash_locks + i - 1);
+       local_irq_enable();
+}
+
  /* bio's attached to a stripe+device for I/O are linked together in bi_sector
   * order without overlap.  There may be several bio's per stripe+device, and
   * a bio could span several devices.
@@ -249,7 +285,8 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
         }
  }
  
-static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
+                             struct list_head *temp_inactive_list)
  {
         BUG_ON(!list_empty(&sh->lru));
         BUG_ON(atomic_read(&conf->active_stripes)==0);
@@ -278,23 +315,68 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
                             < IO_THRESHOLD)
                                 md_wakeup_thread(conf->mddev->thread);
                 atomic_dec(&conf->active_stripes);
-               if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-                       list_add_tail(&sh->lru, &conf->inactive_list);
-                       wake_up(&conf->wait_for_stripe);
-                       if (conf->retry_read_aligned)
-                               md_wakeup_thread(conf->mddev->thread);
-               }
+               if (!test_bit(STRIPE_EXPANDING, &sh->state))
+                       list_add_tail(&sh->lru, temp_inactive_list);
         }
  }
  
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
+                            struct list_head *temp_inactive_list)
  {
         if (atomic_dec_and_test(&sh->count))
-               do_release_stripe(conf, sh);
+               do_release_stripe(conf, sh, temp_inactive_list);
+}
+
+/*
+ * @hash could be NR_STRIPE_HASH_LOCKS, then we have a list of inactive_list
+ *
+ * Be careful: Only one task can add/delete stripes from temp_inactive_list at
+ * given time. Adding stripes only takes device lock, while deleting stripes
+ * only takes hash lock.
+ */
+static void release_inactive_stripe_list(struct r5conf *conf,
+                                        struct list_head *temp_inactive_list,
+                                        int hash)
+{
+       int size;
+       bool do_wakeup = false;
+       unsigned long flags;
+
+       if (hash == NR_STRIPE_HASH_LOCKS) {
+               size = NR_STRIPE_HASH_LOCKS;
+               hash = NR_STRIPE_HASH_LOCKS - 1;
+       } else
+               size = 1;
+       while (size) {
+               struct list_head *list = &temp_inactive_list[size - 1];
+
+               /*
+                * We don't hold any lock here yet, get_active_stripe() might
+                * remove stripes from the list
+                */
+               if (!list_empty_careful(list)) {
+                       spin_lock_irqsave(conf->hash_locks + hash, flags);
+                       if (list_empty(conf->inactive_list + hash) &&
+                           !list_empty(list))
+                               atomic_dec(&conf->empty_inactive_list_nr);
+                       list_splice_tail_init(list, conf->inactive_list + hash);
+                       do_wakeup = true;
+                       spin_unlock_irqrestore(conf->hash_locks + hash, flags);
+               }
+               size--;
+               hash--;
+       }
+
+       if (do_wakeup) {
+               wake_up(&conf->wait_for_stripe);
+               if (conf->retry_read_aligned)
+                       md_wakeup_thread(conf->mddev->thread);
+       }
  }
  
  /* should hold conf->device_lock already */
-static int release_stripe_list(struct r5conf *conf)
+static int release_stripe_list(struct r5conf *conf,
+                              struct list_head *temp_inactive_list)
  {
         struct stripe_head *sh;
         int count = 0;
@@ -303,6 +385,8 @@ static int release_stripe_list(struct r5conf *conf)
         head = llist_del_all(&conf->released_stripes);
         head = llist_reverse_order(head);
         while (head) {
+               int hash;
+
                 sh = llist_entry(head, struct stripe_head, release_list);
                 head = llist_next(head);
                 /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
@@ -313,7 +397,8 @@ static int release_stripe_list(struct r5conf *conf)
                  * again, the count is always > 1. This is true for
                  * STRIPE_ON_UNPLUG_LIST bit too.
                  */
-               __release_stripe(conf, sh);
+               hash = sh->hash_lock_index;
+               __release_stripe(conf, sh, &temp_inactive_list[hash]);
                 count++;
         }
  
@@ -324,9 +409,12 @@ static void release_stripe(struct stripe_head *sh)
  {
         struct r5conf *conf = sh->raid_conf;
         unsigned long flags;
+       struct list_head list;
+       int hash;
         bool wakeup;
  
-       if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+       if (unlikely(!conf->mddev->thread) ||
+               test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
                 goto slow_path;
         wakeup = llist_add(&sh->release_list, &conf->released_stripes);
         if (wakeup)
@@ -336,8 +424,11 @@ slow_path:
         local_irq_save(flags);
         /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
         if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
-               do_release_stripe(conf, sh);
+               INIT_LIST_HEAD(&list);
+               hash = sh->hash_lock_index;
+               do_release_stripe(conf, sh, &list);
                 spin_unlock(&conf->device_lock);
+               release_inactive_stripe_list(conf, &list, hash);
         }
         local_irq_restore(flags);
  }
@@ -362,18 +453,21 @@ static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
  
  
  /* find an idle stripe, make sure it is unhashed, and return it. */
-static struct stripe_head *get_free_stripe(struct r5conf *conf)
+static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
  {
         struct stripe_head *sh = NULL;
         struct list_head *first;
  
-       if (list_empty(&conf->inactive_list))
+       if (list_empty(conf->inactive_list + hash))
                 goto out;
-       first = conf->inactive_list.next;
+       first = (conf->inactive_list + hash)->next;
         sh = list_entry(first, struct stripe_head, lru);
         list_del_init(first);
         remove_hash(sh);
         atomic_inc(&conf->active_stripes);
+       BUG_ON(hash != sh->hash_lock_index);
+       if (list_empty(conf->inactive_list + hash))
+               atomic_inc(&conf->empty_inactive_list_nr);
  out:
         return sh;
  }
@@ -416,7 +510,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
  static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
  {
         struct r5conf *conf = sh->raid_conf;
-       int i;
+       int i, seq;
  
         BUG_ON(atomic_read(&sh->count) != 0);
         BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
@@ -426,7 +520,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                 (unsigned long long)sh->sector);
  
         remove_hash(sh);
-
+retry:
+       seq = read_seqcount_begin(&conf->gen_lock);
         sh->generation = conf->generation - previous;
         sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
         sh->sector = sector;
@@ -448,6 +543,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                 dev->flags = 0;
                 raid5_build_block(sh, i, previous);
         }
+       if (read_seqcount_retry(&conf->gen_lock, seq))
+               goto retry;
         insert_hash(conf, sh);
         sh->cpu = smp_processor_id();
  }
@@ -552,29 +649,31 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                   int previous, int noblock, int noquiesce)
  {
         struct stripe_head *sh;
+       int hash = stripe_hash_locks_hash(sector);
  
         pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
  
-       spin_lock_irq(&conf->device_lock);
+       spin_lock_irq(conf->hash_locks + hash);
  
         do {
                 wait_event_lock_irq(conf->wait_for_stripe,
                                     conf->quiesce == 0 || noquiesce,
-                                   conf->device_lock);
+                                   *(conf->hash_locks + hash));
                 sh = __find_stripe(conf, sector, conf->generation - previous);
                 if (!sh) {
                         if (!conf->inactive_blocked)
-                               sh = get_free_stripe(conf);
+                               sh = get_free_stripe(conf, hash);
                         if (noblock && sh == NULL)
                                 break;
                         if (!sh) {
                                 conf->inactive_blocked = 1;
-                               wait_event_lock_irq(conf->wait_for_stripe,
-                                                   !list_empty(&conf->inactive_list) &&
-                                                   (atomic_read(&conf->active_stripes)
-                                                    < (conf->max_nr_stripes *3/4)
-                                                    || !conf->inactive_blocked),
-                                                   conf->device_lock);
+                               wait_event_lock_irq(
+                                       conf->wait_for_stripe,
+                                       !list_empty(conf->inactive_list + hash) &&
+                                       (atomic_read(&conf->active_stripes)
+                                        < (conf->max_nr_stripes * 3 / 4)
+                                        || !conf->inactive_blocked),
+                                       *(conf->hash_locks + hash));
                                 conf->inactive_blocked = 0;
                         } else
                                 init_stripe(sh, sector, previous);
@@ -585,9 +684,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                     && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
                                     && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
                         } else {
+                               spin_lock(&conf->device_lock);
                                 if (!test_bit(STRIPE_HANDLE, &sh->state))
                                         atomic_inc(&conf->active_stripes);
                                 if (list_empty(&sh->lru) &&
+                                   !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
                                     !test_bit(STRIPE_EXPANDING, &sh->state))
                                         BUG();
                                 list_del_init(&sh->lru);
@@ -595,6 +696,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                         sh->group->stripes_cnt--;
                                         sh->group = NULL;
                                 }
+                               spin_unlock(&conf->device_lock);
                         }
                 }
         } while (sh == NULL);
@@ -602,7 +704,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
         if (sh)
                 atomic_inc(&sh->count);
  
-       spin_unlock_irq(&conf->device_lock);
+       spin_unlock_irq(conf->hash_locks + hash);
         return sh;
  }
  
@@ -758,7 +860,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                 bi->bi_sector = (sh->sector
                                                  + rdev->data_offset);
                         if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
-                               bi->bi_rw |= REQ_FLUSH;
+                               bi->bi_rw |= REQ_NOMERGE;
  
                         bi->bi_vcnt = 1;
                         bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1582,7 +1684,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
         put_cpu();
  }
  
-static int grow_one_stripe(struct r5conf *conf)
+static int grow_one_stripe(struct r5conf *conf, int hash)
  {
         struct stripe_head *sh;
         sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
@@ -1598,6 +1700,7 @@ static int grow_one_stripe(struct r5conf *conf)
                 kmem_cache_free(conf->slab_cache, sh);
                 return 0;
         }
+       sh->hash_lock_index = hash;
         /* we just created an active stripe so... */
         atomic_set(&sh->count, 1);
         atomic_inc(&conf->active_stripes);
@@ -1610,6 +1713,7 @@ static int grow_stripes(struct r5conf *conf, int num)
  {
         struct kmem_cache *sc;
         int devs = max(conf->raid_disks, conf->previous_raid_disks);
+       int hash;
  
         if (conf->mddev->gendisk)
                 sprintf(conf->cache_name[0],
@@ -1627,9 +1731,13 @@ static int grow_stripes(struct r5conf *conf, int num)
                 return 1;
         conf->slab_cache = sc;
         conf->pool_size = devs;
-       while (num--)
-               if (!grow_one_stripe(conf))
+       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
+       while (num--) {
+               if (!grow_one_stripe(conf, hash))
                         return 1;
+               conf->max_nr_stripes++;
+               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
+       }
         return 0;
  }
  
@@ -1687,6 +1795,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
         int err;
         struct kmem_cache *sc;
         int i;
+       int hash, cnt;
  
         if (newsize <= conf->pool_size)
                 return 0; /* never bother to shrink */
@@ -1726,19 +1835,29 @@ static int resize_stripes(struct r5conf *conf, int newsize)
          * OK, we have enough stripes, start collecting inactive
          * stripes and copying them over
          */
+       hash = 0;
+       cnt = 0;
         list_for_each_entry(nsh, &newstripes, lru) {
-               spin_lock_irq(&conf->device_lock);
-               wait_event_lock_irq(conf->wait_for_stripe,
-                                   !list_empty(&conf->inactive_list),
-                                   conf->device_lock);
-               osh = get_free_stripe(conf);
-               spin_unlock_irq(&conf->device_lock);
+               lock_device_hash_lock(conf, hash);
+               wait_event_cmd(conf->wait_for_stripe,
+                                   !list_empty(conf->inactive_list + hash),
+                                   unlock_device_hash_lock(conf, hash),
+                                   lock_device_hash_lock(conf, hash));
+               osh = get_free_stripe(conf, hash);
+               unlock_device_hash_lock(conf, hash);
                 atomic_set(&nsh->count, 1);
                 for(i=0; i<conf->pool_size; i++)
                         nsh->dev[i].page = osh->dev[i].page;
                 for( ; i<newsize; i++)
                         nsh->dev[i].page = NULL;
+               nsh->hash_lock_index = hash;
                 kmem_cache_free(conf->slab_cache, osh);
+               cnt++;
+               if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
+                   !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
+                       hash++;
+                       cnt = 0;
+               }
         }
         kmem_cache_destroy(conf->slab_cache);
  
@@ -1797,13 +1916,13 @@ static int resize_stripes(struct r5conf *conf, int newsize)
         return err;
  }
  
-static int drop_one_stripe(struct r5conf *conf)
+static int drop_one_stripe(struct r5conf *conf, int hash)
  {
         struct stripe_head *sh;
  
-       spin_lock_irq(&conf->device_lock);
-       sh = get_free_stripe(conf);
-       spin_unlock_irq(&conf->device_lock);
+       spin_lock_irq(conf->hash_locks + hash);
+       sh = get_free_stripe(conf, hash);
+       spin_unlock_irq(conf->hash_locks + hash);
         if (!sh)
                 return 0;
         BUG_ON(atomic_read(&sh->count));
@@ -1815,8 +1934,10 @@ static int drop_one_stripe(struct r5conf *conf)
  
  static void shrink_stripes(struct r5conf *conf)
  {
-       while (drop_one_stripe(conf))
-               ;
+       int hash;
+       for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
+               while (drop_one_stripe(conf, hash))
+                       ;
  
         if (conf->slab_cache)
                 kmem_cache_destroy(conf->slab_cache);
@@ -1921,6 +2042,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
                                mdname(conf->mddev), bdn);
                 else
                         retry = 1;
+               if (set_bad && test_bit(In_sync, &rdev->flags)
+                   && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                       retry = 1;
                 if (retry)
                         if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
                                 set_bit(R5_ReadError, &sh->dev[i].flags);
@@ -3900,7 +4024,8 @@ static void raid5_activate_delayed(struct r5conf *conf)
         }
  }
  
-static void activate_bit_delay(struct r5conf *conf)
+static void activate_bit_delay(struct r5conf *conf,
+       struct list_head *temp_inactive_list)
  {
         /* device_lock is held */
         struct list_head head;
@@ -3908,9 +4033,11 @@ static void activate_bit_delay(struct r5conf *conf)
         list_del_init(&conf->bitmap_list);
         while (!list_empty(&head)) {
                 struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
+               int hash;
                 list_del_init(&sh->lru);
                 atomic_inc(&sh->count);
-               __release_stripe(conf, sh);
+               hash = sh->hash_lock_index;
+               __release_stripe(conf, sh, &temp_inactive_list[hash]);
         }
  }
  
@@ -3926,7 +4053,7 @@ int md_raid5_congested(struct mddev *mddev, int bits)
                 return 1;
         if (conf->quiesce)
                 return 1;
-       if (list_empty_careful(&conf->inactive_list))
+       if (atomic_read(&conf->empty_inactive_list_nr))
                 return 1;
  
         return 0;
@@ -4256,6 +4383,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
  struct raid5_plug_cb {
         struct blk_plug_cb      cb;
         struct list_head        list;
+       struct list_head        temp_inactive_list[NR_STRIPE_HASH_LOCKS];
  };
  
  static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
@@ -4266,6 +4394,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
         struct mddev *mddev = cb->cb.data;
         struct r5conf *conf = mddev->private;
         int cnt = 0;
+       int hash;
  
         if (cb->list.next && !list_empty(&cb->list)) {
                 spin_lock_irq(&conf->device_lock);
@@ -4283,11 +4412,14 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
                          * STRIPE_ON_RELEASE_LIST could be set here. In that
                          * case, the count is always > 1 here
                          */
-                       __release_stripe(conf, sh);
+                       hash = sh->hash_lock_index;
+                       __release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
                         cnt++;
                 }
                 spin_unlock_irq(&conf->device_lock);
         }
+       release_inactive_stripe_list(conf, cb->temp_inactive_list,
+                                    NR_STRIPE_HASH_LOCKS);
         if (mddev->queue)
                 trace_block_unplug(mddev->queue, cnt, !from_schedule);
         kfree(cb);
@@ -4308,8 +4440,12 @@ static void release_stripe_plug(struct mddev *mddev,
  
         cb = container_of(blk_cb, struct raid5_plug_cb, cb);
  
-       if (cb->list.next == NULL)
+       if (cb->list.next == NULL) {
+               int i;
                 INIT_LIST_HEAD(&cb->list);
+               for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+                       INIT_LIST_HEAD(cb->temp_inactive_list + i);
+       }
  
         if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
                 list_add_tail(&sh->lru, &cb->list);
@@ -4692,14 +4828,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
             time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                 /* Cannot proceed until we've updated the superblock... */
                 wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes)==0);
+                          atomic_read(&conf->reshape_stripes)==0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       return 0;
                 mddev->reshape_position = conf->reshape_progress;
                 mddev->curr_resync_completed = sector_nr;
                 conf->reshape_checkpoint = jiffies;
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
                 wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       return 0;
                 spin_lock_irq(&conf->device_lock);
                 conf->reshape_safe = mddev->reshape_position;
                 spin_unlock_irq(&conf->device_lock);
@@ -4782,7 +4923,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
             >= mddev->resync_max - mddev->curr_resync_completed) {
                 /* Cannot proceed until we've updated the superblock... */
                 wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes) == 0);
+                          atomic_read(&conf->reshape_stripes) == 0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       goto ret;
                 mddev->reshape_position = conf->reshape_progress;
                 mddev->curr_resync_completed = sector_nr;
                 conf->reshape_checkpoint = jiffies;
@@ -4790,13 +4934,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                 md_wakeup_thread(mddev->thread);
                 wait_event(mddev->sb_wait,
                            !test_bit(MD_CHANGE_DEVS, &mddev->flags)
-                          || kthread_should_stop());
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       goto ret;
                 spin_lock_irq(&conf->device_lock);
                 conf->reshape_safe = mddev->reshape_position;
                 spin_unlock_irq(&conf->device_lock);
                 wake_up(&conf->wait_for_overlap);
                 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
         }
+ret:
         return reshape_sectors;
  }
  
@@ -4954,27 +5101,45 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  }
  
  static int handle_active_stripes(struct r5conf *conf, int group,
-                                struct r5worker *worker)
+                                struct r5worker *worker,
+                                struct list_head *temp_inactive_list)
  {
         struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
-       int i, batch_size = 0;
+       int i, batch_size = 0, hash;
+       bool release_inactive = false;
  
         while (batch_size < MAX_STRIPE_BATCH &&
                         (sh = __get_priority_stripe(conf, group)) != NULL)
                 batch[batch_size++] = sh;
  
-       if (batch_size == 0)
-               return batch_size;
+       if (batch_size == 0) {
+               for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+                       if (!list_empty(temp_inactive_list + i))
+                               break;
+               if (i == NR_STRIPE_HASH_LOCKS)
+                       return batch_size;
+               release_inactive = true;
+       }
         spin_unlock_irq(&conf->device_lock);
  
+       release_inactive_stripe_list(conf, temp_inactive_list,
+                                    NR_STRIPE_HASH_LOCKS);
+
+       if (release_inactive) {
+               spin_lock_irq(&conf->device_lock);
+               return 0;
+       }
+
         for (i = 0; i < batch_size; i++)
                 handle_stripe(batch[i]);
  
         cond_resched();
  
         spin_lock_irq(&conf->device_lock);
-       for (i = 0; i < batch_size; i++)
-               __release_stripe(conf, batch[i]);
+       for (i = 0; i < batch_size; i++) {
+               hash = batch[i]->hash_lock_index;
+               __release_stripe(conf, batch[i], &temp_inactive_list[hash]);
+       }
         return batch_size;
  }
  
@@ -4995,9 +5160,10 @@ static void raid5_do_work(struct work_struct *work)
         while (1) {
                 int batch_size, released;
  
-               released = release_stripe_list(conf);
+               released = release_stripe_list(conf, worker->temp_inactive_list);
  
-               batch_size = handle_active_stripes(conf, group_id, worker);
+               batch_size = handle_active_stripes(conf, group_id, worker,
+                                                  worker->temp_inactive_list);
                 worker->working = false;
                 if (!batch_size && !released)
                         break;
@@ -5036,7 +5202,7 @@ static void raid5d(struct md_thread *thread)
                 struct bio *bio;
                 int batch_size, released;
  
-               released = release_stripe_list(conf);
+               released = release_stripe_list(conf, conf->temp_inactive_list);
  
                 if (
                     !list_empty(&conf->bitmap_list)) {
@@ -5046,7 +5212,7 @@ static void raid5d(struct md_thread *thread)
                         bitmap_unplug(mddev->bitmap);
                         spin_lock_irq(&conf->device_lock);
                         conf->seq_write = conf->seq_flush;
-                       activate_bit_delay(conf);
+                       activate_bit_delay(conf, conf->temp_inactive_list);
                 }
                 raid5_activate_delayed(conf);
  
@@ -5060,7 +5226,8 @@ static void raid5d(struct md_thread *thread)
                         handled++;
                 }
  
-               batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+               batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
+                                                  conf->temp_inactive_list);
                 if (!batch_size && !released)
                         break;
                 handled += batch_size;
@@ -5096,22 +5263,29 @@ raid5_set_cache_size(struct mddev *mddev, int size)
  {
         struct r5conf *conf = mddev->private;
         int err;
+       int hash;
  
         if (size <= 16 || size > 32768)
                 return -EINVAL;
+       hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
         while (size < conf->max_nr_stripes) {
-               if (drop_one_stripe(conf))
+               if (drop_one_stripe(conf, hash))
                         conf->max_nr_stripes--;
                 else
                         break;
+               hash--;
+               if (hash < 0)
+                       hash = NR_STRIPE_HASH_LOCKS - 1;
         }
         err = md_allow_write(mddev);
         if (err)
                 return err;
+       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
         while (size > conf->max_nr_stripes) {
-               if (grow_one_stripe(conf))
+               if (grow_one_stripe(conf, hash))
                         conf->max_nr_stripes++;
                 else break;
+               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
         }
         return 0;
  }
@@ -5199,15 +5373,18 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
                 return 0;
  }
  
-static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+                              int *group_cnt,
+                              int *worker_cnt_per_group,
+                              struct r5worker_group **worker_groups);
  static ssize_t
  raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
  {
         struct r5conf *conf = mddev->private;
         unsigned long new;
         int err;
-       struct r5worker_group *old_groups;
-       int old_group_cnt;
+       struct r5worker_group *new_groups, *old_groups;
+       int group_cnt, worker_cnt_per_group;
  
         if (len >= PAGE_SIZE)
                 return -EINVAL;
@@ -5223,14 +5400,19 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
         mddev_suspend(mddev);
  
         old_groups = conf->worker_groups;
-       old_group_cnt = conf->worker_cnt_per_group;
+       if (old_groups)
+               flush_workqueue(raid5_wq);
+
+       err = alloc_thread_groups(conf, new,
+                                 &group_cnt, &worker_cnt_per_group,
+                                 &new_groups);
+       if (!err) {
+               spin_lock_irq(&conf->device_lock);
+               conf->group_cnt = group_cnt;
+               conf->worker_cnt_per_group = worker_cnt_per_group;
+               conf->worker_groups = new_groups;
+               spin_unlock_irq(&conf->device_lock);
  
-       conf->worker_groups = NULL;
-       err = alloc_thread_groups(conf, new);
-       if (err) {
-               conf->worker_groups = old_groups;
-               conf->worker_cnt_per_group = old_group_cnt;
-       } else {
                 if (old_groups)
                         kfree(old_groups[0].workers);
                 kfree(old_groups);
@@ -5260,40 +5442,47 @@ static struct attribute_group raid5_attrs_group = {
         .attrs = raid5_attrs,
  };
  
-static int alloc_thread_groups(struct r5conf *conf, int cnt)
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+                              int *group_cnt,
+                              int *worker_cnt_per_group,
+                              struct r5worker_group **worker_groups)
  {
-       int i, j;
+       int i, j, k;
         ssize_t size;
         struct r5worker *workers;
  
-       conf->worker_cnt_per_group = cnt;
+       *worker_cnt_per_group = cnt;
         if (cnt == 0) {
-               conf->worker_groups = NULL;
+               *group_cnt = 0;
+               *worker_groups = NULL;
                 return 0;
         }
-       conf->group_cnt = num_possible_nodes();
+       *group_cnt = num_possible_nodes();
         size = sizeof(struct r5worker) * cnt;
-       workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
-       conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
-                               conf->group_cnt, GFP_NOIO);
-       if (!conf->worker_groups || !workers) {
+       workers = kzalloc(size * *group_cnt, GFP_NOIO);
+       *worker_groups = kzalloc(sizeof(struct r5worker_group) *
+                               *group_cnt, GFP_NOIO);
+       if (!*worker_groups || !workers) {
                 kfree(workers);
-               kfree(conf->worker_groups);
-               conf->worker_groups = NULL;
+               kfree(*worker_groups);
                 return -ENOMEM;
         }
  
-       for (i = 0; i < conf->group_cnt; i++) {
+       for (i = 0; i < *group_cnt; i++) {
                 struct r5worker_group *group;
  
-               group = &conf->worker_groups[i];
+               group = worker_groups[i];
                 INIT_LIST_HEAD(&group->handle_list);
                 group->conf = conf;
                 group->workers = workers + i * cnt;
  
                 for (j = 0; j < cnt; j++) {
-                       group->workers[j].group = group;
-                       INIT_WORK(&group->workers[j].work, raid5_do_work);
+                       struct r5worker *worker = group->workers + j;
+                       worker->group = group;
+                       INIT_WORK(&worker->work, raid5_do_work);
+
+                       for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++)
+                               INIT_LIST_HEAD(worker->temp_inactive_list + k);
                 }
         }
  
@@ -5444,6 +5633,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         struct md_rdev *rdev;
         struct disk_info *disk;
         char pers_name[6];
+       int i;
+       int group_cnt, worker_cnt_per_group;
+       struct r5worker_group *new_group;
  
         if (mddev->new_level != 5
             && mddev->new_level != 4
@@ -5478,7 +5670,12 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         if (conf == NULL)
                 goto abort;
         /* Don't enable multi-threading by default*/
-       if (alloc_thread_groups(conf, 0))
+       if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
+                                &new_group)) {
+               conf->group_cnt = group_cnt;
+               conf->worker_cnt_per_group = worker_cnt_per_group;
+               conf->worker_groups = new_group;
+       } else
                 goto abort;
         spin_lock_init(&conf->device_lock);
         seqcount_init(&conf->gen_lock);
@@ -5488,7 +5685,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         INIT_LIST_HEAD(&conf->hold_list);
         INIT_LIST_HEAD(&conf->delayed_list);
         INIT_LIST_HEAD(&conf->bitmap_list);
-       INIT_LIST_HEAD(&conf->inactive_list);
         init_llist_head(&conf->released_stripes);
         atomic_set(&conf->active_stripes, 0);
         atomic_set(&conf->preread_active_stripes, 0);
@@ -5514,6 +5710,21 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                 goto abort;
  
+       /* We init hash_locks[0] separately to that it can be used
+        * as the reference lock in the spin_lock_nest_lock() call
+        * in lock_all_device_hash_locks_irq in order to convince
+        * lockdep that we know what we are doing.
+        */
+       spin_lock_init(conf->hash_locks);
+       for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+               spin_lock_init(conf->hash_locks + i);
+
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+               INIT_LIST_HEAD(conf->inactive_list + i);
+
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+               INIT_LIST_HEAD(conf->temp_inactive_list + i);
+
         conf->level = mddev->new_level;
         if (raid5_alloc_percpu(conf) != 0)
                 goto abort;
@@ -5554,7 +5765,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         else
                 conf->max_degraded = 1;
         conf->algorithm = mddev->new_layout;
-       conf->max_nr_stripes = NR_STRIPES;
         conf->reshape_progress = mddev->reshape_position;
         if (conf->reshape_progress != MaxSector) {
                 conf->prev_chunk_sectors = mddev->chunk_sectors;
@@ -5563,7 +5773,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
  
         memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
                  max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
-       if (grow_stripes(conf, conf->max_nr_stripes)) {
+       atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
+       if (grow_stripes(conf, NR_STRIPES)) {
                 printk(KERN_ERR
                        "md/raid:%s: couldn't allocate %dkB for buffers\n",
                        mdname(mddev), memory);
@@ -6369,12 +6580,18 @@ static int raid5_start_reshape(struct mddev *mddev)
         if (!mddev->sync_thread) {
                 mddev->recovery = 0;
                 spin_lock_irq(&conf->device_lock);
+               write_seqcount_begin(&conf->gen_lock);
                 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+               mddev->new_chunk_sectors =
+                       conf->chunk_sectors = conf->prev_chunk_sectors;
+               mddev->new_layout = conf->algorithm = conf->prev_algo;
                 rdev_for_each(rdev, mddev)
                         rdev->new_data_offset = rdev->data_offset;
                 smp_wmb();
+               conf->generation --;
                 conf->reshape_progress = MaxSector;
                 mddev->reshape_position = MaxSector;
+               write_seqcount_end(&conf->gen_lock);
                 spin_unlock_irq(&conf->device_lock);
                 return -EAGAIN;
         }
@@ -6462,27 +6679,28 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                 break;
  
         case 1: /* stop all writes */
-               spin_lock_irq(&conf->device_lock);
+               lock_all_device_hash_locks_irq(conf);
                 /* '2' tells resync/reshape to pause so that all
                  * active stripes can drain
                  */
                 conf->quiesce = 2;
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_cmd(conf->wait_for_stripe,
                                     atomic_read(&conf->active_stripes) == 0 &&
                                     atomic_read(&conf->active_aligned_reads) == 0,
-                                   conf->device_lock);
+                                   unlock_all_device_hash_locks_irq(conf),
+                                   lock_all_device_hash_locks_irq(conf));
                 conf->quiesce = 1;
-               spin_unlock_irq(&conf->device_lock);
+               unlock_all_device_hash_locks_irq(conf);
                 /* allow reshape to continue */
                 wake_up(&conf->wait_for_overlap);
                 break;
  
         case 0: /* re-enable writes */
-               spin_lock_irq(&conf->device_lock);
+               lock_all_device_hash_locks_irq(conf);
                 conf->quiesce = 0;
                 wake_up(&conf->wait_for_stripe);
                 wake_up(&conf->wait_for_overlap);
-               spin_unlock_irq(&conf->device_lock);
+               unlock_all_device_hash_locks_irq(conf);
                 break;
         }
  }
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h

index b42e6b462edad8967e68bfa263c43edd1787953d..01ad8ae8f57830a04de4e1e30b731f74660bda62 100644 (file)
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -205,6 +205,7 @@ struct stripe_head {
         short                   pd_idx;         /* parity disk index */
         short                   qd_idx;         /* 'Q' disk index for raid6 */
         short                   ddf_layout;/* use DDF ordering to calculate Q */
+       short                   hash_lock_index;
         unsigned long           state;          /* state flags */
         atomic_t                count;        /* nr of active thread/requests */
         int                     bm_seq; /* sequence number for bitmap flushes */
@@ -367,9 +368,18 @@ struct disk_info {
         struct md_rdev  *rdev, *replacement;
  };
  
+/* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
+ * This is because we sometimes take all the spinlocks
+ * and creating that much locking depth can cause
+ * problems.
+ */
+#define NR_STRIPE_HASH_LOCKS 8
+#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)
+
  struct r5worker {
         struct work_struct work;
         struct r5worker_group *group;
+       struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
         bool working;
  };
  
@@ -382,6 +392,8 @@ struct r5worker_group {
  
  struct r5conf {
         struct hlist_head       *stripe_hashtbl;
+       /* only protect corresponding hash list and inactive_list */
+       spinlock_t              hash_locks[NR_STRIPE_HASH_LOCKS];
         struct mddev            *mddev;
         int                     chunk_sectors;
         int                     level, algorithm;
@@ -462,7 +474,8 @@ struct r5conf {
          * Free stripes pool
          */
         atomic_t                active_stripes;
-       struct list_head        inactive_list;
+       struct list_head        inactive_list[NR_STRIPE_HASH_LOCKS];
+       atomic_t                empty_inactive_list_nr;
         struct llist_head       released_stripes;
         wait_queue_head_t       wait_for_stripe;
         wait_queue_head_t       wait_for_overlap;
@@ -477,6 +490,7 @@ struct r5conf {
          * the new thread here until we fully activate the array.
          */
         struct md_thread        *thread;
+       struct list_head        temp_inactive_list[NR_STRIPE_HASH_LOCKS];
         struct r5worker_group   *worker_groups;
         int                     group_cnt;
         int                     worker_cnt_per_group;
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c

index 36513e896413c01a3b209e153e75c680aac36bc6..65cab70fefcb067e3de0a404d47bd41feaafd38b 100644 (file)
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op,
         ctx->xt->dir = DMA_MEM_TO_MEM;
         ctx->xt->src_sgl = false;
         ctx->xt->dst_sgl = true;
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-               DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
  
         tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
         if (tx == NULL) {
diff --git a/drivers/media/platform/timblogiw.c b/drivers/media/platform/timblogiw.c

index 6a74ce040d288eb06cbd6684727358277f89481b..ccdadd623a3aae3ffabd49be553709d5a9511ec6 100644 (file)
--- a/drivers/media/platform/timblogiw.c
+++ b/drivers/media/platform/timblogiw.c
@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
  
         desc = dmaengine_prep_slave_sg(fh->chan,
                 buf->sg, sg_elems, DMA_DEV_TO_MEM,
-               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+               DMA_PREP_INTERRUPT);
         if (!desc) {
                 spin_lock_irq(&fh->queue_lock);
                 list_del_init(&vb->queue);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c

index 08b18f3f5264aa8052cea030143d5bc5c4b76cd0..9e2b985293fc08cf30bef9a9455fcade1eb095cb 100644 (file)
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -633,8 +633,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
         struct dma_async_tx_descriptor *tx;
         dma_cookie_t cookie;
         dma_addr_t dst, src;
-       unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-                                 DMA_COMPL_SKIP_SRC_UNMAP;
+       unsigned long dma_flags = 0;
  
         dst_sg = buf->vb.sglist;
         dst_nents = buf->vb.sglen;
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c

index ef8956568c3a2978b90cab02a9dc0ed56c76254f..157b570ba343e4648b796e7330e7b75bc052d00a 100644 (file)
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -308,8 +308,7 @@ static void sdio_acpi_set_handle(struct sdio_func *func)
         struct mmc_host *host = func->card->host;
         u64 addr = (host->slotno << 16) | func->num;
  
-       ACPI_HANDLE_SET(&func->dev,
-                       acpi_get_child(ACPI_HANDLE(host->parent), addr));
+       acpi_preset_companion(&func->dev, ACPI_HANDLE(host->parent), addr);
  }
  #else
  static inline void sdio_acpi_set_handle(struct sdio_func *func) {}
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c

index d78a97d4153a98234998080b40c4cb94f50a0876..59f08c44abdbc9be920ea62974d19bcdc7884889 100644 (file)
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
  
         dma_dev = host->dma_chan->device;
  
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-               DMA_COMPL_SKIP_DEST_UNMAP;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
  
         phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
         if (dma_mapping_error(dma_dev->dev, phys_addr)) {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c

index 3dc1a7564d8725d62085b16cb7c0544e138858b2..8b2752263db9a5549742bb36c3dcee48999b8b62 100644 (file)
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len,
         dma_dev = chan->device;
         dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
  
-       flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
         if (direction == DMA_TO_DEVICE) {
                 dma_src = dma_addr;
                 dma_dst = host->data_pa;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c

index 0951f7aca1eff6671f187d03b203952926a1ecea..822616e3c3754118ab2e09eada44d59a24c3954c 100644 (file)
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
                 sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
  
         ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-               &ctl->sg, 1, DMA_MEM_TO_DEV,
-               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+               &ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
         if (!ctl->adesc)
                 return NETDEV_TX_BUSY;
  
@@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
                 sg_dma_len(sg) = DMA_BUFFER_SIZE;
  
                 ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-                       sg, 1, DMA_DEV_TO_MEM,
-                       DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+                       sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
  
                 if (!ctl->adesc)
                         goto out;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c

index 12a9e83c008b402f0f7acde4e80c742c7795348e..d0222f13d154808f3cfa4ed3cab26cd1a7b899ee 100644 (file)
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1034,10 +1034,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
         struct dma_chan *chan = qp->dma_chan;
         struct dma_device *device;
         size_t pay_off, buff_off;
-       dma_addr_t src, dest;
+       struct dmaengine_unmap_data *unmap;
         dma_cookie_t cookie;
         void *buf = entry->buf;
-       unsigned long flags;
  
         entry->len = len;
  
@@ -1045,35 +1044,49 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
                 goto err;
  
         if (len < copy_bytes) 
-               goto err1;
+               goto err_wait;
  
         device = chan->device;
         pay_off = (size_t) offset & ~PAGE_MASK;
         buff_off = (size_t) buf & ~PAGE_MASK;
  
         if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
-               goto err1;
+               goto err_wait;
  
-       dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
-       if (dma_mapping_error(device->dev, dest))
-               goto err1;
+       unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+       if (!unmap)
+               goto err_wait;
  
-       src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dev, src))
-               goto err2;
+       unmap->len = len;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+                                     pay_off, len, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
  
-       flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
-               DMA_PREP_INTERRUPT;
-       txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+       unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+                                     buff_off, len, DMA_FROM_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[1]))
+               goto err_get_unmap;
+
+       unmap->from_cnt = 1;
+
+       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                            unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
         if (!txd)
-               goto err3;
+               goto err_get_unmap;
  
         txd->callback = ntb_rx_copy_callback;
         txd->callback_param = entry;
+       dma_set_unmap(txd, unmap);
  
         cookie = dmaengine_submit(txd);
         if (dma_submit_error(cookie))
-               goto err3;
+               goto err_set_unmap;
+
+       dmaengine_unmap_put(unmap);
  
         qp->last_cookie = cookie;
  
@@ -1081,11 +1094,11 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
  
         return;
  
-err3:
-       dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
-err2:
-       dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
-err1:
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
+err_wait:
         /* If the callbacks come out of order, the writing of the index to the
          * last completed will be out of order.  This may result in the
          * receive stalling forever.
@@ -1245,12 +1258,12 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
         struct dma_chan *chan = qp->dma_chan;
         struct dma_device *device;
         size_t dest_off, buff_off;
-       dma_addr_t src, dest;
+       struct dmaengine_unmap_data *unmap;
+       dma_addr_t dest;
         dma_cookie_t cookie;
         void __iomem *offset;
         size_t len = entry->len;
         void *buf = entry->buf;
-       unsigned long flags;
  
         offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
         hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1273,28 +1286,41 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
         if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
                 goto err;
  
-       src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dev, src))
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
                 goto err;
  
-       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
-       txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+       unmap->len = len;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+                                     buff_off, len, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
         if (!txd)
-               goto err1;
+               goto err_get_unmap;
  
         txd->callback = ntb_tx_copy_callback;
         txd->callback_param = entry;
+       dma_set_unmap(txd, unmap);
  
         cookie = dmaengine_submit(txd);
         if (dma_submit_error(cookie))
-               goto err1;
+               goto err_set_unmap;
+
+       dmaengine_unmap_put(unmap);
  
         dma_async_issue_pending(chan);
         qp->tx_async++;
  
         return;
-err1:
-       dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
  err:
         ntb_memcpy_tx(entry, offset);
         qp->tx_memcpy++;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c

index 1ce8ee054f1aa89ba83fb1001761e9200262a666..a94d850ae228c377387d036fd0f3af7e656130c6 100644 (file)
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -367,7 +367,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
                 string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
         }
  
-       handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       handle = ACPI_HANDLE(&pdev->dev);
         if (!handle) {
                 /*
                  * This hotplug controller was not listed in the ACPI name
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h

index 26100f510b1087f45bbe39b79dcf9649acf38e9d..1592dbe4f90461dbfa82be205f334eb0e8ecc6f6 100644 (file)
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -176,7 +176,6 @@ u8 acpiphp_get_latch_status(struct acpiphp_slot *slot);
  u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot);
  
  /* variables */
-extern bool acpiphp_debug;
  extern bool acpiphp_disabled;
  
  #endif /* _ACPIPHP_H */
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c

index ead7c534095e885572c85c6963d57742dcab7bec..cff7cadfc2e4b27344d79ed479f419a7a012f5e0 100644 (file)
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -54,7 +54,7 @@ int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
  {
         if (slot_detection_mode != PCIEHP_DETECT_ACPI)
                 return 0;
-       if (acpi_pci_detect_ejectable(DEVICE_ACPI_HANDLE(&dev->dev)))
+       if (acpi_pci_detect_ejectable(ACPI_HANDLE(&dev->dev)))
                 return 0;
         return -ENODEV;
  }
@@ -96,7 +96,7 @@ static int __init dummy_probe(struct pcie_device *dev)
                         dup_slot_id++;
         }
         list_add_tail(&slot->list, &dummy_slots);
-       handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       handle = ACPI_HANDLE(&pdev->dev);
         if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
                 acpi_slot_detected = 1;
         return -ENODEV;         /* dummy driver always returns error */
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c

index b2781dfe60e9e3cbc79095de46b96f8421958275..5b05a68cca6c73aaf50c7f66ccec5923e4f3d431 100644 (file)
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -9,6 +9,7 @@
   * Work to add BIOS PROM support was completed by Mike Habeck.
   */
  
+#include <linux/acpi.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
@@ -29,7 +30,6 @@
  #include <asm/sn/sn_feature_sets.h>
  #include <asm/sn/sn_sal.h>
  #include <asm/sn/types.h>
-#include <linux/acpi.h>
  #include <asm/sn/acpi.h>
  
  #include "../pci.h"
@@ -414,7 +414,7 @@ static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
                 acpi_handle rethandle;
                 acpi_status ret;
  
-               phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+               phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
  
                 if (acpi_bus_get_device(phandle, &pdevice)) {
                         dev_dbg(&slot->pci_bus->self->dev,
@@ -495,7 +495,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
  
         /* free the ACPI resources for the slot */
         if (SN_ACPI_BASE_SUPPORT() &&
-            PCI_CONTROLLER(slot->pci_bus)->acpi_handle) {
+            PCI_CONTROLLER(slot->pci_bus)->companion) {
                 unsigned long long adr;
                 struct acpi_device *device;
                 acpi_handle phandle;
@@ -504,7 +504,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
                 acpi_status ret;
  
                 /* Get the rootbus node pointer */
-               phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+               phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
  
                 acpi_scan_lock_acquire();
                 /*
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c

index 1b90579b233ae8c2d7db5efe00cc022c42fbb73a..50ce6809829836c38d4aedfa58aeb642b126d9df 100644 (file)
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -37,7 +37,7 @@ static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
         char *type;
         struct resource *res;
  
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
         if (!handle)
                 return -EINVAL;
  
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c

index dfd1f59de729c6293416d789fb6f665dc09bf701..f166126e28d17ad4b43846beb361d0a5c10fa78e 100644 (file)
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -173,14 +173,14 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
  
  static bool acpi_pci_power_manageable(struct pci_dev *dev)
  {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
  
         return handle ? acpi_bus_power_manageable(handle) : false;
  }
  
  static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
  {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
         static const u8 state_conv[] = {
                 [PCI_D0] = ACPI_STATE_D0,
                 [PCI_D1] = ACPI_STATE_D1,
@@ -217,7 +217,7 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
  
  static bool acpi_pci_can_wakeup(struct pci_dev *dev)
  {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
  
         return handle ? acpi_bus_can_wakeup(handle) : false;
  }
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c

index edaed6f4da6cebfbe1c0e2b1b40ac94a039af9dc..d51f45aa669e5ff9184daab1df72776e8165aa84 100644 (file)
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -263,7 +263,7 @@ device_has_dsm(struct device *dev)
         acpi_handle handle;
         struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
  
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
  
         if (!handle)
                 return FALSE;
@@ -295,7 +295,7 @@ acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
         acpi_handle handle;
         int length;
  
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
  
         if (!handle)
                 return -1;
@@ -316,7 +316,7 @@ acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
         acpi_handle handle;
         int length;
  
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
  
         if (!handle)
                 return -1;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c

index 605a9be5512907238e520d080c55e4637afcbea0..b9429fbf1cd82a403c65bddd09acc6e11078b66a 100644 (file)
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -519,7 +519,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
  
         gmux_data->power_state = VGA_SWITCHEROO_ON;
  
-       gmux_data->dhandle = DEVICE_ACPI_HANDLE(&pnp->dev);
+       gmux_data->dhandle = ACPI_HANDLE(&pnp->dev);
         if (!gmux_data->dhandle) {
                 pr_err("Cannot find acpi handle for pnp device %s\n",
                        dev_name(&pnp->dev));
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c

index 747826d99059955f8941d592ef2f7734cd38fd08..14655a0f0431b35bd9a0988b669c510a162155e3 100644 (file)
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -89,7 +89,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev)
  
         pnp_dbg(&dev->dev, "set resources\n");
  
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
         if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                 dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                 return -ENODEV;
@@ -122,7 +122,7 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
  
         dev_dbg(&dev->dev, "disable resources\n");
  
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
         if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                 dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                 return 0;
@@ -144,7 +144,7 @@ static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
         struct acpi_device *acpi_dev;
         acpi_handle handle;
  
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
         if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                 dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                 return false;
@@ -159,7 +159,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
         acpi_handle handle;
         int error = 0;
  
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
         if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                 dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                 return 0;
@@ -194,7 +194,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
  static int pnpacpi_resume(struct pnp_dev *dev)
  {
         struct acpi_device *acpi_dev;
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
         int error = 0;
  
         if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig

index 15f166a470a7f3fe5eb0e45dd2be80c2a384a76d..0077302221164e62df209626e6595a5a9505e4ee 100644 (file)
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -626,7 +626,7 @@ comment "Platform RTC drivers"
  
  config RTC_DRV_CMOS
         tristate "PC-style 'CMOS'"
-       depends on X86 || ALPHA || ARM || M32R || ATARI || PPC || MIPS || SPARC64
+       depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64
         default y if X86
         help
           Say "yes" here to get direct support for the real time clock
@@ -643,6 +643,14 @@ config RTC_DRV_CMOS
           This driver can also be built as a module. If so, the module
           will be called rtc-cmos.
  
+config RTC_DRV_ALPHA
+       bool "Alpha PC-style CMOS"
+       depends on ALPHA
+       default y
+       help
+         Direct support for the real-time clock found on every Alpha
+         system, specifically MC146818 compatibles.  If in doubt, say Y.
+
  config RTC_DRV_VRTC
         tristate "Virtual RTC for Intel MID platforms"
         depends on X86_INTEL_MID
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c

index 8b2cd8a5a2ffe3d5928d2bc7f026f3fcceaa1dca..c0da95e95702123d403bf58ed4b894a104eeb2ec 100644 (file)
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev)
         return 0;
  }
  
+static void at91_rtc_shutdown(struct platform_device *pdev)
+{
+       /* Disable all interrupts */
+       at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM |
+                                       AT91_RTC_SECEV | AT91_RTC_TIMEV |
+                                       AT91_RTC_CALEV);
+}
+
  #ifdef CONFIG_PM_SLEEP
  
  /* AT91RM9200 RTC Power management control */
@@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
  
  static struct platform_driver at91_rtc_driver = {
         .remove         = __exit_p(at91_rtc_remove),
+       .shutdown       = at91_rtc_shutdown,
         .driver         = {
                 .name   = "at91_rtc",
                 .owner  = THIS_MODULE,
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c

index b9f0192758d6d929aab86d087c443adc46154e66..6d207afec8cbdb578c9e5428d6018dff1d93690b 100644 (file)
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
                                 &dws->tx_sgl,
                                 1,
                                 DMA_MEM_TO_DEV,
-                               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+                               DMA_PREP_INTERRUPT);
         txdesc->callback = dw_spi_dma_done;
         txdesc->callback_param = dws;
  
@@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
                                 &dws->rx_sgl,
                                 1,
                                 DMA_DEV_TO_MEM,
-                               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+                               DMA_PREP_INTERRUPT);
         rxdesc->callback = dw_spi_dma_done;
         rxdesc->callback_param = dws;
  
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c

index 8d85ddc4601173c14fd24ae6a5f7defe60b94102..18cc625d887f796aed4b082eb2366262aec8aaa8 100644 (file)
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -357,6 +357,19 @@ struct spi_device *spi_alloc_device(struct spi_master *master)
  }
  EXPORT_SYMBOL_GPL(spi_alloc_device);
  
+static void spi_dev_set_name(struct spi_device *spi)
+{
+       struct acpi_device *adev = ACPI_COMPANION(&spi->dev);
+
+       if (adev) {
+               dev_set_name(&spi->dev, "spi-%s", acpi_dev_name(adev));
+               return;
+       }
+
+       dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
+                    spi->chip_select);
+}
+
  /**
   * spi_add_device - Add spi_device allocated with spi_alloc_device
   * @spi: spi_device to register
@@ -383,9 +396,7 @@ int spi_add_device(struct spi_device *spi)
         }
  
         /* Set the bus ID string */
-       dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
-                       spi->chip_select);
-
+       spi_dev_set_name(spi);
  
         /* We need to make sure there's no other device with this
          * chipselect **BEFORE** we call setup(), else we'll trash
@@ -1144,7 +1155,7 @@ static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
                 return AE_NO_MEMORY;
         }
  
-       ACPI_HANDLE_SET(&spi->dev, handle);
+       ACPI_COMPANION_SET(&spi->dev, adev);
         spi->irq = -1;
  
         INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c

index 537750261aaa2fe5a5e2de8fdb3e6ee0adf8fe4d..7d8103cd3e2ec56eacbb5a5d3f3f332597e190ff 100644 (file)
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1433,7 +1433,7 @@ static void work_fn_rx(struct work_struct *work)
         desc = s->desc_rx[new];
  
         if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                 /* Handle incomplete DMA receive */
                 struct dma_chan *chan = s->chan_rx;
                 struct shdma_desc *sh_desc = container_of(desc,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c

index 06cec635e703adc3f9859ae8ca53dbb124820cd4..a7c04e24ca484deb233db5dcfd995b73427c4cc0 100644 (file)
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5501,6 +5501,6 @@ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev,
         if (!hub)
                 return NULL;
  
-       return DEVICE_ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
+       return ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
  }
  #endif
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c

index 255c14464bf2ea7a30dffab6fa1770c7b030c0aa..4e243c37f17f50ab197582a3ee5d4662e933d368 100644 (file)
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -173,7 +173,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
                 }
  
                 /* root hub's parent is the usb hcd. */
-               parent_handle = DEVICE_ACPI_HANDLE(dev->parent);
+               parent_handle = ACPI_HANDLE(dev->parent);
                 *handle = acpi_get_child(parent_handle, udev->portnum);
                 if (!*handle)
                         return -ENODEV;
@@ -194,7 +194,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
  
                         raw_port_num = usb_hcd_find_raw_port_number(hcd,
                                 port_num);
-                       *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev),
+                       *handle = acpi_get_child(ACPI_HANDLE(&udev->dev),
                                 raw_port_num);
                         if (!*handle)
                                 return -ENODEV;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c

index d15f6e80479f5388f52af9eba6effb9e6e769a11..188825122aae8a037bccc8255dbd43211ca6a941 100644 (file)
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -59,12 +59,12 @@ static int xen_add_device(struct device *dev)
                         add.flags = XEN_PCI_DEV_EXTFN;
  
  #ifdef CONFIG_ACPI
-               handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+               handle = ACPI_HANDLE(&pci_dev->dev);
                 if (!handle && pci_dev->bus->bridge)
-                       handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+                       handle = ACPI_HANDLE(pci_dev->bus->bridge);
  #ifdef CONFIG_PCI_IOV
                 if (!handle && pci_dev->is_virtfn)
-                       handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+                       handle = ACPI_HANDLE(physfn->bus->bridge);
  #endif
                 if (handle) {
                         acpi_status status;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c

index f039b104a98e9093cadfb62b040a604628a76b7a..b03dd23feda8104d70536b514ec996fa612c5b89 100644 (file)
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -42,23 +42,6 @@
  #include "v9fs_vfs.h"
  #include "fid.h"
  
-/**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry:  dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
-       p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-                dentry->d_name.name, dentry);
-
-       return 1;
-}
-
  /**
   * v9fs_cached_dentry_delete - called when dentry refcount equals 0
   * @dentry:  dentry in question
@@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
  };
  
  const struct dentry_operations v9fs_dentry_operations = {
-       .d_delete = v9fs_dentry_delete,
+       .d_delete = always_delete_dentry,
         .d_release = v9fs_dentry_release,
  };
diff --git a/fs/aio.c b/fs/aio.c

index 823efcbb6ccd1dc7936f77890183cee0ac93ed94..08159ed13649cacbec1825065e24b2b5b61be267 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,6 +80,8 @@ struct kioctx {
         struct percpu_ref       users;
         atomic_t                dead;
  
+       struct percpu_ref       reqs;
+
         unsigned long           user_id;
  
         struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
         struct page             **ring_pages;
         long                    nr_pages;
  
-       struct rcu_head         rcu_head;
         struct work_struct      free_work;
  
         struct {
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx)
  
         put_aio_ring_file(ctx);
  
-       if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+       if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
                 kfree(ctx->ring_pages);
+               ctx->ring_pages = NULL;
+       }
  }
  
  static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
         return cancel(kiocb);
  }
  
-static void free_ioctx_rcu(struct rcu_head *head)
+static void free_ioctx(struct work_struct *work)
  {
-       struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  
+       pr_debug("freeing %p\n", ctx);
+
+       aio_free_ring(ctx);
         free_percpu(ctx->cpu);
         kmem_cache_free(kioctx_cachep, ctx);
  }
  
+static void free_ioctx_reqs(struct percpu_ref *ref)
+{
+       struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+       INIT_WORK(&ctx->free_work, free_ioctx);
+       schedule_work(&ctx->free_work);
+}
+
  /*
   * When this function runs, the kioctx has been removed from the "hash table"
   * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
   * now it's safe to cancel any that need to be.
   */
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_users(struct percpu_ref *ref)
  {
-       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-       struct aio_ring *ring;
+       struct kioctx *ctx = container_of(ref, struct kioctx, users);
         struct kiocb *req;
-       unsigned cpu, avail;
-       DEFINE_WAIT(wait);
  
         spin_lock_irq(&ctx->ctx_lock);
  
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work)
  
         spin_unlock_irq(&ctx->ctx_lock);
  
-       for_each_possible_cpu(cpu) {
-               struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
-
-               atomic_add(kcpu->reqs_available, &ctx->reqs_available);
-               kcpu->reqs_available = 0;
-       }
-
-       while (1) {
-               prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-
-               ring = kmap_atomic(ctx->ring_pages[0]);
-               avail = (ring->head <= ring->tail)
-                        ? ring->tail - ring->head
-                        : ctx->nr_events - ring->head + ring->tail;
-
-               atomic_add(avail, &ctx->reqs_available);
-               ring->head = ring->tail;
-               kunmap_atomic(ring);
-
-               if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
-                       break;
-
-               schedule();
-       }
-       finish_wait(&ctx->wait, &wait);
-
-       WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
-
-       aio_free_ring(ctx);
-
-       pr_debug("freeing %p\n", ctx);
-
-       /*
-        * Here the call_rcu() is between the wait_event() for reqs_active to
-        * hit 0, and freeing the ioctx.
-        *
-        * aio_complete() decrements reqs_active, but it has to touch the ioctx
-        * after to issue a wakeup so we use rcu.
-        */
-       call_rcu(&ctx->rcu_head, free_ioctx_rcu);
-}
-
-static void free_ioctx_ref(struct percpu_ref *ref)
-{
-       struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
-       INIT_WORK(&ctx->free_work, free_ioctx);
-       schedule_work(&ctx->free_work);
+       percpu_ref_kill(&ctx->reqs);
+       percpu_ref_put(&ctx->reqs);
  }
  
  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
         }
  }
  
+static void aio_nr_sub(unsigned nr)
+{
+       spin_lock(&aio_nr_lock);
+       if (WARN_ON(aio_nr - nr > aio_nr))
+               aio_nr = 0;
+       else
+               aio_nr -= nr;
+       spin_unlock(&aio_nr_lock);
+}
+
  /* ioctx_alloc
   *     Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
   */
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
  
         ctx->max_reqs = nr_events;
  
-       if (percpu_ref_init(&ctx->users, free_ioctx_ref))
-               goto out_freectx;
+       if (percpu_ref_init(&ctx->users, free_ioctx_users))
+               goto err;
+
+       if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+               goto err;
  
         spin_lock_init(&ctx->ctx_lock);
         spin_lock_init(&ctx->completion_lock);
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
  
         ctx->cpu = alloc_percpu(struct kioctx_cpu);
         if (!ctx->cpu)
-               goto out_freeref;
+               goto err;
  
         if (aio_setup_ring(ctx) < 0)
-               goto out_freepcpu;
+               goto err;
  
         atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
         ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
         if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
             aio_nr + nr_events < aio_nr) {
                 spin_unlock(&aio_nr_lock);
-               goto out_cleanup;
+               err = -EAGAIN;
+               goto err;
         }
         aio_nr += ctx->max_reqs;
         spin_unlock(&aio_nr_lock);
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
  
         err = ioctx_add_table(ctx, mm);
         if (err)
-               goto out_cleanup_put;
+               goto err_cleanup;
  
         pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
                  ctx, ctx->user_id, mm, ctx->nr_events);
         return ctx;
  
-out_cleanup_put:
-       percpu_ref_put(&ctx->users);
-out_cleanup:
-       err = -EAGAIN;
-       aio_free_ring(ctx);
-out_freepcpu:
+err_cleanup:
+       aio_nr_sub(ctx->max_reqs);
+err:
         free_percpu(ctx->cpu);
-out_freeref:
+       free_percpu(ctx->reqs.pcpu_count);
         free_percpu(ctx->users.pcpu_count);
-out_freectx:
-       put_aio_ring_file(ctx);
         kmem_cache_free(kioctx_cachep, ctx);
         pr_debug("error allocating ioctx %d\n", err);
         return ERR_PTR(err);
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
                  * -EAGAIN with no ioctxs actually in use (as far as userspace
                  *  could tell).
                  */
-               spin_lock(&aio_nr_lock);
-               BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
-               aio_nr -= ctx->max_reqs;
-               spin_unlock(&aio_nr_lock);
+               aio_nr_sub(ctx->max_reqs);
  
                 if (ctx->mmap_size)
                         vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
         if (unlikely(!req))
                 goto out_put;
  
+       percpu_ref_get(&ctx->reqs);
+
         req->ki_ctx = ctx;
         return req;
  out_put:
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
                 return;
         }
  
-       /*
-        * Take rcu_read_lock() in case the kioctx is being destroyed, as we
-        * need to issue a wakeup after incrementing reqs_available.
-        */
-       rcu_read_lock();
-
         if (iocb->ki_list.next) {
                 unsigned long flags;
  
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
         if (waitqueue_active(&ctx->wait))
                 wake_up(&ctx->wait);
  
-       rcu_read_unlock();
+       percpu_ref_put(&ctx->reqs);
  }
  EXPORT_SYMBOL(aio_complete);
  
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
         return 0;
  out_put_req:
         put_reqs_available(ctx, 1);
+       percpu_ref_put(&ctx->reqs);
         kiocb_free(req);
         return ret;
  }
diff --git a/fs/bio.c b/fs/bio.c

index 2bdb4e25ee77db6c2a5c135b3c726eae734153b9..33d79a4eb92d6e623aa90e2291af39b2b2689d83 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
  
  static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
                           *page, unsigned int len, unsigned int offset,
-                         unsigned short max_sectors)
+                         unsigned int max_sectors)
  {
         int retried_segments = 0;
         struct bio_vec *bvec;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig

index f9d5094e102943db81708451a43312f6ac2e9525..aa976eced2d2ea8dfa9c0e97ea84da7438626d62 100644 (file)
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,12 +9,17 @@ config BTRFS_FS
         select XOR_BLOCKS
  
         help
-         Btrfs is a new filesystem with extents, writable snapshotting,
-         support for multiple devices and many more features.
+         Btrfs is a general purpose copy-on-write filesystem with extents,
+         writable snapshotting, support for multiple devices and many more
+         features focused on fault tolerance, repair and easy administration.
  
-         Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
-         FINALIZED.  You should say N here unless you are interested in
-         testing Btrfs with non-critical data.
+         The filesystem disk format is no longer unstable, and it's not
+         expected to change unless there are strong reasons to do so. If there
+         is a format change, file systems with a unchanged format will
+         continue to be mountable and usable by newer kernels.
+
+         For more information, please see the web pages at
+         http://btrfs.wiki.kernel.org.
  
           To compile this file system support as a module, choose M here. The
           module will be called btrfs.
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c

index 8aec751fa464c126a7fd80e21b526692896d98c9..c1e0b0caf9cc975c2822cadf9aaaf0c1454dcf91 100644 (file)
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
         spin_lock_irq(&workers->lock);
         if (workers->stopping) {
                 spin_unlock_irq(&workers->lock);
+               ret = -EINVAL;
                 goto fail_kthread;
         }
         list_add_tail(&worker->worker_list, &workers->idle_list);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c

index e0aab44569741342e8a197fcbdfe4d7e6fbd6481..b50764bef1410c2750b17d943ae3597899b3ee9e 100644 (file)
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,6 +77,15 @@
   * the integrity of (super)-block write requests, do not
   * enable the config option BTRFS_FS_CHECK_INTEGRITY to
   * include and compile the integrity check tool.
+ *
+ * Expect millions of lines of information in the kernel log with an
+ * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
+ * kernel config to at least 26 (which is 64MB). Usually the value is
+ * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
+ * changed like this before LOG_BUF_SHIFT can be set to a high value:
+ * config LOG_BUF_SHIFT
+ *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ *       range 12 30
   */
  
  #include <linux/sched.h>
@@ -124,6 +133,7 @@
  #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                    0x00000400
  #define BTRFSIC_PRINT_MASK_NUM_COPIES                          0x00000800
  #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS               0x00001000
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE               0x00002000
  
  struct btrfsic_dev_state;
  struct btrfsic_state;
@@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
             (rw & WRITE) && NULL != bio->bi_io_vec) {
                 unsigned int i;
                 u64 dev_bytenr;
+               u64 cur_bytenr;
                 int bio_is_patched;
                 char **mapped_datav;
  
@@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
                                        GFP_NOFS);
                 if (!mapped_datav)
                         goto leave;
+               cur_bytenr = dev_bytenr;
                 for (i = 0; i < bio->bi_vcnt; i++) {
                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
                                 kfree(mapped_datav);
                                 goto leave;
                         }
-                       if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
-                            BTRFSIC_PRINT_MASK_VERBOSE) ==
-                           (dev_state->state->print_mask &
-                            (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
-                             BTRFSIC_PRINT_MASK_VERBOSE)))
+                       if (dev_state->state->print_mask &
+                           BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
                                 printk(KERN_INFO
-                                      "#%u: page=%p, len=%u, offset=%u\n",
-                                      i, bio->bi_io_vec[i].bv_page,
-                                      bio->bi_io_vec[i].bv_len,
+                                      "#%u: bytenr=%llu, len=%u, offset=%u\n",
+                                      i, cur_bytenr, bio->bi_io_vec[i].bv_len,
                                        bio->bi_io_vec[i].bv_offset);
+                       cur_bytenr += bio->bi_io_vec[i].bv_len;
                 }
                 btrfsic_process_written_block(dev_state, dev_bytenr,
                                               mapped_datav, bio->bi_vcnt,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index f9aeb2759a646e7d3c258ce4ea3498e7c786e1ad..54ab86127f7af49500f6a0bf2bf4b63cd6074c40 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                            struct btrfs_ordered_sum *sums);
  int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                        struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root, struct btrfs_path *path,
-                       u64 isize);
  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                              struct list_head *list, int search_commit);
  /* inode.c */
@@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
  int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
  void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                              int skip_pinned);
-int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
-                              u64 start, u64 end, int skip_pinned,
-                              int modified);
  extern const struct file_operations btrfs_file_operations;
  int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c

index 342f9fd411e3f5d79c15f675300ebcabad95738b..2cfc3dfff64f5708f71ec83af691b425b9f01b01 100644 (file)
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
         dev_replace->tgtdev = tgt_device;
  
         printk_in_rcu(KERN_INFO
-                     "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
+                     "btrfs: dev_replace from %s (devid %llu) to %s started\n",
                       src_device->missing ? "<missing disk>" :
                         rcu_str_deref(src_device->name),
                       src_device->devid,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 4c4ed0bb3da1bfc02a5dad41f66bdaf52c49aa71..8072cfa8a3b16c075e5c381f481e7cb874d9c531 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
  int btrfs_commit_super(struct btrfs_root *root)
  {
         struct btrfs_trans_handle *trans;
-       int ret;
  
         mutex_lock(&root->fs_info->cleaner_mutex);
         btrfs_run_delayed_iputs(root);
@@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root)
         trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               return ret;
-       /* run commit again to drop the original snapshot */
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               return ret;
-       ret = btrfs_write_and_wait_transaction(NULL, root);
-       if (ret) {
-               btrfs_error(root->fs_info, ret,
-                           "Failed to sync btree inode to disk.");
-               return ret;
-       }
-
-       ret = write_ctree_super(NULL, root, 0);
-       return ret;
+       return btrfs_commit_transaction(trans, root);
  }
  
  int close_ctree(struct btrfs_root *root)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 856bc2b2192cb8c2c86f23676abf36cc3cb361fe..8e457fca0a0ba5c04afb84414ccd640821a640d1 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
         int ret;
  
+       ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
         BUG_ON(!mirror_num);
  
         /* we can't repair anything in raid56 yet */
@@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
         unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
         int ret = 0;
  
+       if (root->fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+
         for (i = 0; i < num_pages; i++) {
                 struct page *p = extent_buffer_page(eb, i);
                 ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page)
         u64 private;
         u64 private_failure;
         struct io_failure_record *failrec;
-       struct btrfs_fs_info *fs_info;
+       struct inode *inode = page->mapping->host;
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
         struct extent_state *state;
         int num_copies;
         int did_repair = 0;
         int ret;
-       struct inode *inode = page->mapping->host;
  
         private = 0;
         ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
                 did_repair = 1;
                 goto out;
         }
+       if (fs_info->sb->s_flags & MS_RDONLY)
+               goto out;
  
         spin_lock(&BTRFS_I(inode)->io_tree.lock);
         state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page)
  
         if (state && state->start <= failrec->start &&
             state->end >= failrec->start + failrec->len - 1) {
-               fs_info = BTRFS_I(inode)->root->fs_info;
                 num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                               failrec->len);
                 if (num_copies > 1)  {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index da8d2f696ac5c461154046e985dcd8dfc5c22b29..f1a77449d032b1fbd481eb16cd232653e5bfbefa 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
                                                   old->extent_offset, fs_info,
                                                   path, record_one_backref,
                                                   old);
-               BUG_ON(ret < 0 && ret != -ENOENT);
+               if (ret < 0 && ret != -ENOENT)
+                       return false;
  
                 /* no backref to be processed for this extent */
                 if (!old->count) {
@@ -6186,8 +6187,7 @@ insert:
         write_unlock(&em_tree->lock);
  out:
  
-       if (em)
-               trace_btrfs_get_extent(root, em);
+       trace_btrfs_get_extent(root, em);
  
         if (path)
                 btrfs_free_path(path);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 25a8f3812f14e0410a95902d2985455313cb1e6f..69582d5b69d1f6064a77a409760a3ba1886b6d92 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
                         WARN_ON(nr < 0);
                 }
         }
+       list_splice_tail(&splice, &fs_info->ordered_roots);
         spin_unlock(&fs_info->ordered_root_lock);
  }
  
@@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                         btrfs_put_ordered_extent(ordered);
                         break;
                 }
-               if (ordered->file_offset + ordered->len < start) {
+               if (ordered->file_offset + ordered->len <= start) {
                         btrfs_put_ordered_extent(ordered);
                         break;
                 }
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index 2544805544f0baf137c33de162c46f14b5ddf002..561e2f16ba3e3ff3b0be72b12b4a052b86082d2a 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
                                 BTRFS_DEV_STAT_CORRUPTION_ERRS);
         }
  
-       if (sctx->readonly && !sctx->is_dev_replace)
-               goto did_not_correct_error;
+       if (sctx->readonly) {
+               ASSERT(!sctx->is_dev_replace);
+               goto out;
+       }
  
         if (!is_metadata && !have_csum) {
                 struct scrub_fixup_nodatasum *fixup_nodatasum;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 57c16b46afbd353b8fdcc22021ae8e00aca0d040..c6a872a8a46862948e93c343cdd0c7479caf3883 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work)
          * We've got freeze protection passed with the transaction.
          * Tell lockdep about it.
          */
-       if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+       if (ac->newtrans->type & __TRANS_FREEZABLE)
                 rwsem_acquire_read(
                      &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
                      0, 1, _THIS_IP_);
@@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
          * Tell lockdep we've released the freeze rwsem, since the
          * async commit thread will be the one to unlock it.
          */
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (ac->newtrans->type & __TRANS_FREEZABLE)
                 rwsem_release(
                         &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
                         1, _THIS_IP_);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 744553c83fe2ad79a5b07715167e6d887373bc8f..9f7fc51ca334864b72336e127d786047dfb1f5de 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                         ret = btrfs_truncate_inode_items(trans, log,
                                                          inode, 0, 0);
                 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                             &BTRFS_I(inode)->runtime_flags)) {
+                                             &BTRFS_I(inode)->runtime_flags) ||
+                          inode_only == LOG_INODE_EXISTS) {
                         if (inode_only == LOG_INODE_ALL)
                                 fast_search = true;
                         max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3801,7 +3802,7 @@ log_extents:
                         err = ret;
                         goto out_unlock;
                 }
-       } else {
+       } else if (inode_only == LOG_INODE_ALL) {
                 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
                 struct extent_map *em, *n;
  
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 0db63709786291731aa3e465dd20c46830890311..92303f42baaa92d5d845edddff1f8600fc46518e 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
  {
         struct bio_vec *prev;
         struct request_queue *q = bdev_get_queue(bdev);
-       unsigned short max_sectors = queue_max_sectors(q);
+       unsigned int max_sectors = queue_max_sectors(q);
         struct bvec_merge_data bvm = {
                 .bi_bdev = bdev,
                 .bi_sector = sector,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c

index 277bd1be21fd70061fcd8d6694ed65ab4a34abd3..e081acbac2e756372340379fea7a69a84c9c3dd0 100644 (file)
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry,
         struct configfs_dirent *sd = dentry->d_fsdata;
  
         if (sd) {
-               BUG_ON(sd->s_dentry != dentry);
                 /* Coordinate with configfs_readdir */
                 spin_lock(&configfs_dirent_lock);
-               sd->s_dentry = NULL;
+               /* Coordinate with configfs_attach_attr where will increase
+                * sd->s_count and update sd->s_dentry to new allocated one.
+                * Only set sd->dentry to null when this dentry is the only
+                * sd owner.
+                * If not do so, configfs_d_iput may run just after
+                * configfs_attach_attr and set sd->s_dentry to null
+                * even it's still in use.
+                */
+               if (atomic_read(&sd->s_count) <= 2)
+                       sd->s_dentry = NULL;
+
                 spin_unlock(&configfs_dirent_lock);
                 configfs_put(sd);
         }
         iput(inode);
  }
  
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
-       return 1;
-}
-
  const struct dentry_operations configfs_dentry_ops = {
         .d_iput         = configfs_d_iput,
-       /* simple_delete_dentry() isn't exported */
-       .d_delete       = configfs_d_delete,
+       .d_delete       = always_delete_dentry,
  };
  
  #ifdef CONFIG_LOCKDEP
@@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
         struct configfs_attribute * attr = sd->s_element;
         int error;
  
+       spin_lock(&configfs_dirent_lock);
         dentry->d_fsdata = configfs_get(sd);
         sd->s_dentry = dentry;
+       spin_unlock(&configfs_dirent_lock);
+
         error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
                                 configfs_init_file);
         if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c

index 62406b6959b63389bd503cf6db4ea528266b79a8..bc3fbcd32558fd61823b126997ace2785d7bac21 100644 (file)
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
         while (nr) {
                 if (dump_interrupted())
                         return 0;
-               n = vfs_write(file, addr, nr, &pos);
+               n = __kernel_write(file, addr, nr, &pos);
                 if (n <= 0)
                         return 0;
                 file->f_pos = pos;
@@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align)
  {
         unsigned mod = cprm->written & (align - 1);
         if (align & (align - 1))
-               return -EINVAL;
-       return mod ? dump_skip(cprm, align - mod) : 0;
+               return 0;
+       return mod ? dump_skip(cprm, align - mod) : 1;
  }
  EXPORT_SYMBOL(dump_align);
diff --git a/fs/dcache.c b/fs/dcache.c

index 0a38ef8d7f0088579089d101c99a0e12193f22f5..4bdb300b16e2e940bab8eeb07417b7f87914815f 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock);
  
  static struct kmem_cache *dentry_cache __read_mostly;
  
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-       if (!(*seq & 1))        /* Even */
-               *seq = read_seqbegin(lock);
-       else                    /* Odd */
-               read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-       return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-       if (seq & 1)
-               read_sequnlock_excl(lock);
-}
-
  /*
   * This is the single most critical data structure when it comes
   * to the dcache: the hashtable for lookups. Somebody should try
@@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq)
   * This hash-function tries to avoid losing too many bits of hash
   * information, yet avoid using a prime hash-size or similar.
   */
-#define D_HASHBITS     d_hash_shift
-#define D_HASHMASK     d_hash_mask
  
  static unsigned int d_hash_mask __read_mostly;
  static unsigned int d_hash_shift __read_mostly;
@@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
                                         unsigned int hash)
  {
         hash += (unsigned long) parent / L1_CACHE_BYTES;
-       hash = hash + (hash >> D_HASHBITS);
-       return dentry_hashtable + (hash & D_HASHMASK);
+       hash = hash + (hash >> d_hash_shift);
+       return dentry_hashtable + (hash & d_hash_mask);
  }
  
  /* Statistics gathering. */
@@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
  {
         list_del(&dentry->d_u.d_child);
         /*
-        * Inform try_to_ascend() that we are no longer attached to the
+        * Inform d_walk() that we are no longer attached to the
          * dentry tree
          */
         dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb)
  }
  EXPORT_SYMBOL(shrink_dcache_sb);
  
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
-{
-       struct dentry *new = old->d_parent;
-
-       rcu_read_lock();
-       spin_unlock(&old->d_lock);
-       spin_lock(&new->d_lock);
-
-       /*
-        * might go back up the wrong parent if we have had a rename
-        * or deletion
-        */
-       if (new != old->d_parent ||
-                (old->d_flags & DCACHE_DENTRY_KILLED) ||
-                need_seqretry(&rename_lock, seq)) {
-               spin_unlock(&new->d_lock);
-               new = NULL;
-       }
-       rcu_read_unlock();
-       return new;
-}
-
  /**
   * enum d_walk_ret - action to talke during tree walk
   * @D_WALK_CONTINUE:   contrinue walk
@@ -1185,9 +1126,24 @@ resume:
          */
         if (this_parent != parent) {
                 struct dentry *child = this_parent;
-               this_parent = try_to_ascend(this_parent, seq);
-               if (!this_parent)
+               this_parent = child->d_parent;
+
+               rcu_read_lock();
+               spin_unlock(&child->d_lock);
+               spin_lock(&this_parent->d_lock);
+
+               /*
+                * might go back up the wrong parent if we have had a rename
+                * or deletion
+                */
+               if (this_parent != child->d_parent ||
+                        (child->d_flags & DCACHE_DENTRY_KILLED) ||
+                        need_seqretry(&rename_lock, seq)) {
+                       spin_unlock(&this_parent->d_lock);
+                       rcu_read_unlock();
                         goto rename_retry;
+               }
+               rcu_read_unlock();
                 next = child->d_u.d_child.next;
                 goto resume;
         }
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c

index a8766b880c0783b3adca66bac0e5f0ba76840c70..becc725a195308edfcd518572f1f8784052b1bb6 100644 (file)
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
         return 0;
  }
  
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
  static struct dentry_operations efivarfs_d_ops = {
         .d_compare = efivarfs_d_compare,
         .d_hash = efivarfs_d_hash,
-       .d_delete = efivarfs_delete_dentry,
+       .d_delete = always_delete_dentry,
  };
  
  static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/exec.c b/fs/exec.c

index 977319fd77f39de88ef66979d1de95f7d846f9a6..7ea097f6b341f06982f3ea3b068de5755b1605e0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm)
         if (retval)
                 return retval;
  
-       retval = audit_bprm(bprm);
-       if (retval)
-               return retval;
-
         retval = -ENOENT;
   retry:
         read_lock(&binfmt_lock);
@@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm)
  
         ret = search_binary_handler(bprm);
         if (ret >= 0) {
+               audit_bprm(bprm);
                 trace_sched_process_exec(current, old_pid, bprm);
                 ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
                 current->did_exec = 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c

index e66a8009aff16d66b1179bba0ffc3a266ff923f6..c8420f7e4db604da3663da61c5ae4556b6439783 100644 (file)
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                         gi->nhash = 0;
                 }
         /* Skip entries for other sb and dead entries */
-       } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref));
+       } while (gi->sdp != gi->gl->gl_sbd ||
+                __lockref_is_dead(&gi->gl->gl_lockref));
  
         return 0;
  }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c

index 1615df16cf4eb9ed5c5c4f20ee46c56bc00d3143..7119504159f17ba8fdde8317abee9eaf38583c71 100644 (file)
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
         if (d != NULL)
                 dentry = d;
         if (dentry->d_inode) {
-               if (!(*opened & FILE_OPENED))
+               if (!(*opened & FILE_OPENED)) {
+                       if (d == NULL)
+                               dget(dentry);
                         return finish_no_open(file, dentry);
+               }
                 dput(d);
                 return 0;
         }
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c

index c8423d6de6c3ee54341d5ea5c4eb7e20ca7255de..2a6ba06bee6fca0ffada9c155ca243466fcf1f62 100644 (file)
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
  static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
                              char *lvb_bits)
  {
-       uint32_t gen;
+       __le32 gen;
         memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
-       memcpy(&gen, lvb_bits, sizeof(uint32_t));
+       memcpy(&gen, lvb_bits, sizeof(__le32));
         *lvb_gen = le32_to_cpu(gen);
  }
  
  static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
                               char *lvb_bits)
  {
-       uint32_t gen;
+       __le32 gen;
         memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
         gen = cpu_to_le32(lvb_gen);
-       memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+       memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
  }
  
  static int all_jid_bits_clear(char *lvb)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c

index 453b50eaddec42f8e8461d9f96965e7d40ba746f..98236d0df3cae7ce7666a10dc7fc907590b873b0 100644 (file)
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
         struct buffer_head *bh;
         struct page *page;
         void *kaddr, *ptr;
-       struct gfs2_quota q, *qp;
+       struct gfs2_quota q;
         int err, nbytes;
         u64 size;
  
@@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                 return err;
  
         err = -EIO;
-       qp = &q;
-       qp->qu_value = be64_to_cpu(qp->qu_value);
-       qp->qu_value += change;
-       qp->qu_value = cpu_to_be64(qp->qu_value);
-       qd->qd_qb.qb_value = qp->qu_value;
+       be64_add_cpu(&q.qu_value, change);
+       qd->qd_qb.qb_value = q.qu_value;
         if (fdq) {
                 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
-                       qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_warn = qp->qu_warn;
+                       q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_warn = q.qu_warn;
                 }
                 if (fdq->d_fieldmask & FS_DQ_BHARD) {
-                       qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_limit = qp->qu_limit;
+                       q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_limit = q.qu_limit;
                 }
                 if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
-                       qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_value = qp->qu_value;
+                       q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_value = q.qu_value;
                 }
         }
  
         /* Write the quota into the quota file on disk */
-       ptr = qp;
+       ptr = &q;
         nbytes = sizeof(struct gfs2_quota);
  get_a_page:
         page = find_or_create_page(mapping, index, GFP_NOFS);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c

index 4d83abdd5635273b3e0af9589eec83226be249ff..c8d6161bd682bd6cbd05247e2f0efc8274afe0b9 100644 (file)
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
                 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                 rgd->rd_free_clone = rgd->rd_free;
         }
-       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+       if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
                 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
                 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
                                      rgd->rd_bits[0].bi_bh->b_data);
@@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
         if (rgd->rd_flags & GFS2_RDF_UPTODATE)
                 return 0;
  
-       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+       if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
                 return gfs2_rgrp_bh_get(rgd);
  
         rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c

index 25437280a2071b8970efe6e394edb97a4433acd8..db23ce1bd9031028390eb33016a05783950bc379 100644 (file)
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
  
  #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
  
-static int hostfs_d_delete(const struct dentry *dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations hostfs_dentry_ops = {
-       .d_delete               = hostfs_d_delete,
-};
-
  /* Changed in hostfs_args before the kernel starts running */
  static char *root_ino = "";
  static int append = 0;
@@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
         sb->s_blocksize_bits = 10;
         sb->s_magic = HOSTFS_SUPER_MAGIC;
         sb->s_op = &hostfs_sbops;
-       sb->s_d_op = &hostfs_dentry_ops;
+       sb->s_d_op = &simple_dentry_operations;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
  
         /* NULL is printed as <NULL> by sprintf: avoid that. */
diff --git a/fs/libfs.c b/fs/libfs.c

index 5de06947ba5ebf2e98caa8177085f7c191df4602..a1844244246f8e8e8da60da61e2aaabd2750e58e 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs);
   * Retaining negative dentries for an in-memory filesystem just wastes
   * memory and lookup time: arrange for them to be deleted immediately.
   */
-static int simple_delete_dentry(const struct dentry *dentry)
+int always_delete_dentry(const struct dentry *dentry)
  {
         return 1;
  }
+EXPORT_SYMBOL(always_delete_dentry);
+
+const struct dentry_operations simple_dentry_operations = {
+       .d_delete = always_delete_dentry,
+};
+EXPORT_SYMBOL(simple_dentry_operations);
  
  /*
   * Lookup the data. This is trivial - if the dentry didn't already
@@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry)
   */
  struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  {
-       static const struct dentry_operations simple_dentry_operations = {
-               .d_delete = simple_delete_dentry,
-       };
-
         if (dentry->d_name.len > NAME_MAX)
                 return ERR_PTR(-ENAMETOOLONG);
         if (!dentry->d_sb->s_d_op)
diff --git a/fs/namei.c b/fs/namei.c

index e029a4cbff7db7b23af15628ca4d8c2cac5da491..8f77a8cea289350b9d0e427b284cc01a2df4691d 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
   */
  static inline int may_create(struct inode *dir, struct dentry *child)
  {
+       audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
         if (child->d_inode)
                 return -EEXIST;
         if (IS_DEADDIR(dir))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c

index 088de1355e930c05fd9dd9c03bb3710b7531fed7..ee7237f99f54cd413dba6375dbc344084d0ece56 100644 (file)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -141,8 +141,8 @@ xdr_error:                                  \
  
  static void next_decode_page(struct nfsd4_compoundargs *argp)
  {
-       argp->pagelist++;
         argp->p = page_address(argp->pagelist[0]);
+       argp->pagelist++;
         if (argp->pagelen < PAGE_SIZE) {
                 argp->end = argp->p + (argp->pagelen>>2);
                 argp->pagelen = 0;
@@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
                 len -= pages * PAGE_SIZE;
  
                 argp->p = (__be32 *)page_address(argp->pagelist[0]);
+               argp->pagelist++;
                 argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
         }
         argp->p += XDR_QUADLEN(len);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 94b5f5d2bfedd94be68e7879a8cf8b40d0e3cde3..7eea63cada1d4a3ea5f9dd95401f83714689fcb0 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp)
  }
  
  /*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
   */
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
-            int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
  {
-       struct dentry   *dentry;
-       struct inode    *inode;
-       int             accmode = NFSD_MAY_SATTR;
-       umode_t         ftype = 0;
-       __be32          err;
-       int             host_err;
-       int             size_change = 0;
-
-       if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
-               accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
-       if (iap->ia_valid & ATTR_SIZE)
-               ftype = S_IFREG;
-
-       /* Get inode */
-       err = fh_verify(rqstp, fhp, ftype, accmode);
-       if (err)
-               goto out;
-
-       dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
-
-       /* Ignore any mode updates on symlinks */
-       if (S_ISLNK(inode->i_mode))
-               iap->ia_valid &= ~ATTR_MODE;
-
-       if (!iap->ia_valid)
-               goto out;
-
         /*
          * NFSv2 does not differentiate between "set-[ac]time-to-now"
          * which only requires access, and "set-[ac]time-to-X" which
@@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
          * convert to "set to now" instead of "set to explicit time"
          *
          * We only call inode_change_ok as the last test as technically
-        * it is not an interface that we should be using.  It is only
-        * valid if the filesystem does not define it's own i_op->setattr.
+        * it is not an interface that we should be using.
          */
  #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
  #define        MAX_TOUCH_TIME_ERROR (30*60)
@@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                         iap->ia_valid &= ~BOTH_TIME_SET;
                 }
         }
-           
-       /*
-        * The size case is special.
-        * It changes the file as well as the attributes.
-        */
-       if (iap->ia_valid & ATTR_SIZE) {
-               if (iap->ia_size < inode->i_size) {
-                       err = nfsd_permission(rqstp, fhp->fh_export, dentry,
-                                       NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
-                       if (err)
-                               goto out;
-               }
-
-               host_err = get_write_access(inode);
-               if (host_err)
-                       goto out_nfserr;
-
-               size_change = 1;
-               host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
-               if (host_err) {
-                       put_write_access(inode);
-                       goto out_nfserr;
-               }
-       }
  
         /* sanitize the mode change */
         if (iap->ia_valid & ATTR_MODE) {
@@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                         iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
                 }
         }
+}
  
-       /* Change the attributes. */
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               struct iattr *iap)
+{
+       struct inode *inode = fhp->fh_dentry->d_inode;
+       int host_err;
  
-       iap->ia_valid |= ATTR_CTIME;
+       if (iap->ia_size < inode->i_size) {
+               __be32 err;
  
-       err = nfserr_notsync;
-       if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
-               host_err = nfsd_break_lease(inode);
-               if (host_err)
-                       goto out_nfserr;
-               fh_lock(fhp);
+               err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+                               NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+               if (err)
+                       return err;
+       }
  
-               host_err = notify_change(dentry, iap, NULL);
-               err = nfserrno(host_err);
-               fh_unlock(fhp);
+       host_err = get_write_access(inode);
+       if (host_err)
+               goto out_nfserrno;
+
+       host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+       if (host_err)
+               goto out_put_write_access;
+       return 0;
+
+out_put_write_access:
+       put_write_access(inode);
+out_nfserrno:
+       return nfserrno(host_err);
+}
+
+/*
+ * Set various file attributes.  After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+            int check_guard, time_t guardtime)
+{
+       struct dentry   *dentry;
+       struct inode    *inode;
+       int             accmode = NFSD_MAY_SATTR;
+       umode_t         ftype = 0;
+       __be32          err;
+       int             host_err;
+       int             size_change = 0;
+
+       if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+               accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+       if (iap->ia_valid & ATTR_SIZE)
+               ftype = S_IFREG;
+
+       /* Get inode */
+       err = fh_verify(rqstp, fhp, ftype, accmode);
+       if (err)
+               goto out;
+
+       dentry = fhp->fh_dentry;
+       inode = dentry->d_inode;
+
+       /* Ignore any mode updates on symlinks */
+       if (S_ISLNK(inode->i_mode))
+               iap->ia_valid &= ~ATTR_MODE;
+
+       if (!iap->ia_valid)
+               goto out;
+
+       nfsd_sanitize_attrs(inode, iap);
+
+       /*
+        * The size case is special, it changes the file in addition to the
+        * attributes.
+        */
+       if (iap->ia_valid & ATTR_SIZE) {
+               err = nfsd_get_write_access(rqstp, fhp, iap);
+               if (err)
+                       goto out;
+               size_change = 1;
         }
+
+       iap->ia_valid |= ATTR_CTIME;
+
+       if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+               err = nfserr_notsync;
+               goto out_put_write_access;
+       }
+
+       host_err = nfsd_break_lease(inode);
+       if (host_err)
+               goto out_put_write_access_nfserror;
+
+       fh_lock(fhp);
+       host_err = notify_change(dentry, iap, NULL);
+       fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+       err = nfserrno(host_err);
+out_put_write_access:
         if (size_change)
                 put_write_access(inode);
         if (!err)
                 commit_metadata(fhp);
  out:
         return err;
-
-out_nfserr:
-       err = nfserrno(host_err);
-       goto out;
  }
  
  #if defined(CONFIG_NFSD_V2_ACL) || \
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 1485e38daaa38100278f56e710a8233338693fd5..03c8d747be48be2a14e93fed94355d42c5d7bd52 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
                 goto out_free_page;
  
         }
-       kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
-       if (!uid_valid(kloginuid)) {
-               length = -EINVAL;
-               goto out_free_page;
+
+       /* is userspace tring to explicitly UNSET the loginuid? */
+       if (loginuid == AUDIT_UID_UNSET) {
+               kloginuid = INVALID_UID;
+       } else {
+               kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+               if (!uid_valid(kloginuid)) {
+                       length = -EINVAL;
+                       goto out_free_page;
+               }
         }
  
         length = audit_set_loginuid(kloginuid);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index 737e15615b0490c40d002a315217033f5463d5b3..cca93b6fb9a9e841cc480308968771125fb7cd87 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -174,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = {
         .follow_link    = proc_follow_link,
  };
  
-/*
- * As some entries in /proc are volatile, we want to 
- * get rid of unused dentries.  This could be made 
- * smarter: we could keep a "volatile" flag in the 
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
-       .d_delete       = proc_delete_dentry,
-};
-
  /*
   * Don't create negative dentries here, return -ENOENT by hand
   * instead.
@@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
                         inode = proc_get_inode(dir->i_sb, de);
                         if (!inode)
                                 return ERR_PTR(-ENOMEM);
-                       d_set_d_op(dentry, &proc_dentry_operations);
+                       d_set_d_op(dentry, &simple_dentry_operations);
                         d_add(dentry, inode);
                         return NULL;
                 }
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c

index 49a7fff2e83a9906f39a685ddad0e98211dd155f..9ae46b87470dd9fe9fe6962c689abb4a7500e697 100644 (file)
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = {
         .setattr        = proc_setattr,
  };
  
-static int ns_delete_dentry(const struct dentry *dentry)
-{
-       /* Don't cache namespace inodes when not in use */
-       return 1;
-}
-
  static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
  {
         struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
  
  const struct dentry_operations ns_dentry_operations =
  {
-       .d_delete       = ns_delete_dentry,
+       .d_delete       = always_delete_dentry,
         .d_dname        = ns_dname,
  };
  
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig

index c70111ebefd44aaacf7248e10a5e80fecc06582a..b6fa8657dcbc51dcb904a7e5b018d474c94d9d83 100644 (file)
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,6 +25,78 @@ config SQUASHFS
  
           If unsure, say N.
  
+choice
+       prompt "File decompression options"
+       depends on SQUASHFS
+       help
+         Squashfs now supports two options for decompressing file
+         data.  Traditionally Squashfs has decompressed into an
+         intermediate buffer and then memcopied it into the page cache.
+         Squashfs now supports the ability to decompress directly into
+         the page cache.
+
+         If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+       bool "Decompress file data into an intermediate buffer"
+       help
+         Decompress file data into an intermediate buffer and then
+         memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+       bool "Decompress files directly into the page cache"
+       help
+         Directly decompress file data into the page cache.
+         Doing so can significantly improve performance because
+         it eliminates a memcpy and it also removes the lock contention
+         on the single buffer.
+
+endchoice
+
+choice
+       prompt "Decompressor parallelisation options"
+       depends on SQUASHFS
+       help
+         Squashfs now supports three parallelisation options for
+         decompression.  Each one exhibits various trade-offs between
+         decompression performance and CPU and memory usage.
+
+         If in doubt, select "Single threaded compression"
+
+config SQUASHFS_DECOMP_SINGLE
+       bool "Single threaded compression"
+       help
+         Traditionally Squashfs has used single-threaded decompression.
+         Only one block (data or metadata) can be decompressed at any
+         one time.  This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+       bool "Use multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         If you have a parallel I/O workload and your system has enough memory,
+         using this option may improve overall I/O performance.
+
+         This decompressor implementation uses up to two parallel
+         decompressors per core.  It dynamically allocates decompressors
+         on a demand basis.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+       bool "Use percpu multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         This decompressor implementation uses a maximum of one
+         decompressor per core.  It uses percpu variables to ensure
+         decompression is load-balanced across the cores.
+
+endchoice
+
  config SQUASHFS_XATTR
         bool "Squashfs XATTR support"
         depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile

index 110b0476f3b48a21e016dd8f4337476ddf37c3ce..4132520b4ff2cfbec2e49b5c0ebe67f8661dd748 100644 (file)
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,6 +5,11 @@
  obj-$(CONFIG_SQUASHFS) += squashfs.o
  squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
  squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
  squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
  squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
  squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c

index 41d108ecc9be305211a635bfdf7932ac52d91097..0cea9b9236d07c81d0cc46c0b22aeba334cc645d 100644 (file)
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -36,6 +36,7 @@
  #include "squashfs_fs_sb.h"
  #include "squashfs.h"
  #include "decompressor.h"
+#include "page_actor.h"
  
  /*
   * Read the metadata block length, this is stored in the first two
@@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb,
   * generated a larger block - this does occasionally happen with compression
   * algorithms).
   */
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
-                       int length, u64 *next_index, int srclength, int pages)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+               u64 *next_index, struct squashfs_page_actor *output)
  {
         struct squashfs_sb_info *msblk = sb->s_fs_info;
         struct buffer_head **bh;
         int offset = index & ((1 << msblk->devblksize_log2) - 1);
         u64 cur_index = index >> msblk->devblksize_log2;
-       int bytes, compressed, b = 0, k = 0, page = 0, avail;
+       int bytes, compressed, b = 0, k = 0, avail, i;
  
-       bh = kcalloc(((srclength + msblk->devblksize - 1)
+       bh = kcalloc(((output->length + msblk->devblksize - 1)
                 >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
         if (bh == NULL)
                 return -ENOMEM;
@@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                         *next_index = index + length;
  
                 TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
-                       index, compressed ? "" : "un", length, srclength);
+                       index, compressed ? "" : "un", length, output->length);
  
-               if (length < 0 || length > srclength ||
+               if (length < 0 || length > output->length ||
                                 (index + length) > msblk->bytes_used)
                         goto read_failure;
  
@@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                 TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
                                 compressed ? "" : "un", length);
  
-               if (length < 0 || length > srclength ||
+               if (length < 0 || length > output->length ||
                                         (index + length) > msblk->bytes_used)
                         goto block_release;
  
@@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                 ll_rw_block(READ, b - 1, bh + 1);
         }
  
+       for (i = 0; i < b; i++) {
+               wait_on_buffer(bh[i]);
+               if (!buffer_uptodate(bh[i]))
+                       goto block_release;
+       }
+
         if (compressed) {
-               length = squashfs_decompress(msblk, buffer, bh, b, offset,
-                        length, srclength, pages);
+               length = squashfs_decompress(msblk, bh, b, offset, length,
+                       output);
                 if (length < 0)
                         goto read_failure;
         } else {
@@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                  * Block is uncompressed.
                  */
                 int in, pg_offset = 0;
+               void *data = squashfs_first_page(output);
  
                 for (bytes = length; k < b; k++) {
                         in = min(bytes, msblk->devblksize - offset);
                         bytes -= in;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto block_release;
                         while (in) {
                                 if (pg_offset == PAGE_CACHE_SIZE) {
-                                       page++;
+                                       data = squashfs_next_page(output);
                                         pg_offset = 0;
                                 }
                                 avail = min_t(int, in, PAGE_CACHE_SIZE -
                                                 pg_offset);
-                               memcpy(buffer[page] + pg_offset,
-                                               bh[k]->b_data + offset, avail);
+                               memcpy(data + pg_offset, bh[k]->b_data + offset,
+                                               avail);
                                 in -= avail;
                                 pg_offset += avail;
                                 offset += avail;
@@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                         offset = 0;
                         put_bh(bh[k]);
                 }
+               squashfs_finish_page(output);
         }
  
         kfree(bh);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c

index af0b738025929b1c18ac884734131ea5e843e40c..1cb70a0b216844a136bf3b47ee6534d22496bb1c 100644 (file)
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -56,6 +56,7 @@
  #include "squashfs_fs.h"
  #include "squashfs_fs_sb.h"
  #include "squashfs.h"
+#include "page_actor.h"
  
  /*
   * Look-up block in cache, and increment usage count.  If not in cache, read
@@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
                         entry->error = 0;
                         spin_unlock(&cache->lock);
  
-                       entry->length = squashfs_read_data(sb, entry->data,
-                               block, length, &entry->next_index,
-                               cache->block_size, cache->pages);
+                       entry->length = squashfs_read_data(sb, block, length,
+                               &entry->next_index, entry->actor);
  
                         spin_lock(&cache->lock);
  
@@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
                                 kfree(cache->entry[i].data[j]);
                         kfree(cache->entry[i].data);
                 }
+               kfree(cache->entry[i].actor);
         }
  
         kfree(cache->entry);
@@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
                                 goto cleanup;
                         }
                 }
+
+               entry->actor = squashfs_page_actor_init(entry->data,
+                                               cache->pages, 0);
+               if (entry->actor == NULL) {
+                       ERROR("Failed to allocate %s cache entry\n", name);
+                       goto cleanup;
+               }
         }
  
         return cache;
@@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
         int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
         int i, res;
         void *table, *buffer, **data;
+       struct squashfs_page_actor *actor;
  
         table = buffer = kmalloc(length, GFP_KERNEL);
         if (table == NULL)
@@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
                 goto failed;
         }
  
+       actor = squashfs_page_actor_init(data, pages, length);
+       if (actor == NULL) {
+               res = -ENOMEM;
+               goto failed2;
+       }
+
         for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
                 data[i] = buffer;
  
-       res = squashfs_read_data(sb, data, block, length |
-               SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+       res = squashfs_read_data(sb, block, length |
+               SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
  
         kfree(data);
+       kfree(actor);
  
         if (res < 0)
                 goto failed;
  
         return table;
  
+failed2:
+       kfree(data);
  failed:
         kfree(table);
         return ERR_PTR(res);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c

index 3f6271d86abc48ba3132ccfdd5668e118799e249..ac22fe73b0adc241449c35e58faf535d510bb3ec 100644 (file)
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -30,6 +30,7 @@
  #include "squashfs_fs_sb.h"
  #include "decompressor.h"
  #include "squashfs.h"
+#include "page_actor.h"
  
  /*
   * This file (and decompressor.h) implements a decompressor framework for
@@ -37,29 +38,29 @@
   */
  
  static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
-       NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+       NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
  };
  
  #ifndef CONFIG_SQUASHFS_LZO
  static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
-       NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+       NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
  };
  #endif
  
  #ifndef CONFIG_SQUASHFS_XZ
  static const struct squashfs_decompressor squashfs_xz_comp_ops = {
-       NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+       NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
  };
  #endif
  
  #ifndef CONFIG_SQUASHFS_ZLIB
  static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
-       NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+       NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
  };
  #endif
  
  static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
-       NULL, NULL, NULL, 0, "unknown", 0
+       NULL, NULL, NULL, NULL, 0, "unknown", 0
  };
  
  static const struct squashfs_decompressor *decompressor[] = {
@@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
  }
  
  
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+static void *get_comp_opts(struct super_block *sb, unsigned short flags)
  {
         struct squashfs_sb_info *msblk = sb->s_fs_info;
-       void *strm, *buffer = NULL;
+       void *buffer = NULL, *comp_opts;
+       struct squashfs_page_actor *actor = NULL;
         int length = 0;
  
         /*
@@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
          */
         if (SQUASHFS_COMP_OPTS(flags)) {
                 buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
-               if (buffer == NULL)
-                       return ERR_PTR(-ENOMEM);
+               if (buffer == NULL) {
+                       comp_opts = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
+
+               actor = squashfs_page_actor_init(&buffer, 1, 0);
+               if (actor == NULL) {
+                       comp_opts = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
  
-               length = squashfs_read_data(sb, &buffer,
-                       sizeof(struct squashfs_super_block), 0, NULL,
-                       PAGE_CACHE_SIZE, 1);
+               length = squashfs_read_data(sb,
+                       sizeof(struct squashfs_super_block), 0, NULL, actor);
  
                 if (length < 0) {
-                       strm = ERR_PTR(length);
-                       goto finished;
+                       comp_opts = ERR_PTR(length);
+                       goto out;
                 }
         }
  
-       strm = msblk->decompressor->init(msblk, buffer, length);
+       comp_opts = squashfs_comp_opts(msblk, buffer, length);
  
-finished:
+out:
+       kfree(actor);
         kfree(buffer);
+       return comp_opts;
+}
+
+
+void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
+{
+       struct squashfs_sb_info *msblk = sb->s_fs_info;
+       void *stream, *comp_opts = get_comp_opts(sb, flags);
+
+       if (IS_ERR(comp_opts))
+               return comp_opts;
+
+       stream = squashfs_decompressor_create(msblk, comp_opts);
+       if (IS_ERR(stream))
+               kfree(comp_opts);
  
-       return strm;
+       return stream;
  }
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h

index 330073e29029950238da75d90fbd86a9dc8c881a..af0985321808e6e92b771b7d3ea63442e5fc8581 100644 (file)
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,28 +24,22 @@
   */
  
  struct squashfs_decompressor {
-       void    *(*init)(struct squashfs_sb_info *, void *, int);
+       void    *(*init)(struct squashfs_sb_info *, void *);
+       void    *(*comp_opts)(struct squashfs_sb_info *, void *, int);
         void    (*free)(void *);
-       int     (*decompress)(struct squashfs_sb_info *, void **,
-               struct buffer_head **, int, int, int, int, int);
+       int     (*decompress)(struct squashfs_sb_info *, void *,
+               struct buffer_head **, int, int, int,
+               struct squashfs_page_actor *);
         int     id;
         char    *name;
         int     supported;
  };
  
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
-       void *s)
+static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
+                                                       void *buff, int length)
  {
-       if (msblk->decompressor)
-               msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
-       void **buffer, struct buffer_head **bh, int b, int offset, int length,
-       int srclength, int pages)
-{
-       return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
-               length, srclength, pages);
+       return msblk->decompressor->comp_opts ?
+               msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
  }
  
  #ifdef CONFIG_SQUASHFS_XZ
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c

new file mode 100644 (file)

index 0000000..d6008a6
--- /dev/null
+++ b/fs/squashfs/decompressor_multi.c
@@ -0,0 +1,198 @@
+/*
+ *  Copyright (c) 2013
+ *  Minchan Kim <minchan@kernel.org>
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The reason that multiply two is that a CPU can request new I/O
+ * while it is waiting previous request.
+ */
+#define MAX_DECOMPRESSOR       (num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+       return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+       void                    *comp_opts;
+       struct list_head        strm_list;
+       struct mutex            mutex;
+       int                     avail_decomp;
+       wait_queue_head_t       wait;
+};
+
+
+struct decomp_stream {
+       void *stream;
+       struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+                               struct squashfs_stream *stream)
+{
+       mutex_lock(&stream->mutex);
+       list_add(&decomp_strm->list, &stream->strm_list);
+       mutex_unlock(&stream->mutex);
+       wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct decomp_stream *decomp_strm = NULL;
+       int err = -ENOMEM;
+
+       stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+       if (!stream)
+               goto out;
+
+       stream->comp_opts = comp_opts;
+       mutex_init(&stream->mutex);
+       INIT_LIST_HEAD(&stream->strm_list);
+       init_waitqueue_head(&stream->wait);
+
+       /*
+        * We should have a decompressor at least as default
+        * so if we fail to allocate new decompressor dynamically,
+        * we could always fall back to default decompressor and
+        * file system works.
+        */
+       decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+       if (!decomp_strm)
+               goto out;
+
+       decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+       if (IS_ERR(decomp_strm->stream)) {
+               err = PTR_ERR(decomp_strm->stream);
+               goto out;
+       }
+
+       list_add(&decomp_strm->list, &stream->strm_list);
+       stream->avail_decomp = 1;
+       return stream;
+
+out:
+       kfree(decomp_strm);
+       kfree(stream);
+       return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream *stream = msblk->stream;
+       if (stream) {
+               struct decomp_stream *decomp_strm;
+
+               while (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       msblk->decompressor->free(decomp_strm->stream);
+                       kfree(decomp_strm);
+                       stream->avail_decomp--;
+               }
+               WARN_ON(stream->avail_decomp);
+               kfree(stream->comp_opts);
+               kfree(stream);
+       }
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+                                       struct squashfs_stream *stream)
+{
+       struct decomp_stream *decomp_strm;
+
+       while (1) {
+               mutex_lock(&stream->mutex);
+
+               /* There is available decomp_stream */
+               if (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       mutex_unlock(&stream->mutex);
+                       break;
+               }
+
+               /*
+                * If there is no available decomp and already full,
+                * let's wait for releasing decomp from other users.
+                */
+               if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+                       goto wait;
+
+               /* Let's allocate new decomp */
+               decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+               if (!decomp_strm)
+                       goto wait;
+
+               decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+               if (IS_ERR(decomp_strm->stream)) {
+                       kfree(decomp_strm);
+                       goto wait;
+               }
+
+               stream->avail_decomp++;
+               WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+               mutex_unlock(&stream->mutex);
+               break;
+wait:
+               /*
+                * If system memory is tough, let's for other's
+                * releasing instead of hurting VM because it could
+                * make page cache thrashing.
+                */
+               mutex_unlock(&stream->mutex);
+               wait_event(stream->wait,
+                       !list_empty(&stream->strm_list));
+       }
+
+       return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       int res;
+       struct squashfs_stream *stream = msblk->stream;
+       struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+       res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+               bh, b, offset, length, output);
+       put_decomp_stream(decomp_stream, stream);
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+       return res;
+}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c

new file mode 100644 (file)

index 0000000..23a9c28
--- /dev/null
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+       void            *stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct squashfs_stream __percpu *percpu;
+       int err, cpu;
+
+       percpu = alloc_percpu(struct squashfs_stream);
+       if (percpu == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               stream->stream = msblk->decompressor->init(msblk, comp_opts);
+               if (IS_ERR(stream->stream)) {
+                       err = PTR_ERR(stream->stream);
+                       goto out;
+               }
+       }
+
+       kfree(comp_opts);
+       return (__force void *) percpu;
+
+out:
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               if (!IS_ERR_OR_NULL(stream->stream))
+                       msblk->decompressor->free(stream->stream);
+       }
+       free_percpu(percpu);
+       return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream;
+       int cpu;
+
+       if (msblk->stream) {
+               for_each_possible_cpu(cpu) {
+                       stream = per_cpu_ptr(percpu, cpu);
+                       msblk->decompressor->free(stream->stream);
+               }
+               free_percpu(percpu);
+       }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream = get_cpu_ptr(percpu);
+       int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+               offset, length, output);
+       put_cpu_ptr(stream);
+
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+
+       return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+       return num_possible_cpus();
+}
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c

new file mode 100644 (file)

index 0000000..a6c7592
--- /dev/null
+++ b/fs/squashfs/decompressor_single.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements single-threaded decompression in the
+ * decompressor framework
+ */
+
+struct squashfs_stream {
+       void            *stream;
+       struct mutex    mutex;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       int err = -ENOMEM;
+
+       stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+       if (stream == NULL)
+               goto out;
+
+       stream->stream = msblk->decompressor->init(msblk, comp_opts);
+       if (IS_ERR(stream->stream)) {
+               err = PTR_ERR(stream->stream);
+               goto out;
+       }
+
+       kfree(comp_opts);
+       mutex_init(&stream->mutex);
+       return stream;
+
+out:
+       kfree(stream);
+       return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream *stream = msblk->stream;
+
+       if (stream) {
+               msblk->decompressor->free(stream->stream);
+               kfree(stream);
+       }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       int res;
+       struct squashfs_stream *stream = msblk->stream;
+
+       mutex_lock(&stream->mutex);
+       res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+               offset, length, output);
+       mutex_unlock(&stream->mutex);
+
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+
+       return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+       return 1;
+}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c

index 8ca62c28fe1249fd40d7b8e01612b1dedc5ca704..e5c9689062ba81fff5db08f50c2d39d53c4508d9 100644 (file)
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
         return le32_to_cpu(size);
  }
  
-
-static int squashfs_readpage(struct file *file, struct page *page)
+/* Copy data into page cache  */
+void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
+       int bytes, int offset)
  {
         struct inode *inode = page->mapping->host;
         struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-       int bytes, i, offset = 0, sparse = 0;
-       struct squashfs_cache_entry *buffer = NULL;
         void *pageaddr;
-
-       int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
-       int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
-       int start_index = page->index & ~mask;
-       int end_index = start_index | mask;
-       int file_end = i_size_read(inode) >> msblk->block_log;
-
-       TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-                               page->index, squashfs_i(inode)->start);
-
-       if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-                                       PAGE_CACHE_SHIFT))
-               goto out;
-
-       if (index < file_end || squashfs_i(inode)->fragment_block ==
-                                       SQUASHFS_INVALID_BLK) {
-               /*
-                * Reading a datablock from disk.  Need to read block list
-                * to get location and block size.
-                */
-               u64 block = 0;
-               int bsize = read_blocklist(inode, index, &block);
-               if (bsize < 0)
-                       goto error_out;
-
-               if (bsize == 0) { /* hole */
-                       bytes = index == file_end ?
-                               (i_size_read(inode) & (msblk->block_size - 1)) :
-                                msblk->block_size;
-                       sparse = 1;
-               } else {
-                       /*
-                        * Read and decompress datablock.
-                        */
-                       buffer = squashfs_get_datablock(inode->i_sb,
-                                                               block, bsize);
-                       if (buffer->error) {
-                               ERROR("Unable to read page, block %llx, size %x"
-                                       "\n", block, bsize);
-                               squashfs_cache_put(buffer);
-                               goto error_out;
-                       }
-                       bytes = buffer->length;
-               }
-       } else {
-               /*
-                * Datablock is stored inside a fragment (tail-end packed
-                * block).
-                */
-               buffer = squashfs_get_fragment(inode->i_sb,
-                               squashfs_i(inode)->fragment_block,
-                               squashfs_i(inode)->fragment_size);
-
-               if (buffer->error) {
-                       ERROR("Unable to read page, block %llx, size %x\n",
-                               squashfs_i(inode)->fragment_block,
-                               squashfs_i(inode)->fragment_size);
-                       squashfs_cache_put(buffer);
-                       goto error_out;
-               }
-               bytes = i_size_read(inode) & (msblk->block_size - 1);
-               offset = squashfs_i(inode)->fragment_offset;
-       }
+       int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+       int start_index = page->index & ~mask, end_index = start_index | mask;
  
         /*
          * Loop copying datablock into pages.  As the datablock likely covers
@@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page)
         for (i = start_index; i <= end_index && bytes > 0; i++,
                         bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
                 struct page *push_page;
-               int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+               int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
  
                 TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
  
@@ -475,11 +413,75 @@ skip_page:
                 if (i != page->index)
                         page_cache_release(push_page);
         }
+}
+
+/* Read datablock stored packed inside a fragment (tail-end packed block) */
+static int squashfs_readpage_fragment(struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+               squashfs_i(inode)->fragment_block,
+               squashfs_i(inode)->fragment_size);
+       int res = buffer->error;
+
+       if (res)
+               ERROR("Unable to read page, block %llx, size %x\n",
+                       squashfs_i(inode)->fragment_block,
+                       squashfs_i(inode)->fragment_size);
+       else
+               squashfs_copy_cache(page, buffer, i_size_read(inode) &
+                       (msblk->block_size - 1),
+                       squashfs_i(inode)->fragment_offset);
+
+       squashfs_cache_put(buffer);
+       return res;
+}
  
-       if (!sparse)
-               squashfs_cache_put(buffer);
+static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       int bytes = index == file_end ?
+                       (i_size_read(inode) & (msblk->block_size - 1)) :
+                        msblk->block_size;
  
+       squashfs_copy_cache(page, NULL, bytes, 0);
         return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+       int file_end = i_size_read(inode) >> msblk->block_log;
+       int res;
+       void *pageaddr;
+
+       TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+                               page->index, squashfs_i(inode)->start);
+
+       if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+                                       PAGE_CACHE_SHIFT))
+               goto out;
+
+       if (index < file_end || squashfs_i(inode)->fragment_block ==
+                                       SQUASHFS_INVALID_BLK) {
+               u64 block = 0;
+               int bsize = read_blocklist(inode, index, &block);
+               if (bsize < 0)
+                       goto error_out;
+
+               if (bsize == 0)
+                       res = squashfs_readpage_sparse(page, index, file_end);
+               else
+                       res = squashfs_readpage_block(page, block, bsize);
+       } else
+               res = squashfs_readpage_fragment(page);
+
+       if (!res)
+               return 0;
  
  error_out:
         SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c

new file mode 100644 (file)

index 0000000..f2310d2
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+       struct inode *i = page->mapping->host;
+       struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+               block, bsize);
+       int res = buffer->error;
+
+       if (res)
+               ERROR("Unable to read page, block %llx, size %x\n", block,
+                       bsize);
+       else
+               squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+       squashfs_cache_put(buffer);
+       return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c

new file mode 100644 (file)

index 0000000..2943b2b
--- /dev/null
+++ b/fs/squashfs/file_direct.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "page_actor.h"
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+       int pages, struct page **page);
+
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+
+{
+       struct inode *inode = target_page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+       int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+       int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+       int start_index = target_page->index & ~mask;
+       int end_index = start_index | mask;
+       int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+       struct page **page;
+       struct squashfs_page_actor *actor;
+       void *pageaddr;
+
+       if (end_index > file_end)
+               end_index = file_end;
+
+       pages = end_index - start_index + 1;
+
+       page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+       if (page == NULL)
+               return res;
+
+       /*
+        * Create a "page actor" which will kmap and kunmap the
+        * page cache pages appropriately within the decompressor
+        */
+       actor = squashfs_page_actor_init_special(page, pages, 0);
+       if (actor == NULL)
+               goto out;
+
+       /* Try to grab all the pages covered by the Squashfs block */
+       for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+               page[i] = (n == target_page->index) ? target_page :
+                       grab_cache_page_nowait(target_page->mapping, n);
+
+               if (page[i] == NULL) {
+                       missing_pages++;
+                       continue;
+               }
+
+               if (PageUptodate(page[i])) {
+                       unlock_page(page[i]);
+                       page_cache_release(page[i]);
+                       page[i] = NULL;
+                       missing_pages++;
+               }
+       }
+
+       if (missing_pages) {
+               /*
+                * Couldn't get one or more pages, this page has either
+                * been VM reclaimed, but others are still in the page cache
+                * and uptodate, or we're racing with another thread in
+                * squashfs_readpage also trying to grab them.  Fall back to
+                * using an intermediate buffer.
+                */
+               res = squashfs_read_cache(target_page, block, bsize, pages,
+                                                               page);
+               goto out;
+       }
+
+       /* Decompress directly into the page cache buffers */
+       res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+       if (res < 0)
+               goto mark_errored;
+
+       /* Last page may have trailing bytes not filled */
+       bytes = res % PAGE_CACHE_SIZE;
+       if (bytes) {
+               pageaddr = kmap_atomic(page[pages - 1]);
+               memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+               kunmap_atomic(pageaddr);
+       }
+
+       /* Mark pages as uptodate, unlock and release */
+       for (i = 0; i < pages; i++) {
+               flush_dcache_page(page[i]);
+               SetPageUptodate(page[i]);
+               unlock_page(page[i]);
+               if (page[i] != target_page)
+                       page_cache_release(page[i]);
+       }
+
+       kfree(actor);
+       kfree(page);
+
+       return 0;
+
+mark_errored:
+       /* Decompression failed, mark pages as errored.  Target_page is
+        * dealt with by the caller
+        */
+       for (i = 0; i < pages; i++) {
+               if (page[i] == target_page)
+                       continue;
+               flush_dcache_page(page[i]);
+               SetPageError(page[i]);
+               unlock_page(page[i]);
+               page_cache_release(page[i]);
+       }
+
+out:
+       kfree(actor);
+       kfree(page);
+       return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+       int pages, struct page **page)
+{
+       struct inode *i = target_page->mapping->host;
+       struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+                                                block, bsize);
+       int bytes = buffer->length, res = buffer->error, n, offset = 0;
+       void *pageaddr;
+
+       if (res) {
+               ERROR("Unable to read page, block %llx, size %x\n", block,
+                       bsize);
+               goto out;
+       }
+
+       for (n = 0; n < pages && bytes > 0; n++,
+                       bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+               int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+               if (page[n] == NULL)
+                       continue;
+
+               pageaddr = kmap_atomic(page[n]);
+               squashfs_copy_data(pageaddr, buffer, offset, avail);
+               memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+               kunmap_atomic(pageaddr);
+               flush_dcache_page(page[n]);
+               SetPageUptodate(page[n]);
+               unlock_page(page[n]);
+               if (page[n] != target_page)
+                       page_cache_release(page[n]);
+       }
+
+out:
+       squashfs_cache_put(buffer);
+       return res;
+}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c

index 00f4dfc5f0884cb6d77920130883521b04635784..244b9fbfff7b299195585320328cfc7540e6887e 100644 (file)
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -31,13 +31,14 @@
  #include "squashfs_fs_sb.h"
  #include "squashfs.h"
  #include "decompressor.h"
+#include "page_actor.h"
  
  struct squashfs_lzo {
         void    *input;
         void    *output;
  };
  
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
  {
         int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
  
@@ -74,22 +75,16 @@ static void lzo_free(void *strm)
  }
  
  
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
  {
-       struct squashfs_lzo *stream = msblk->stream;
-       void *buff = stream->input;
+       struct squashfs_lzo *stream = strm;
+       void *buff = stream->input, *data;
         int avail, i, bytes = length, res;
-       size_t out_len = srclength;
-
-       mutex_lock(&msblk->read_data_mutex);
+       size_t out_len = output->length;
  
         for (i = 0; i < b; i++) {
-               wait_on_buffer(bh[i]);
-               if (!buffer_uptodate(bh[i]))
-                       goto block_release;
-
                 avail = min(bytes, msblk->devblksize - offset);
                 memcpy(buff, bh[i]->b_data + offset, avail);
                 buff += avail;
@@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                 goto failed;
  
         res = bytes = (int)out_len;
-       for (i = 0, buff = stream->output; bytes && i < pages; i++) {
-               avail = min_t(int, bytes, PAGE_CACHE_SIZE);
-               memcpy(buffer[i], buff, avail);
-               buff += avail;
-               bytes -= avail;
+       data = squashfs_first_page(output);
+       buff = stream->output;
+       while (data) {
+               if (bytes <= PAGE_CACHE_SIZE) {
+                       memcpy(data, buff, bytes);
+                       break;
+               } else {
+                       memcpy(data, buff, PAGE_CACHE_SIZE);
+                       buff += PAGE_CACHE_SIZE;
+                       bytes -= PAGE_CACHE_SIZE;
+                       data = squashfs_next_page(output);
+               }
         }
+       squashfs_finish_page(output);
  
-       mutex_unlock(&msblk->read_data_mutex);
         return res;
  
-block_release:
-       for (; i < b; i++)
-               put_bh(bh[i]);
-
  failed:
-       mutex_unlock(&msblk->read_data_mutex);
-
-       ERROR("lzo decompression failed, data probably corrupt\n");
         return -EIO;
  }
  
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c

new file mode 100644 (file)

index 0000000..5a1c11f
--- /dev/null
+++ b/fs/squashfs/page_actor.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "page_actor.h"
+
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+       if (actor->next_page == actor->pages)
+               return NULL;
+
+       return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+       /* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->buffer = buffer;
+       actor->pages = pages;
+       actor->next_page = 0;
+       actor->squashfs_first_page = cache_first_page;
+       actor->squashfs_next_page = cache_next_page;
+       actor->squashfs_finish_page = cache_finish_page;
+       return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+       if (actor->pageaddr)
+               kunmap_atomic(actor->pageaddr);
+
+       return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+               kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+       if (actor->pageaddr)
+               kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->page = page;
+       actor->pages = pages;
+       actor->next_page = 0;
+       actor->pageaddr = NULL;
+       actor->squashfs_first_page = direct_first_page;
+       actor->squashfs_next_page = direct_next_page;
+       actor->squashfs_finish_page = direct_finish_page;
+       return actor;
+}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h

new file mode 100644 (file)

index 0000000..26dd820
--- /dev/null
+++ b/fs/squashfs/page_actor.h
@@ -0,0 +1,81 @@
+#ifndef PAGE_ACTOR_H
+#define PAGE_ACTOR_H
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
+struct squashfs_page_actor {
+       void    **page;
+       int     pages;
+       int     length;
+       int     next_page;
+};
+
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->page = page;
+       actor->pages = pages;
+       actor->next_page = 0;
+       return actor;
+}
+
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->page[0];
+}
+
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+       return actor->next_page == actor->pages ? NULL :
+               actor->page[actor->next_page++];
+}
+
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+       /* empty */
+}
+#else
+struct squashfs_page_actor {
+       union {
+               void            **buffer;
+               struct page     **page;
+       };
+       void    *pageaddr;
+       void    *(*squashfs_first_page)(struct squashfs_page_actor *);
+       void    *(*squashfs_next_page)(struct squashfs_page_actor *);
+       void    (*squashfs_finish_page)(struct squashfs_page_actor *);
+       int     pages;
+       int     length;
+       int     next_page;
+};
+
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+                                                        **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+       return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+       return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+       actor->squashfs_finish_page(actor);
+}
+#endif
+#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h

index d1266516ed08494e93f4dc2ef00fe66be9216cb5..9e1bb79f7e6f09ea793d11d5e627483bddc048ea 100644 (file)
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,8 @@
  #define WARNING(s, args...)    pr_warning("SQUASHFS: "s, ## args)
  
  /* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
-                               int, int);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+                               struct squashfs_page_actor *);
  
  /* cache.c */
  extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
  
  /* decompressor.c */
  extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
+
+/* decompressor_xxx.c */
+extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
+extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
+       int, int, int, struct squashfs_page_actor *);
+extern int squashfs_max_decompressors(void);
  
  /* export.c */
  extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
  extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
                                 u64, u64, unsigned int);
  
+/* file.c */
+void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
+                               int);
+
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
+
  /* id.c */
  extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
  extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h

index 52934a22f29665a00f082b24aa57ce915e875913..1da565cb50c3d0f1e652671dd7a81577b598ca2f 100644 (file)
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -50,6 +50,7 @@ struct squashfs_cache_entry {
         wait_queue_head_t       wait_queue;
         struct squashfs_cache   *cache;
         void                    **data;
+       struct squashfs_page_actor      *actor;
  };
  
  struct squashfs_sb_info {
@@ -63,10 +64,9 @@ struct squashfs_sb_info {
         __le64                                  *id_table;
         __le64                                  *fragment_index;
         __le64                                  *xattr_id_table;
-       struct mutex                            read_data_mutex;
         struct mutex                            meta_index_mutex;
         struct meta_index                       *meta_index;
-       void                                    *stream;
+       struct squashfs_stream                  *stream;
         __le64                                  *inode_lookup_table;
         u64                                     inode_table;
         u64                                     directory_table;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c

index 60553a9053ca82b7264862e8168b61df0ce7de89..202df6312d4e8517a63bb9ff10b385738e54ab6e 100644 (file)
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
         msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
         msblk->devblksize_log2 = ffz(~msblk->devblksize);
  
-       mutex_init(&msblk->read_data_mutex);
         mutex_init(&msblk->meta_index_mutex);
  
         /*
@@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
                 goto failed_mount;
  
         /* Allocate read_page block */
-       msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+       msblk->read_page = squashfs_cache_init("data",
+               squashfs_max_decompressors(), msblk->block_size);
         if (msblk->read_page == NULL) {
                 ERROR("Failed to allocate read_page block\n");
                 goto failed_mount;
         }
  
-       msblk->stream = squashfs_decompressor_init(sb, flags);
+       msblk->stream = squashfs_decompressor_setup(sb, flags);
         if (IS_ERR(msblk->stream)) {
                 err = PTR_ERR(msblk->stream);
                 msblk->stream = NULL;
@@ -336,7 +336,7 @@ failed_mount:
         squashfs_cache_delete(msblk->block_cache);
         squashfs_cache_delete(msblk->fragment_cache);
         squashfs_cache_delete(msblk->read_page);
-       squashfs_decompressor_free(msblk, msblk->stream);
+       squashfs_decompressor_destroy(msblk);
         kfree(msblk->inode_lookup_table);
         kfree(msblk->fragment_index);
         kfree(msblk->id_table);
@@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb)
                 squashfs_cache_delete(sbi->block_cache);
                 squashfs_cache_delete(sbi->fragment_cache);
                 squashfs_cache_delete(sbi->read_page);
-               squashfs_decompressor_free(sbi, sbi->stream);
+               squashfs_decompressor_destroy(sbi);
                 kfree(sbi->id_table);
                 kfree(sbi->fragment_index);
                 kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c

index 1760b7d108f66a55614102c43713ef315592374e..c609624e4b8a8cf88152310337c2533be7bec5cb 100644 (file)
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -32,44 +32,70 @@
  #include "squashfs_fs_sb.h"
  #include "squashfs.h"
  #include "decompressor.h"
+#include "page_actor.h"
  
  struct squashfs_xz {
         struct xz_dec *state;
         struct xz_buf buf;
  };
  
-struct comp_opts {
+struct disk_comp_opts {
         __le32 dictionary_size;
         __le32 flags;
  };
  
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
-       int len)
+struct comp_opts {
+       int dict_size;
+};
+
+static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
+       void *buff, int len)
  {
-       struct comp_opts *comp_opts = buff;
-       struct squashfs_xz *stream;
-       int dict_size = msblk->block_size;
-       int err, n;
+       struct disk_comp_opts *comp_opts = buff;
+       struct comp_opts *opts;
+       int err = 0, n;
+
+       opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+       if (opts == NULL) {
+               err = -ENOMEM;
+               goto out2;
+       }
  
         if (comp_opts) {
                 /* check compressor options are the expected length */
                 if (len < sizeof(*comp_opts)) {
                         err = -EIO;
-                       goto failed;
+                       goto out;
                 }
  
-               dict_size = le32_to_cpu(comp_opts->dictionary_size);
+               opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
  
                 /* the dictionary size should be 2^n or 2^n+2^(n+1) */
-               n = ffs(dict_size) - 1;
-               if (dict_size != (1 << n) && dict_size != (1 << n) +
+               n = ffs(opts->dict_size) - 1;
+               if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
                                                 (1 << (n + 1))) {
                         err = -EIO;
-                       goto failed;
+                       goto out;
                 }
-       }
+       } else
+               /* use defaults */
+               opts->dict_size = max_t(int, msblk->block_size,
+                                                       SQUASHFS_METADATA_SIZE);
+
+       return opts;
+
+out:
+       kfree(opts);
+out2:
+       return ERR_PTR(err);
+}
+
  
-       dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
+{
+       struct comp_opts *comp_opts = buff;
+       struct squashfs_xz *stream;
+       int err;
  
         stream = kmalloc(sizeof(*stream), GFP_KERNEL);
         if (stream == NULL) {
@@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
                 goto failed;
         }
  
-       stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+       stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
         if (stream->state == NULL) {
                 kfree(stream);
                 err = -ENOMEM;
@@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm)
  }
  
  
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
  {
         enum xz_ret xz_err;
-       int avail, total = 0, k = 0, page = 0;
-       struct squashfs_xz *stream = msblk->stream;
-
-       mutex_lock(&msblk->read_data_mutex);
+       int avail, total = 0, k = 0;
+       struct squashfs_xz *stream = strm;
  
         xz_dec_reset(stream->state);
         stream->buf.in_pos = 0;
         stream->buf.in_size = 0;
         stream->buf.out_pos = 0;
         stream->buf.out_size = PAGE_CACHE_SIZE;
-       stream->buf.out = buffer[page++];
+       stream->buf.out = squashfs_first_page(output);
  
         do {
                 if (stream->buf.in_pos == stream->buf.in_size && k < b) {
                         avail = min(length, msblk->devblksize - offset);
                         length -= avail;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto release_mutex;
-
                         stream->buf.in = bh[k]->b_data + offset;
                         stream->buf.in_size = avail;
                         stream->buf.in_pos = 0;
                         offset = 0;
                 }
  
-               if (stream->buf.out_pos == stream->buf.out_size
-                                                       && page < pages) {
-                       stream->buf.out = buffer[page++];
-                       stream->buf.out_pos = 0;
-                       total += PAGE_CACHE_SIZE;
+               if (stream->buf.out_pos == stream->buf.out_size) {
+                       stream->buf.out = squashfs_next_page(output);
+                       if (stream->buf.out != NULL) {
+                               stream->buf.out_pos = 0;
+                               total += PAGE_CACHE_SIZE;
+                       }
                 }
  
                 xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                         put_bh(bh[k++]);
         } while (xz_err == XZ_OK);
  
-       if (xz_err != XZ_STREAM_END) {
-               ERROR("xz_dec_run error, data probably corrupt\n");
-               goto release_mutex;
-       }
-
-       if (k < b) {
-               ERROR("xz_uncompress error, input remaining\n");
-               goto release_mutex;
-       }
+       squashfs_finish_page(output);
  
-       total += stream->buf.out_pos;
-       mutex_unlock(&msblk->read_data_mutex);
-       return total;
+       if (xz_err != XZ_STREAM_END || k < b)
+               goto out;
  
-release_mutex:
-       mutex_unlock(&msblk->read_data_mutex);
+       return total + stream->buf.out_pos;
  
+out:
         for (; k < b; k++)
                 put_bh(bh[k]);
  
@@ -172,6 +184,7 @@ release_mutex:
  
  const struct squashfs_decompressor squashfs_xz_comp_ops = {
         .init = squashfs_xz_init,
+       .comp_opts = squashfs_xz_comp_opts,
         .free = squashfs_xz_free,
         .decompress = squashfs_xz_uncompress,
         .id = XZ_COMPRESSION,
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c

index 55d918fd2d862605beb85dae54c3a177b776381d..8727caba6882209ad62102c0148a09b64421626a 100644 (file)
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -32,8 +32,9 @@
  #include "squashfs_fs_sb.h"
  #include "squashfs.h"
  #include "decompressor.h"
+#include "page_actor.h"
  
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
  {
         z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
         if (stream == NULL)
@@ -61,44 +62,37 @@ static void zlib_free(void *strm)
  }
  
  
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
  {
-       int zlib_err, zlib_init = 0;
-       int k = 0, page = 0;
-       z_stream *stream = msblk->stream;
-
-       mutex_lock(&msblk->read_data_mutex);
+       int zlib_err, zlib_init = 0, k = 0;
+       z_stream *stream = strm;
  
-       stream->avail_out = 0;
+       stream->avail_out = PAGE_CACHE_SIZE;
+       stream->next_out = squashfs_first_page(output);
         stream->avail_in = 0;
  
         do {
                 if (stream->avail_in == 0 && k < b) {
                         int avail = min(length, msblk->devblksize - offset);
                         length -= avail;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto release_mutex;
-
                         stream->next_in = bh[k]->b_data + offset;
                         stream->avail_in = avail;
                         offset = 0;
                 }
  
-               if (stream->avail_out == 0 && page < pages) {
-                       stream->next_out = buffer[page++];
-                       stream->avail_out = PAGE_CACHE_SIZE;
+               if (stream->avail_out == 0) {
+                       stream->next_out = squashfs_next_page(output);
+                       if (stream->next_out != NULL)
+                               stream->avail_out = PAGE_CACHE_SIZE;
                 }
  
                 if (!zlib_init) {
                         zlib_err = zlib_inflateInit(stream);
                         if (zlib_err != Z_OK) {
-                               ERROR("zlib_inflateInit returned unexpected "
-                                       "result 0x%x, srclength %d\n",
-                                       zlib_err, srclength);
-                               goto release_mutex;
+                               squashfs_finish_page(output);
+                               goto out;
                         }
                         zlib_init = 1;
                 }
@@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                         put_bh(bh[k++]);
         } while (zlib_err == Z_OK);
  
-       if (zlib_err != Z_STREAM_END) {
-               ERROR("zlib_inflate error, data probably corrupt\n");
-               goto release_mutex;
-       }
+       squashfs_finish_page(output);
  
-       zlib_err = zlib_inflateEnd(stream);
-       if (zlib_err != Z_OK) {
-               ERROR("zlib_inflate error, data probably corrupt\n");
-               goto release_mutex;
-       }
+       if (zlib_err != Z_STREAM_END)
+               goto out;
  
-       if (k < b) {
-               ERROR("zlib_uncompress error, data remaining\n");
-               goto release_mutex;
-       }
+       zlib_err = zlib_inflateEnd(stream);
+       if (zlib_err != Z_OK)
+               goto out;
  
-       length = stream->total_out;
-       mutex_unlock(&msblk->read_data_mutex);
-       return length;
+       if (k < b)
+               goto out;
  
-release_mutex:
-       mutex_unlock(&msblk->read_data_mutex);
+       return stream->total_out;
  
+out:
         for (; k < b; k++)
                 put_bh(bh[k]);
  
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c

index 1c02da8bb7df5a0bf5729cd5375a0266731e545c..3ef11b22e7505c380feb6597113d5b150f7b1afb 100644 (file)
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork(
         int                     committed;      /* xaction was committed */
         int                     logflags;       /* logging flags */
         int                     error;          /* error return value */
+       int                     cancel_flags = 0;
  
         ASSERT(XFS_IFORK_Q(ip) == 0);
  
@@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork(
         if (rsvd)
                 tp->t_flags |= XFS_TRANS_RESERVE;
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
-       if (error)
-               goto error0;
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
                         XFS_QMOPT_RES_REGBLKS);
-       if (error) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
-               return error;
-       }
+       if (error)
+               goto trans_cancel;
+       cancel_flags |= XFS_TRANS_ABORT;
         if (XFS_IFORK_Q(ip))
-               goto error1;
+               goto trans_cancel;
         if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
                 /*
                  * For inodes coming from pre-6.2 filesystems.
@@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork(
         }
         ASSERT(ip->i_d.di_anextents == 0);
  
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  
         switch (ip->i_d.di_format) {
@@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork(
         default:
                 ASSERT(0);
                 error = XFS_ERROR(EINVAL);
-               goto error1;
+               goto trans_cancel;
         }
  
         ASSERT(ip->i_afp == NULL);
@@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork(
         if (logflags)
                 xfs_trans_log_inode(tp, ip, logflags);
         if (error)
-               goto error2;
+               goto bmap_cancel;
         if (!xfs_sb_version_hasattr(&mp->m_sb) ||
            (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
                 __int64_t sbfields = 0;
@@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork(
  
         error = xfs_bmap_finish(&tp, &flist, &committed);
         if (error)
-               goto error2;
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-error2:
+               goto bmap_cancel;
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+
+bmap_cancel:
         xfs_bmap_cancel(&flist);
-error1:
+trans_cancel:
+       xfs_trans_cancel(tp, cancel_flags);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
-error0:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
         return error;
  }
  
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index da88f167af78dbf04df4eb0c1cdcfcad7a5bd699..02df7b408a2623d6a32e7a2bc8285be9348ca646 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -41,6 +41,7 @@
  #include "xfs_fsops.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
+#include "xfs_dinode.h"
  
  
  #ifdef HAVE_PERCPU_SB
@@ -718,8 +719,22 @@ xfs_mountfs(
          * Set the inode cluster size.
          * This may still be overridden by the file system
          * block size if it is larger than the chosen cluster size.
+        *
+        * For v5 filesystems, scale the cluster size with the inode size to
+        * keep a constant ratio of inode per cluster buffer, but only if mkfs
+        * has set the inode alignment value appropriately for larger cluster
+        * sizes.
          */
         mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               int     new_size = mp->m_inode_cluster_size;
+
+               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+                       mp->m_inode_cluster_size = new_size;
+               xfs_info(mp, "Using inode cluster size of %d bytes",
+                        mp->m_inode_cluster_size);
+       }
  
         /*
          * Set inode alignment fields
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index 1d8101a10d8eecb1644b203591068baa1a434493..a466c5e5826eed27b489f0018c024362214123c3 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -112,7 +112,7 @@ typedef struct xfs_mount {
         __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
         __uint8_t               m_agno_log;     /* log #ag's */
         __uint8_t               m_agino_log;    /* #bits for agino in inum */
-       __uint16_t              m_inode_cluster_size;/* min inode buf size */
+       uint                    m_inode_cluster_size;/* min inode buf size */
         uint                    m_blockmask;    /* sb_blocksize-1 */
         uint                    m_blockwsize;   /* sb_blocksize in words */
         uint                    m_blockwmask;   /* blockwsize-1 */
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c

index 1bba7f60d94cab1fe153b073b8ca42f24fbd4bfc..50c3f5614288febe4c85fdb7c231887527e80f79 100644 (file)
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,12 +111,14 @@ xfs_trans_log_inode(
  
         /*
          * First time we log the inode in a transaction, bump the inode change
-        * counter if it is configured for this to occur.
+        * counter if it is configured for this to occur. We don't use
+        * inode_inc_version() because there is no need for extra locking around
+        * i_version as we already hold the inode locked exclusively for
+        * metadata modification.
          */
         if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
             IS_I_VERSION(VFS_I(ip))) {
-               inode_inc_iversion(VFS_I(ip));
-               ip->i_d.di_changecount = VFS_I(ip)->i_version;
+               ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
                 flags |= XFS_ILOG_CORE;
         }
  
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c

index d53d9f0627a779cacab8adaa5aea71be36f8e41e..2fd59c0dae667b58029bad83b56dfad4958e4e0c 100644 (file)
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -385,8 +385,7 @@ xfs_calc_ifree_reservation(
                 xfs_calc_inode_res(mp, 1) +
                 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
                 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-               MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
-                   XFS_INODE_CLUSTER_SIZE(mp)) +
+               max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
                 xfs_calc_buf_res(1, 0) +
                 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
                                  mp->m_in_maxlevels, 0) +
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h

index 89c60b0f640819c7ed93b40774d5b9c3f3cbadfb..7b2de026a4f3db11c730d9d3abfa8b903ba5713f 100644 (file)
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -431,9 +431,9 @@ static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
  {
         return acpi_find_child(handle, addr, false);
  }
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr);
  int acpi_is_root_bridge(acpi_handle);
  struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
  
  int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
  int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h

new file mode 100644 (file)

index 0000000..e1e5a3e
--- /dev/null
+++ b/include/crypto/hash_info.h
@@ -0,0 +1,40 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_HASH_INFO_H
+#define _CRYPTO_HASH_INFO_H
+
+#include <crypto/sha.h>
+#include <crypto/md5.h>
+
+#include <uapi/linux/hash_info.h>
+
+/* not defined in include/crypto/ */
+#define RMD128_DIGEST_SIZE      16
+#define RMD160_DIGEST_SIZE     20
+#define RMD256_DIGEST_SIZE      32
+#define RMD320_DIGEST_SIZE      40
+
+/* not defined in include/crypto/ */
+#define WP512_DIGEST_SIZE      64
+#define WP384_DIGEST_SIZE      48
+#define WP256_DIGEST_SIZE      32
+
+/* not defined in include/crypto/ */
+#define TGR128_DIGEST_SIZE 16
+#define TGR160_DIGEST_SIZE 20
+#define TGR192_DIGEST_SIZE 24
+
+extern const char *const hash_algo_name[HASH_ALGO__LAST];
+extern const int hash_digest_size[HASH_ALGO__LAST];
+
+#endif /* _CRYPTO_HASH_INFO_H */
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h

index f5b0224c99679ed23b8475a75b78992d3ca8c899..fc09732613adbe98985083a0ca8fc0ec5ea334a9 100644 (file)
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -15,6 +15,7 @@
  #define _LINUX_PUBLIC_KEY_H
  
  #include <linux/mpi.h>
+#include <crypto/hash_info.h>
  
  enum pkey_algo {
         PKEY_ALGO_DSA,
@@ -22,21 +23,11 @@ enum pkey_algo {
         PKEY_ALGO__LAST
  };
  
-extern const char *const pkey_algo[PKEY_ALGO__LAST];
+extern const char *const pkey_algo_name[PKEY_ALGO__LAST];
+extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST];
  
-enum pkey_hash_algo {
-       PKEY_HASH_MD4,
-       PKEY_HASH_MD5,
-       PKEY_HASH_SHA1,
-       PKEY_HASH_RIPE_MD_160,
-       PKEY_HASH_SHA256,
-       PKEY_HASH_SHA384,
-       PKEY_HASH_SHA512,
-       PKEY_HASH_SHA224,
-       PKEY_HASH__LAST
-};
-
-extern const char *const pkey_hash_algo[PKEY_HASH__LAST];
+/* asymmetric key implementation supports only up to SHA224 */
+#define PKEY_HASH__LAST                (HASH_ALGO_SHA224 + 1)
  
  enum pkey_id_type {
         PKEY_ID_PGP,            /* OpenPGP generated key ID */
@@ -44,7 +35,7 @@ enum pkey_id_type {
         PKEY_ID_TYPE__LAST
  };
  
-extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST];
+extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST];
  
  /*
   * Cryptographic data for the public-key subtype of the asymmetric key type.
@@ -59,6 +50,7 @@ struct public_key {
  #define PKEY_CAN_DECRYPT       0x02
  #define PKEY_CAN_SIGN          0x04
  #define PKEY_CAN_VERIFY                0x08
+       enum pkey_algo pkey_algo : 8;
         enum pkey_id_type id_type : 8;
         union {
                 MPI     mpi[5];
@@ -88,7 +80,8 @@ struct public_key_signature {
         u8 *digest;
         u8 digest_size;                 /* Number of bytes in digest */
         u8 nr_mpi;                      /* Occupancy of mpi[] */
-       enum pkey_hash_algo pkey_hash_algo : 8;
+       enum pkey_algo pkey_algo : 8;
+       enum hash_algo pkey_hash_algo : 8;
         union {
                 MPI mpi[2];
                 struct {
diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h

new file mode 100644 (file)

index 0000000..d69bc8a
--- /dev/null
+++ b/include/keys/big_key-type.h
@@ -0,0 +1,25 @@
+/* Big capacity key type.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_BIG_KEY_TYPE_H
+#define _KEYS_BIG_KEY_TYPE_H
+
+#include <linux/key-type.h>
+
+extern struct key_type key_type_big_key;
+
+extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern void big_key_revoke(struct key *key);
+extern void big_key_destroy(struct key *key);
+extern void big_key_describe(const struct key *big_key, struct seq_file *m);
+extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
+
+#endif /* _KEYS_BIG_KEY_TYPE_H */
diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h

index cf49159b0e3a4f47c7890a122ae44ea0dc35a5da..fca5c62340a47fbbb25386002d7c8293fab7a40b 100644 (file)
--- a/include/keys/keyring-type.h
+++ b/include/keys/keyring-type.h
@@ -1,6 +1,6 @@
  /* Keyring key type
   *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
   * Written by David Howells (dhowells@redhat.com)
   *
   * This program is free software; you can redistribute it and/or
@@ -13,19 +13,6 @@
  #define _KEYS_KEYRING_TYPE_H
  
  #include <linux/key.h>
-#include <linux/rcupdate.h>
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
-       struct rcu_head rcu;            /* RCU deletion hook */
-       unsigned short  maxkeys;        /* max keys this list can hold */
-       unsigned short  nkeys;          /* number of keys currently held */
-       unsigned short  delkey;         /* key to be unlinked by RCU */
-       struct key __rcu *keys[0];
-};
-
+#include <linux/assoc_array.h>
  
  #endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h

new file mode 100644 (file)

index 0000000..8dabc39
--- /dev/null
+++ b/include/keys/system_keyring.h
@@ -0,0 +1,23 @@
+/* System keyring containing trusted public keys.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_SYSTEM_KEYRING_H
+#define _KEYS_SYSTEM_KEYRING_H
+
+#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
+
+#include <linux/key.h>
+
+extern struct key *system_trusted_keyring;
+
+#endif
+
+#endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index b0972c4ce81c3cc7e8ad1bbb03c5dc9054f2f32a..d9099b15b4726404343308a39e5e55d39a16aa05 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -44,6 +44,20 @@
  #include <acpi/acpi_numa.h>
  #include <asm/acpi.h>
  
+static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
+{
+       return adev ? adev->handle : NULL;
+}
+
+#define ACPI_COMPANION(dev)            ((dev)->acpi_node.companion)
+#define ACPI_COMPANION_SET(dev, adev)  ACPI_COMPANION(dev) = (adev)
+#define ACPI_HANDLE(dev)               acpi_device_handle(ACPI_COMPANION(dev))
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+       return dev_name(&adev->dev);
+}
+
  enum acpi_irq_model_id {
         ACPI_IRQ_MODEL_PIC = 0,
         ACPI_IRQ_MODEL_IOAPIC,
@@ -401,6 +415,15 @@ static inline bool acpi_driver_match_device(struct device *dev,
  
  #define acpi_disabled 1
  
+#define ACPI_COMPANION(dev)            (NULL)
+#define ACPI_COMPANION_SET(dev, adev)  do { } while (0)
+#define ACPI_HANDLE(dev)               (NULL)
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+       return NULL;
+}
+
  static inline void acpi_early_init(void) { }
  
  static inline int early_acpi_boot_init(void)
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h

new file mode 100644 (file)

index 0000000..9a193b8
--- /dev/null
+++ b/include/linux/assoc_array.h
@@ -0,0 +1,92 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_H
+#define _LINUX_ASSOC_ARRAY_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/types.h>
+
+#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
+
+/*
+ * Generic associative array.
+ */
+struct assoc_array {
+       struct assoc_array_ptr  *root;          /* The node at the root of the tree */
+       unsigned long           nr_leaves_on_tree;
+};
+
+/*
+ * Operations on objects and index keys for use by array manipulation routines.
+ */
+struct assoc_array_ops {
+       /* Method to get a chunk of an index key from caller-supplied data */
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+       /* Method to get a piece of an object's index key */
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+       /* Is this the object we're looking for? */
+       bool (*compare_object)(const void *object, const void *index_key);
+
+       /* How different are two objects, to a bit position in their keys? (or
+        * -1 if they're the same)
+        */
+       int (*diff_objects)(const void *a, const void *b);
+
+       /* Method to free an object. */
+       void (*free_object)(void *object);
+};
+
+/*
+ * Access and manipulation functions.
+ */
+struct assoc_array_edit;
+
+static inline void assoc_array_init(struct assoc_array *array)
+{
+       array->root = NULL;
+       array->nr_leaves_on_tree = 0;
+}
+
+extern int assoc_array_iterate(const struct assoc_array *array,
+                              int (*iterator)(const void *object,
+                                              void *iterator_data),
+                              void *iterator_data);
+extern void *assoc_array_find(const struct assoc_array *array,
+                             const struct assoc_array_ops *ops,
+                             const void *index_key);
+extern void assoc_array_destroy(struct assoc_array *array,
+                               const struct assoc_array_ops *ops);
+extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key,
+                                                  void *object);
+extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
+                                         void *object);
+extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key);
+extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                                 const struct assoc_array_ops *ops);
+extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
+extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+extern int assoc_array_gc(struct assoc_array *array,
+                         const struct assoc_array_ops *ops,
+                         bool (*iterator)(void *object, void *iterator_data),
+                         void *iterator_data);
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h

new file mode 100644 (file)

index 0000000..711275e
--- /dev/null
+++ b/include/linux/assoc_array_priv.h
@@ -0,0 +1,182 @@
+/* Private definitions for the generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
+#define _LINUX_ASSOC_ARRAY_PRIV_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/assoc_array.h>
+
+#define ASSOC_ARRAY_FAN_OUT            16      /* Number of slots per node */
+#define ASSOC_ARRAY_FAN_MASK           (ASSOC_ARRAY_FAN_OUT - 1)
+#define ASSOC_ARRAY_LEVEL_STEP         (ilog2(ASSOC_ARRAY_FAN_OUT))
+#define ASSOC_ARRAY_LEVEL_STEP_MASK    (ASSOC_ARRAY_LEVEL_STEP - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_MASK     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_SHIFT    (ilog2(BITS_PER_LONG))
+
+/*
+ * Undefined type representing a pointer with type information in the bottom
+ * two bits.
+ */
+struct assoc_array_ptr;
+
+/*
+ * An N-way node in the tree.
+ *
+ * Each slot contains one of four things:
+ *
+ *     (1) Nothing (NULL).
+ *
+ *     (2) A leaf object (pointer types 0).
+ *
+ *     (3) A next-level node (pointer type 1, subtype 0).
+ *
+ *     (4) A shortcut (pointer type 1, subtype 1).
+ *
+ * The tree is optimised for search-by-ID, but permits reasonable iteration
+ * also.
+ *
+ * The tree is navigated by constructing an index key consisting of an array of
+ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
+ *
+ * The segments correspond to levels of the tree (the first segment is used at
+ * level 0, the second at level 1, etc.).
+ */
+struct assoc_array_node {
+       struct assoc_array_ptr  *back_pointer;
+       u8                      parent_slot;
+       struct assoc_array_ptr  *slots[ASSOC_ARRAY_FAN_OUT];
+       unsigned long           nr_leaves_on_branch;
+};
+
+/*
+ * A shortcut through the index space out to where a collection of nodes/leaves
+ * with the same IDs live.
+ */
+struct assoc_array_shortcut {
+       struct assoc_array_ptr  *back_pointer;
+       int                     parent_slot;
+       int                     skip_to_level;
+       struct assoc_array_ptr  *next_node;
+       unsigned long           index_key[];
+};
+
+/*
+ * Preallocation cache.
+ */
+struct assoc_array_edit {
+       struct rcu_head                 rcu;
+       struct assoc_array              *array;
+       const struct assoc_array_ops    *ops;
+       const struct assoc_array_ops    *ops_for_excised_subtree;
+       struct assoc_array_ptr          *leaf;
+       struct assoc_array_ptr          **leaf_p;
+       struct assoc_array_ptr          *dead_leaf;
+       struct assoc_array_ptr          *new_meta[3];
+       struct assoc_array_ptr          *excised_meta[1];
+       struct assoc_array_ptr          *excised_subtree;
+       struct assoc_array_ptr          **set_backpointers[ASSOC_ARRAY_FAN_OUT];
+       struct assoc_array_ptr          *set_backpointers_to;
+       struct assoc_array_node         *adjust_count_on;
+       long                            adjust_count_by;
+       struct {
+               struct assoc_array_ptr  **ptr;
+               struct assoc_array_ptr  *to;
+       } set[2];
+       struct {
+               u8                      *p;
+               u8                      to;
+       } set_parent_slot[1];
+       u8                              segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
+};
+
+/*
+ * Internal tree member pointers are marked in the bottom one or two bits to
+ * indicate what type they are so that we don't have to look behind every
+ * pointer to see what it points to.
+ *
+ * We provide functions to test type annotations and to create and translate
+ * the annotated pointers.
+ */
+#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
+#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL        /* Points to leaf (or nowhere) */
+#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL        /* Points to node or shortcut */
+#define ASSOC_ARRAY_PTR_SUBTYPE_MASK   0x2UL
+#define ASSOC_ARRAY_PTR_NODE_SUBTYPE   0x0UL
+#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
+
+static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
+{
+       return !assoc_array_ptr_is_meta(x);
+}
+static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
+{
+       return !assoc_array_ptr_is_shortcut(x);
+}
+
+static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
+{
+       return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+
+static inline
+unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x &
+               ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+static inline
+struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
+{
+       return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
+}
+static inline
+struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
+{
+       return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
+}
+
+static inline
+struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
+{
+       return (struct assoc_array_ptr *)((unsigned long)p | t);
+}
+static inline
+struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
+{
+       return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
+{
+       return __assoc_array_x_to_ptr(
+               p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
+{
+       return __assoc_array_x_to_ptr(
+               p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
+}
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
diff --git a/include/linux/audit.h b/include/linux/audit.h

index 729a4d165bcc52b444ee0ea89e8faac36f6518f1..a40641954c296c3042c0e34f5c1ff170aee5cfa5 100644 (file)
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -73,6 +73,8 @@ struct audit_field {
         void                            *lsm_rule;
  };
  
+extern int is_audit_feature_set(int which);
+
  extern int __init audit_register_class(int class, unsigned *list);
  extern int audit_classify_syscall(int abi, unsigned syscall);
  extern int audit_classify_arch(int arch);
@@ -207,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk)
  
  extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
  extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_bprm(struct linux_binprm *bprm);
  extern int __audit_socketcall(int nargs, unsigned long *args);
  extern int __audit_sockaddr(int len, void *addr);
  extern void __audit_fd_pair(int fd1, int fd2);
@@ -236,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
         if (unlikely(!audit_dummy_context()))
                 __audit_ipc_set_perm(qbytes, uid, gid, mode);
  }
-static inline int audit_bprm(struct linux_binprm *bprm)
+static inline void audit_bprm(struct linux_binprm *bprm)
  {
         if (unlikely(!audit_dummy_context()))
-               return __audit_bprm(bprm);
-       return 0;
+               __audit_bprm(bprm);
  }
  static inline int audit_socketcall(int nargs, unsigned long *args)
  {
@@ -367,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
  static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
                                         gid_t gid, umode_t mode)
  { }
-static inline int audit_bprm(struct linux_binprm *bprm)
-{
-       return 0;
-}
+static inline void audit_bprm(struct linux_binprm *bprm)
+{ }
  static inline int audit_socketcall(int nargs, unsigned long *args)
  {
         return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index f26ec20f635476a88c8879baad16a5a908433fa7..1b135d49b27985d3243cdbf92d6002ce5eea8597 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -505,6 +505,9 @@ struct request_queue {
                                  (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                  (1 << QUEUE_FLAG_ADD_RANDOM))
  
+#define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |            \
+                                (1 << QUEUE_FLAG_SAME_COMP))
+
  static inline void queue_lockdep_assert_held(struct request_queue *q)
  {
         if (q->queue_lock)
diff --git a/include/linux/device.h b/include/linux/device.h

index b025925df7f75a5b86fa894de93e2e83d0bdf571..952b01033c32dedcf83349a988b920c1c5ed8aa8 100644 (file)
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -644,9 +644,11 @@ struct device_dma_parameters {
         unsigned long segment_boundary_mask;
  };
  
+struct acpi_device;
+
  struct acpi_dev_node {
  #ifdef CONFIG_ACPI
-       void    *handle;
+       struct acpi_device *companion;
  #endif
  };
  
@@ -790,14 +792,6 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
         return container_of(kobj, struct device, kobj);
  }
  
-#ifdef CONFIG_ACPI
-#define ACPI_HANDLE(dev)       ((dev)->acpi_node.handle)
-#define ACPI_HANDLE_SET(dev, _handle_) (dev)->acpi_node.handle = (_handle_)
-#else
-#define ACPI_HANDLE(dev)       (NULL)
-#define ACPI_HANDLE_SET(dev, _handle_) do { } while (0)
-#endif
-
  /* Get the wakeup routines, which depend on struct device */
  #include <linux/pm_wakeup.h>
  
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h

index 0bc727534108d5a2d5d527e75eaa8020a3ccd239..41cf0c399288e022edf32f7e65c6f151004829d9 100644 (file)
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -45,13 +45,13 @@ static inline int dma_submit_error(dma_cookie_t cookie)
  
  /**
   * enum dma_status - DMA transaction status
- * @DMA_SUCCESS: transaction completed successfully
+ * @DMA_COMPLETE: transaction completed
   * @DMA_IN_PROGRESS: transaction not yet processed
   * @DMA_PAUSED: transaction is paused
   * @DMA_ERROR: transaction failed
   */
  enum dma_status {
-       DMA_SUCCESS,
+       DMA_COMPLETE,
         DMA_IN_PROGRESS,
         DMA_PAUSED,
         DMA_ERROR,
@@ -171,12 +171,6 @@ struct dma_interleaved_template {
   * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
   *  acknowledges receipt, i.e. has has a chance to establish any dependency
   *  chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- *     (if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- *     (if not set, do the destination dma-unmapping as page)
   * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
   * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
   * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@ struct dma_interleaved_template {
  enum dma_ctrl_flags {
         DMA_PREP_INTERRUPT = (1 << 0),
         DMA_CTRL_ACK = (1 << 1),
-       DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
-       DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
-       DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
-       DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
-       DMA_PREP_PQ_DISABLE_P = (1 << 6),
-       DMA_PREP_PQ_DISABLE_Q = (1 << 7),
-       DMA_PREP_CONTINUE = (1 << 8),
-       DMA_PREP_FENCE = (1 << 9),
+       DMA_PREP_PQ_DISABLE_P = (1 << 2),
+       DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+       DMA_PREP_CONTINUE = (1 << 4),
+       DMA_PREP_FENCE = (1 << 5),
  };
  
  /**
@@ -413,6 +403,17 @@ void dma_chan_cleanup(struct kref *kref);
  typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
  
  typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+       u8 to_cnt;
+       u8 from_cnt;
+       u8 bidi_cnt;
+       struct device *dev;
+       struct kref kref;
+       size_t len;
+       dma_addr_t addr[0];
+};
+
  /**
   * struct dma_async_tx_descriptor - async transaction descriptor
   * ---dma generic offload fields---
@@ -438,6 +439,7 @@ struct dma_async_tx_descriptor {
         dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
         dma_async_tx_callback callback;
         void *callback_param;
+       struct dmaengine_unmap_data *unmap;
  #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
         struct dma_async_tx_descriptor *next;
         struct dma_async_tx_descriptor *parent;
@@ -445,6 +447,40 @@ struct dma_async_tx_descriptor {
  #endif
  };
  
+#ifdef CONFIG_DMA_ENGINE
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+                                struct dmaengine_unmap_data *unmap)
+{
+       kref_get(&unmap->kref);
+       tx->unmap = unmap;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+                                struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+       return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+       if (tx->unmap) {
+               dmaengine_unmap_put(tx->unmap);
+               tx->unmap = NULL;
+       }
+}
+
  #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
  static inline void txd_lock(struct dma_async_tx_descriptor *txd)
  {
@@ -979,10 +1015,10 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
  {
         if (last_complete <= last_used) {
                 if ((cookie <= last_complete) || (cookie > last_used))
-                       return DMA_SUCCESS;
+                       return DMA_COMPLETE;
         } else {
                 if ((cookie <= last_complete) && (cookie > last_used))
-                       return DMA_SUCCESS;
+                       return DMA_COMPLETE;
         }
         return DMA_IN_PROGRESS;
  }
@@ -1013,11 +1049,11 @@ static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_typ
  }
  static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
  {
-       return DMA_SUCCESS;
+       return DMA_COMPLETE;
  }
  static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
  {
-       return DMA_SUCCESS;
+       return DMA_COMPLETE;
  }
  static inline void dma_issue_pending_all(void)
  {
diff --git a/include/linux/fs.h b/include/linux/fs.h

index bf5d574ebdf4c94da0238c14add54bc51587175a..121f11f001c06f1cdcdeb6533234d76626a2915b 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
  extern int simple_write_end(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned copied,
                         struct page *page, void *fsdata);
+extern int always_delete_dentry(const struct dentry *);
  extern struct inode *alloc_anon_inode(struct super_block *);
+extern const struct dentry_operations simple_dentry_operations;
  
  extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
  extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index acd2010328f3044b1cf6dd788e4ac09ab9262925..9649ff0c63f8d0bb5253cd08b3f32499986b4cfd 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
  void hugepage_put_subpool(struct hugepage_subpool *spool);
  
  int PageHuge(struct page *page);
+int PageHeadHuge(struct page *page_head);
  
  void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
  int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page);
  bool isolate_huge_page(struct page *page, struct list_head *list);
  void putback_active_hugepage(struct page *page);
  bool is_hugepage_active(struct page *page);
-void copy_huge_page(struct page *dst, struct page *src);
  
  #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
  pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
@@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page)
         return 0;
  }
  
+static inline int PageHeadHuge(struct page *page_head)
+{
+       return 0;
+}
+
  static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
  {
  }
@@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
  #define isolate_huge_page(p, l) false
  #define putback_active_hugepage(p)     do {} while (0)
  #define is_hugepage_active(x)  false
-static inline void copy_huge_page(struct page *dst, struct page *src)
-{
-}
  
  static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 unsigned long address, unsigned long end, pgprot_t newprot)
diff --git a/include/linux/key-type.h b/include/linux/key-type.h

index 518a53afb9ea24b301d87d4af6c824bca5fc5548..a74c3a84dfdd05223cc0613c9445bdc396e1afe9 100644 (file)
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,6 +45,7 @@ struct key_preparsed_payload {
         const void      *data;          /* Raw data */
         size_t          datalen;        /* Raw datalen */
         size_t          quotalen;       /* Quota length for proposed payload */
+       bool            trusted;        /* True if key is trusted */
  };
  
  typedef int (*request_key_actor_t)(struct key_construction *key,
@@ -63,6 +64,11 @@ struct key_type {
          */
         size_t def_datalen;
  
+       /* Default key search algorithm. */
+       unsigned def_lookup_type;
+#define KEYRING_SEARCH_LOOKUP_DIRECT   0x0000  /* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE  0x0001  /* Iterative search. */
+
         /* vet a description */
         int (*vet_description)(const char *description);
  
diff --git a/include/linux/key.h b/include/linux/key.h

index 4dfde1161c5e7878565d05ad7f5293e9e4d19cef..80d677483e31f95ce4b08ef00ffb79c239932c18 100644 (file)
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -22,6 +22,7 @@
  #include <linux/sysctl.h>
  #include <linux/rwsem.h>
  #include <linux/atomic.h>
+#include <linux/assoc_array.h>
  
  #ifdef __KERNEL__
  #include <linux/uidgid.h>
@@ -82,6 +83,12 @@ struct key_owner;
  struct keyring_list;
  struct keyring_name;
  
+struct keyring_index_key {
+       struct key_type         *type;
+       const char              *description;
+       size_t                  desc_len;
+};
+
  /*****************************************************************************/
  /*
   * key reference with possession attribute handling
@@ -99,7 +106,7 @@ struct keyring_name;
  typedef struct __key_reference_with_attributes *key_ref_t;
  
  static inline key_ref_t make_key_ref(const struct key *key,
-                                    unsigned long possession)
+                                    bool possession)
  {
         return (key_ref_t) ((unsigned long) key | possession);
  }
@@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
         return (struct key *) ((unsigned long) key_ref & ~1UL);
  }
  
-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
+static inline bool is_key_possessed(const key_ref_t key_ref)
  {
         return (unsigned long) key_ref & 1UL;
  }
@@ -129,7 +136,6 @@ struct key {
                 struct list_head graveyard_link;
                 struct rb_node  serial_node;
         };
-       struct key_type         *type;          /* type of key */
         struct rw_semaphore     sem;            /* change vs change sem */
         struct key_user         *user;          /* owner of this key */
         void                    *security;      /* security data for this key */
@@ -162,13 +168,21 @@ struct key {
  #define KEY_FLAG_NEGATIVE      5       /* set if key is negative */
  #define KEY_FLAG_ROOT_CAN_CLEAR        6       /* set if key can be cleared by root without permission */
  #define KEY_FLAG_INVALIDATED   7       /* set if key has been invalidated */
+#define KEY_FLAG_TRUSTED       8       /* set if key is trusted */
+#define KEY_FLAG_TRUSTED_ONLY  9       /* set if keyring only accepts links to trusted keys */
  
-       /* the description string
-        * - this is used to match a key against search criteria
-        * - this should be a printable string
+       /* the key type and key description string
+        * - the desc is used to match a key against search criteria
+        * - it should be a printable string
          * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
          */
-       char                    *description;
+       union {
+               struct keyring_index_key index_key;
+               struct {
+                       struct key_type *type;          /* type of key */
+                       char            *description;
+               };
+       };
  
         /* type specific data
          * - this is used by the keyring type to index the name
@@ -185,11 +199,14 @@ struct key {
          *   whatever
          */
         union {
-               unsigned long           value;
-               void __rcu              *rcudata;
-               void                    *data;
-               struct keyring_list __rcu *subscriptions;
-       } payload;
+               union {
+                       unsigned long           value;
+                       void __rcu              *rcudata;
+                       void                    *data;
+                       void                    *data2[2];
+               } payload;
+               struct assoc_array keys;
+       };
  };
  
  extern struct key *key_alloc(struct key_type *type,
@@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type,
  #define KEY_ALLOC_IN_QUOTA     0x0000  /* add to quota, reject if would overrun */
  #define KEY_ALLOC_QUOTA_OVERRUN        0x0001  /* add to quota, permit even if overrun */
  #define KEY_ALLOC_NOT_IN_QUOTA 0x0002  /* not in quota */
+#define KEY_ALLOC_TRUSTED      0x0004  /* Key should be flagged as trusted */
  
  extern void key_revoke(struct key *key);
  extern void key_invalidate(struct key *key);
  extern void key_put(struct key *key);
  
-static inline struct key *key_get(struct key *key)
+static inline struct key *__key_get(struct key *key)
  {
-       if (key)
-               atomic_inc(&key->usage);
+       atomic_inc(&key->usage);
         return key;
  }
  
+static inline struct key *key_get(struct key *key)
+{
+       return key ? __key_get(key) : key;
+}
+
  static inline void key_ref_put(key_ref_t key_ref)
  {
         key_put(key_ref_to_ptr(key_ref));
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 0548eb201e058afdd32f2645240633ff7a8783d4..1cedd000cf293f4486575b87086b01e2ee1b26e5 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1318,7 +1318,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
  
  #if USE_SPLIT_PTE_PTLOCKS
  #if BLOATED_SPINLOCKS
-void __init ptlock_cache_init(void);
  extern bool ptlock_alloc(struct page *page);
  extern void ptlock_free(struct page *page);
  
@@ -1327,7 +1326,6 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
         return page->ptl;
  }
  #else /* BLOATED_SPINLOCKS */
-static inline void ptlock_cache_init(void) {}
  static inline bool ptlock_alloc(struct page *page)
  {
         return true;
@@ -1380,17 +1378,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
  {
         return &mm->page_table_lock;
  }
-static inline void ptlock_cache_init(void) {}
  static inline bool ptlock_init(struct page *page) { return true; }
  static inline void pte_lock_deinit(struct page *page) {}
  #endif /* USE_SPLIT_PTE_PTLOCKS */
  
-static inline void pgtable_init(void)
-{
-       ptlock_cache_init();
-       pgtable_cache_init();
-}
-
  static inline bool pgtable_page_ctor(struct page *page)
  {
         inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 10f5a7272b8046ff7561b6d78a3b665828415ef1..bd299418a934e21b99c303af82a7c2f427bbf915 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -44,18 +44,22 @@ struct page {
         /* First double word block */
         unsigned long flags;            /* Atomic flags, some possibly
                                          * updated asynchronously */
-       struct address_space *mapping;  /* If low bit clear, points to
-                                        * inode address_space, or NULL.
-                                        * If page mapped as anonymous
-                                        * memory, low bit is set, and
-                                        * it points to anon_vma object:
-                                        * see PAGE_MAPPING_ANON below.
-                                        */
+       union {
+               struct address_space *mapping;  /* If low bit clear, points to
+                                                * inode address_space, or NULL.
+                                                * If page mapped as anonymous
+                                                * memory, low bit is set, and
+                                                * it points to anon_vma object:
+                                                * see PAGE_MAPPING_ANON below.
+                                                */
+               void *s_mem;                    /* slab first object */
+       };
+
         /* Second double word */
         struct {
                 union {
                         pgoff_t index;          /* Our offset within mapping. */
-                       void *freelist;         /* slub/slob first free object */
+                       void *freelist;         /* sl[aou]b first free object */
                         bool pfmemalloc;        /* If set by the page allocator,
                                                  * ALLOC_NO_WATERMARKS was set
                                                  * and the low watermark was not
@@ -65,9 +69,6 @@ struct page {
                                                  * this page is only used to
                                                  * free other pages.
                                                  */
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
-               pgtable_t pmd_huge_pte; /* protected by page->ptl */
-#endif
                 };
  
                 union {
@@ -114,6 +115,7 @@ struct page {
                                 };
                                 atomic_t _count;                /* Usage count, see below. */
                         };
+                       unsigned int active;    /* SLAB */
                 };
         };
  
@@ -135,6 +137,12 @@ struct page {
  
                 struct list_head list;  /* slobs list of pages */
                 struct slab *slab_page; /* slab fields */
+               struct rcu_head rcu_head;       /* Used by SLAB
+                                                * when destroying via RCU
+                                                */
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
+               pgtable_t pmd_huge_pte; /* protected by page->ptl */
+#endif
         };
  
         /* Remainder is not double word aligned */
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h

index d006f0ca60f46e92705fb8b3fd315ebacc5242b2..5a462c4e5009d68960525d2167728d59ee75e33b 100644 (file)
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -27,7 +27,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
         while (!pci_is_root_bus(pbus))
                 pbus = pbus->parent;
  
-       return DEVICE_ACPI_HANDLE(pbus->bridge);
+       return ACPI_HANDLE(pbus->bridge);
  }
  
  static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
@@ -39,7 +39,7 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
         else
                 dev = &pbus->self->dev;
  
-       return DEVICE_ACPI_HANDLE(dev);
+       return ACPI_HANDLE(dev);
  }
  
  void acpi_pci_add_bus(struct pci_bus *bus);
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h

index 179fb91bb5f2eaef7354e37411628f90a6c02335..f50821cb64be8c11165659b0f0719398bfa004cf 100644 (file)
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -67,10 +67,10 @@ struct edmacc_param {
  #define ITCCHEN                BIT(23)
  
  /*ch_status paramater of callback function possible values*/
-#define DMA_COMPLETE 1
-#define DMA_CC_ERROR 2
-#define DMA_TC1_ERROR 3
-#define DMA_TC2_ERROR 4
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
  
  enum address_mode {
         INCR = 0,
diff --git a/include/linux/security.h b/include/linux/security.h

index 9d37e2b9d3ec030f026117d99de94dfd50b32ab0..5623a7f965b7bbb07ab94a72351aec027ef4adb7 100644 (file)
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
   * @xfrm_policy_delete_security:
   *     @ctx contains the xfrm_sec_ctx.
   *     Authorize deletion of xp->security.
- * @xfrm_state_alloc_security:
+ * @xfrm_state_alloc:
   *     @x contains the xfrm_state being added to the Security Association
   *     Database by the XFRM system.
   *     @sec_ctx contains the security context information being provided by
   *     the user-level SA generation program (e.g., setkey or racoon).
- *     @secid contains the secid from which to take the mls portion of the context.
   *     Allocate a security structure to the x->security field; the security
   *     field is initialized to NULL when the xfrm_state is allocated. Set the
- *     context to correspond to either sec_ctx or polsec, with the mls portion
- *     taken from secid in the latter case.
- *     Return 0 if operation was successful (memory to allocate, legal context).
+ *     context to correspond to sec_ctx. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ *     @x contains the xfrm_state being added to the Security Association
+ *     Database by the XFRM system.
+ *     @polsec contains the policy's security context.
+ *     @secid contains the secid from which to take the mls portion of the
+ *     context.
+ *     Allocate a security structure to the x->security field; the security
+ *     field is initialized to NULL when the xfrm_state is allocated. Set the
+ *     context to correspond to secid. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
   * @xfrm_state_free_security:
   *     @x contains the xfrm_state.
   *     Deallocate x->security.
@@ -1679,9 +1687,11 @@ struct security_operations {
         int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
         void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
         int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
-       int (*xfrm_state_alloc_security) (struct xfrm_state *x,
-               struct xfrm_user_sec_ctx *sec_ctx,
-               u32 secid);
+       int (*xfrm_state_alloc) (struct xfrm_state *x,
+                                struct xfrm_user_sec_ctx *sec_ctx);
+       int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
+                                        struct xfrm_sec_ctx *polsec,
+                                        u32 secid);
         void (*xfrm_state_free_security) (struct xfrm_state *x);
         int (*xfrm_state_delete_security) (struct xfrm_state *x);
         int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h

index 1e8a8b6e837d8621fa5488f832273df673ff3520..cf87a24c0f92f1963081c93d7b0cce2356025358 100644 (file)
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -354,6 +354,35 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
         spin_unlock(&sl->lock);
  }
  
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+       if (!(*seq & 1))        /* Even */
+               *seq = read_seqbegin(lock);
+       else                    /* Odd */
+               read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+       return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+       if (seq & 1)
+               read_sequnlock_excl(lock);
+}
+
  static inline void read_seqlock_excl_bh(seqlock_t *sl)
  {
         spin_lock_bh(&sl->lock);
diff --git a/include/linux/slab.h b/include/linux/slab.h

index 74f105847d13ceae757c8aa6b83bdf8412816696..c2bba248fa63d46024930f5b4d6c47b652a51dc6 100644 (file)
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -53,7 +53,14 @@
   *  }
   *  rcu_read_unlock();
   *
- * See also the comment on struct slab_rcu in mm/slab.c.
+ * This is useful if we need to approach a kernel structure obliquely,
+ * from its address obtained without the usual locking. We can lock
+ * the structure to stabilize it and check it's still at the given address,
+ * only if we can be sure that the memory has not been meanwhile reused
+ * for some other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
   */
  #define SLAB_DESTROY_BY_RCU    0x00080000UL    /* Defer freeing slabs to RCU */
  #define SLAB_MEM_SPREAD                0x00100000UL    /* Spread some memory over cpuset */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h

index e9346b4f1ef4b2ef6d302a5799e30e631fa10ce2..09bfffb08a56db285caa27146202f04e2188b480 100644 (file)
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -27,8 +27,8 @@ struct kmem_cache {
  
         size_t colour;                  /* cache colouring range */
         unsigned int colour_off;        /* colour offset */
-       struct kmem_cache *slabp_cache;
-       unsigned int slab_size;
+       struct kmem_cache *freelist_cache;
+       unsigned int freelist_size;
  
         /* constructor func */
         void (*ctor)(void *obj);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h

index cc0b67eada4260331276e9ae145fa593c14ecee8..f56bfa9e4526f6382467fe2647c19bdbca185346 100644 (file)
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,7 +11,7 @@
  enum stat_item {
         ALLOC_FASTPATH,         /* Allocation from cpu slab */
         ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
-       FREE_FASTPATH,          /* Free to cpu slub */
+       FREE_FASTPATH,          /* Free to cpu slab */
         FREE_SLOWPATH,          /* Freeing not to cpu slab */
         FREE_FROZEN,            /* Freeing to frozen slab */
         FREE_ADD_PARTIAL,       /* Freeing moves slab to partial list */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h

index 4db29859464f3af3d78caafa41a1e7525034a589..4836ba3c1cd8266c294b9dfd28aa6d0d433db0d6 100644 (file)
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,12 @@ struct user_namespace {
         kuid_t                  owner;
         kgid_t                  group;
         unsigned int            proc_inum;
+
+       /* Register of per-UID persistent keyrings for this namespace */
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       struct key              *persistent_keyring_register;
+       struct rw_semaphore     persistent_keyring_register_sem;
+#endif
  };
  
  extern struct user_namespace init_user_ns;
diff --git a/include/linux/wait.h b/include/linux/wait.h

index 61939ba30aa0abdbe7e36e608343338c80fada6b..eaa00b10abaaa53cf441170841c3faec588e9de0 100644 (file)
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -278,6 +278,31 @@ do {                                                                       \
         __ret;                                                          \
  })
  
+#define __wait_event_cmd(wq, condition, cmd1, cmd2)                    \
+       (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
+                           cmd1; schedule(); cmd2)
+
+/**
+ * wait_event_cmd - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * cmd1: the command will be executed before sleep
+ * cmd2: the command will be executed after sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_cmd(wq, condition, cmd1, cmd2)                      \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event_cmd(wq, condition, cmd1, cmd2);                    \
+} while (0)
+
  #define __wait_event_interruptible(wq, condition)                      \
         ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,          \
                       schedule())
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h

index f18b3b76e01e22e00c00ee133b2ebdc1013bcc62..4832d75dcbaedb888a2751303fe7d5a4e62b8010 100644 (file)
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -162,12 +162,14 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
                 { EXTENT_FLAG_LOGGING,          "LOGGING"       },      \
                 { EXTENT_FLAG_FILLING,          "FILLING"       })
  
-TRACE_EVENT(btrfs_get_extent,
+TRACE_EVENT_CONDITION(btrfs_get_extent,
  
         TP_PROTO(struct btrfs_root *root, struct extent_map *map),
  
         TP_ARGS(root, map),
  
+       TP_CONDITION(map),
+
         TP_STRUCT__entry(
                 __field(        u64,  root_objectid     )
                 __field(        u64,  start             )
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h

index db0b825b48109f2e8cc011f211749043757a33e5..44b05a09f1933a1c293a2174eb58d4db7b473b80 100644 (file)
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -68,6 +68,9 @@
  #define AUDIT_MAKE_EQUIV       1015    /* Append to watched tree */
  #define AUDIT_TTY_GET          1016    /* Get TTY auditing status */
  #define AUDIT_TTY_SET          1017    /* Set TTY auditing status */
+#define AUDIT_SET_FEATURE      1018    /* Turn an audit feature on or off */
+#define AUDIT_GET_FEATURE      1019    /* Get which features are enabled */
+#define AUDIT_FEATURE_CHANGE   1020    /* audit log listing feature changes */
  
  #define AUDIT_FIRST_USER_MSG   1100    /* Userspace messages mostly uninteresting to kernel */
  #define AUDIT_USER_AVC         1107    /* We filter this differently */
@@ -357,6 +360,12 @@ enum {
  #define AUDIT_PERM_READ                4
  #define AUDIT_PERM_ATTR                8
  
+/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as:
+ * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1
+ * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad
+ */
+#define AUDIT_MESSAGE_TEXT_MAX 8560
+
  struct audit_status {
         __u32           mask;           /* Bit mask for valid entries */
         __u32           enabled;        /* 1 = enabled, 0 = disabled */
@@ -368,11 +377,28 @@ struct audit_status {
         __u32           backlog;        /* messages waiting in queue */
  };
  
+struct audit_features {
+#define AUDIT_FEATURE_VERSION  1
+       __u32   vers;
+       __u32   mask;           /* which bits we are dealing with */
+       __u32   features;       /* which feature to enable/disable */
+       __u32   lock;           /* which features to lock */
+};
+
+#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID      0
+#define AUDIT_FEATURE_LOGINUID_IMMUTABLE       1
+#define AUDIT_LAST_FEATURE                     AUDIT_FEATURE_LOGINUID_IMMUTABLE
+
+#define audit_feature_valid(x)         ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE)
+#define AUDIT_FEATURE_TO_MASK(x)       (1 << ((x) & 31)) /* mask for __u32 */
+
  struct audit_tty_status {
         __u32           enabled;        /* 1 = enabled, 0 = disabled */
         __u32           log_passwd;     /* 1 = enabled, 0 = disabled */
  };
  
+#define AUDIT_UID_UNSET (unsigned int)-1
+
  /* audit_rule_data supports filter rules with both integer and string
   * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
   * AUDIT_LIST_RULES requests.
diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h

new file mode 100644 (file)

index 0000000..ca18c45
--- /dev/null
+++ b/include/uapi/linux/hash_info.h
@@ -0,0 +1,37 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _UAPI_LINUX_HASH_INFO_H
+#define _UAPI_LINUX_HASH_INFO_H
+
+enum hash_algo {
+       HASH_ALGO_MD4,
+       HASH_ALGO_MD5,
+       HASH_ALGO_SHA1,
+       HASH_ALGO_RIPE_MD_160,
+       HASH_ALGO_SHA256,
+       HASH_ALGO_SHA384,
+       HASH_ALGO_SHA512,
+       HASH_ALGO_SHA224,
+       HASH_ALGO_RIPE_MD_128,
+       HASH_ALGO_RIPE_MD_256,
+       HASH_ALGO_RIPE_MD_320,
+       HASH_ALGO_WP_256,
+       HASH_ALGO_WP_384,
+       HASH_ALGO_WP_512,
+       HASH_ALGO_TGR_128,
+       HASH_ALGO_TGR_160,
+       HASH_ALGO_TGR_192,
+       HASH_ALGO__LAST
+};
+
+#endif /* _UAPI_LINUX_HASH_INFO_H */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h

index c9b7f4faf97aa5a790f6b58fd6d9da25ca2ebd64..840cb990abe2e7147ec89c92ef143e86a35ce0f8 100644 (file)
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -56,5 +56,6 @@
  #define KEYCTL_REJECT                  19      /* reject a partially constructed key */
  #define KEYCTL_INSTANTIATE_IOV         20      /* instantiate a partially constructed key */
  #define KEYCTL_INVALIDATE              21      /* invalidate a key */
+#define KEYCTL_GET_PERSISTENT          22      /* get a user's persistent keyring */
  
  #endif /*  _LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h

index fe1a5406d4d93cdf6012d11ab7315f014b552ede..f7cf7f351144873efd0bde3bdfe4e8f1676eb41d 100644 (file)
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -16,6 +16,7 @@
  #define _MD_P_H
  
  #include <linux/types.h>
+#include <asm/byteorder.h>
  
  /*
   * RAID superblock.
diff --git a/init/Kconfig b/init/Kconfig

index 3fc8a2f2fac4462eb7b1d1c5c1c1b7c4c3cabdf7..79383d3aa5dc5f7fe64abdcaf7d511144ca016e5 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -301,20 +301,6 @@ config AUDIT_TREE
         depends on AUDITSYSCALL
         select FSNOTIFY
  
-config AUDIT_LOGINUID_IMMUTABLE
-       bool "Make audit loginuid immutable"
-       depends on AUDIT
-       help
-         The config option toggles if a task setting its loginuid requires
-         CAP_SYS_AUDITCONTROL or if that task should require no special permissions
-         but should instead only allow setting its loginuid if it was never
-         previously set.  On systems which use systemd or a similar central
-         process to restart login services this should be set to true.  On older
-         systems in which an admin would typically have to directly stop and
-         start processes this should be set to false.  Setting this to true allows
-         one to drop potentially dangerous capabilites from the login tasks,
-         but may not be backwards compatible with older init systems.
-
  source "kernel/irq/Kconfig"
  source "kernel/time/Kconfig"
  
@@ -1669,6 +1655,18 @@ config BASE_SMALL
         default 0 if BASE_FULL
         default 1 if !BASE_FULL
  
+config SYSTEM_TRUSTED_KEYRING
+       bool "Provide system-wide ring of trusted keys"
+       depends on KEYS
+       help
+         Provide a system keyring to which trusted keys can be added.  Keys in
+         the keyring are considered to be trusted.  Keys may be added at will
+         by the kernel from compiled-in data and from hardware key stores, but
+         userspace may only add extra keys if those keys can be verified by
+         keys already in the keyring.
+
+         Keys in this keyring are used by module signature checking.
+
  menuconfig MODULES
         bool "Enable loadable module support"
         option modules
@@ -1742,6 +1740,7 @@ config MODULE_SRCVERSION_ALL
  config MODULE_SIG
         bool "Module signature verification"
         depends on MODULES
+       select SYSTEM_TRUSTED_KEYRING
         select KEYS
         select CRYPTO
         select ASYMMETRIC_KEY_TYPE
diff --git a/init/main.c b/init/main.c

index 01573fdfa1868fd7fb320d472eff6217f39a07eb..febc511e078a65d08ac4dd6a3be67b26926195cd 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -476,7 +476,7 @@ static void __init mm_init(void)
         mem_init();
         kmem_cache_init();
         percpu_init_late();
-       pgtable_init();
+       pgtable_cache_init();
         vmalloc_init();
  }
  
diff --git a/ipc/shm.c b/ipc/shm.c

index d69739610fd4384323004c46782116d54db6bbd5..7a51443a51d6421bd2a02a66ec18db98f6796dd3 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma)
   */
  static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
  {
+       struct file *shm_file;
+
+       shm_file = shp->shm_file;
+       shp->shm_file = NULL;
         ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
         shm_rmid(ns, shp);
         shm_unlock(shp);
-       if (!is_file_hugepages(shp->shm_file))
-               shmem_lock(shp->shm_file, 0, shp->mlock_user);
+       if (!is_file_hugepages(shm_file))
+               shmem_lock(shm_file, 0, shp->mlock_user);
         else if (shp->mlock_user)
-               user_shm_unlock(file_inode(shp->shm_file)->i_size,
-                                               shp->mlock_user);
-       fput (shp->shm_file);
+               user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
+       fput(shm_file);
         ipc_rcu_putref(shp, shm_rcu_free);
  }
  
@@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                 ipc_lock_object(&shp->shm_perm);
                 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                         kuid_t euid = current_euid();
-                       err = -EPERM;
                         if (!uid_eq(euid, shp->shm_perm.uid) &&
-                           !uid_eq(euid, shp->shm_perm.cuid))
+                           !uid_eq(euid, shp->shm_perm.cuid)) {
+                               err = -EPERM;
                                 goto out_unlock0;
-                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
+                       }
+                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+                               err = -EPERM;
                                 goto out_unlock0;
+                       }
                 }
  
                 shm_file = shp->shm_file;
+
+               /* check if shm_destroy() is tearing down shp */
+               if (shm_file == NULL) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
+
                 if (is_file_hugepages(shm_file))
                         goto out_unlock0;
  
@@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
                 goto out_unlock;
  
         ipc_lock_object(&shp->shm_perm);
+
+       /* check if shm_destroy() is tearing down shp */
+       if (shp->shm_file == NULL) {
+               ipc_unlock_object(&shp->shm_perm);
+               err = -EIDRM;
+               goto out_unlock;
+       }
+
         path = shp->shm_file->f_path;
         path_get(&path);
         shp->shm_nattch++;
diff --git a/kernel/Makefile b/kernel/Makefile

index 09a9c94f42bde841a58b875ca7fc75c1b69f65c6..bbaf7d59c1bb14f166441e6532b55585790e4b52 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y)
  obj-y += up.o
  endif
  obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
  obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
  obj-$(CONFIG_KALLSYMS) += kallsyms.o
  obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
  obj-$(CONFIG_KEXEC) += kexec.o
@@ -122,19 +123,52 @@ targets += timeconst.h
  $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
         $(call if_changed,bc)
  
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
  #
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509".  Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
  #
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+                               $(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
  
-quiet_cmd_touch = TOUCH   $@
-      cmd_touch = touch   $@
+quiet_cmd_x509certs  = CERTS   $@
+      cmd_x509certs  = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo "  - Including cert $(X509)")
  
-extra_certificates:
-       $(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+       $(call if_changed,x509certs)
  
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+       @echo $(X509_CERTIFICATES) >$@
  
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
  ###############################################################################
  #
  # If module signing is requested, say by allyesconfig, but a key has not been
diff --git a/kernel/audit.c b/kernel/audit.c

index 7b0e23a740ce345987c33f9e012302c24de0f4db..906ae5a0233a1011d558ff47c548808517ef9a03 100644 (file)
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -60,7 +60,6 @@
  #ifdef CONFIG_SECURITY
  #include <linux/security.h>
  #endif
-#include <net/netlink.h>
  #include <linux/freezer.h>
  #include <linux/tty.h>
  #include <linux/pid_namespace.h>
@@ -140,6 +139,17 @@ static struct task_struct *kauditd_task;
  static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
  static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
  
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+                                  .mask = -1,
+                                  .features = 0,
+                                  .lock = 0,};
+
+static char *audit_feature_names[2] = {
+       "only_unset_loginuid",
+       "loginuid_immutable",
+};
+
+
  /* Serialize requests from userspace. */
  DEFINE_MUTEX(audit_cmd_mutex);
  
@@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
                 return -EOPNOTSUPP;
         case AUDIT_GET:
         case AUDIT_SET:
+       case AUDIT_GET_FEATURE:
+       case AUDIT_SET_FEATURE:
         case AUDIT_LIST_RULES:
         case AUDIT_ADD_RULE:
         case AUDIT_DEL_RULE:
@@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
         int rc = 0;
         uid_t uid = from_kuid(&init_user_ns, current_uid());
  
-       if (!audit_enabled) {
+       if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
                 *ab = NULL;
                 return rc;
         }
@@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
         return rc;
  }
  
+int is_audit_feature_set(int i)
+{
+       return af.features & AUDIT_FEATURE_TO_MASK(i);
+}
+
+
+static int audit_get_feature(struct sk_buff *skb)
+{
+       u32 seq;
+
+       seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+       audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
+                        &af, sizeof(af));
+
+       return 0;
+}
+
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+                                    u32 old_lock, u32 new_lock, int res)
+{
+       struct audit_buffer *ab;
+
+       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+       audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+                        audit_feature_names[which], !!old_feature, !!new_feature,
+                        !!old_lock, !!new_lock, res);
+       audit_log_end(ab);
+}
+
+static int audit_set_feature(struct sk_buff *skb)
+{
+       struct audit_features *uaf;
+       int i;
+
+       BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+       uaf = nlmsg_data(nlmsg_hdr(skb));
+
+       /* if there is ever a version 2 we should handle that here */
+
+       for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+               u32 feature = AUDIT_FEATURE_TO_MASK(i);
+               u32 old_feature, new_feature, old_lock, new_lock;
+
+               /* if we are not changing this feature, move along */
+               if (!(feature & uaf->mask))
+                       continue;
+
+               old_feature = af.features & feature;
+               new_feature = uaf->features & feature;
+               new_lock = (uaf->lock | af.lock) & feature;
+               old_lock = af.lock & feature;
+
+               /* are we changing a locked feature? */
+               if ((af.lock & feature) && (new_feature != old_feature)) {
+                       audit_log_feature_change(i, old_feature, new_feature,
+                                                old_lock, new_lock, 0);
+                       return -EPERM;
+               }
+       }
+       /* nothing invalid, do the changes */
+       for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+               u32 feature = AUDIT_FEATURE_TO_MASK(i);
+               u32 old_feature, new_feature, old_lock, new_lock;
+
+               /* if we are not changing this feature, move along */
+               if (!(feature & uaf->mask))
+                       continue;
+
+               old_feature = af.features & feature;
+               new_feature = uaf->features & feature;
+               old_lock = af.lock & feature;
+               new_lock = (uaf->lock | af.lock) & feature;
+
+               if (new_feature != old_feature)
+                       audit_log_feature_change(i, old_feature, new_feature,
+                                                old_lock, new_lock, 1);
+
+               if (new_feature)
+                       af.features |= feature;
+               else
+                       af.features &= ~feature;
+               af.lock |= new_lock;
+       }
+
+       return 0;
+}
+
  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
  {
         u32                     seq;
@@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
  
         switch (msg_type) {
         case AUDIT_GET:
+               memset(&status_set, 0, sizeof(status_set));
                 status_set.enabled       = audit_enabled;
                 status_set.failure       = audit_failure;
                 status_set.pid           = audit_pid;
@@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                  &status_set, sizeof(status_set));
                 break;
         case AUDIT_SET:
-               if (nlh->nlmsg_len < sizeof(struct audit_status))
+               if (nlmsg_len(nlh) < sizeof(struct audit_status))
                         return -EINVAL;
                 status_get   = (struct audit_status *)data;
                 if (status_get->mask & AUDIT_STATUS_ENABLED) {
@@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
                         err = audit_set_backlog_limit(status_get->backlog_limit);
                 break;
+       case AUDIT_GET_FEATURE:
+               err = audit_get_feature(skb);
+               if (err)
+                       return err;
+               break;
+       case AUDIT_SET_FEATURE:
+               err = audit_set_feature(skb);
+               if (err)
+                       return err;
+               break;
         case AUDIT_USER:
         case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
         case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
@@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                         }
                         audit_log_common_recv_msg(&ab, msg_type);
                         if (msg_type != AUDIT_USER_TTY)
-                               audit_log_format(ab, " msg='%.1024s'",
+                               audit_log_format(ab, " msg='%.*s'",
+                                                AUDIT_MESSAGE_TEXT_MAX,
                                                  (char *)data);
                         else {
                                 int size;
@@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 struct task_struct *tsk = current;
  
                 spin_lock(&tsk->sighand->siglock);
-               s.enabled = tsk->signal->audit_tty != 0;
+               s.enabled = tsk->signal->audit_tty;
                 s.log_passwd = tsk->signal->audit_tty_log_passwd;
                 spin_unlock(&tsk->sighand->siglock);
  
@@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
  
                 memset(&s, 0, sizeof(s));
                 /* guard against past and future API changes */
-               memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+               memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
                 if ((s.enabled != 0 && s.enabled != 1) ||
                     (s.log_passwd != 0 && s.log_passwd != 1))
                         return -EINVAL;
@@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time)
         remove_wait_queue(&audit_backlog_wait, &wait);
  }
  
-/* Obtain an audit buffer.  This routine does locking to obtain the
- * audit buffer, but then no locking is required for calls to
- * audit_log_*format.  If the tsk is a task that is currently in a
- * syscall, then the syscall is marked as auditable and an audit record
- * will be written at syscall exit.  If there is no associated task, tsk
- * should be NULL. */
-
  /**
   * audit_log_start - obtain an audit buffer
   * @ctx: audit_context (may be NULL)
@@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab)
         u32 sessionid = audit_get_sessionid(current);
         uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
  
-       audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+       audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
  }
  
  void audit_log_key(struct audit_buffer *ab, char *key)
@@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
                 }
         }
  
+       /* log the audit_names record type */
+       audit_log_format(ab, " nametype=");
+       switch(n->type) {
+       case AUDIT_TYPE_NORMAL:
+               audit_log_format(ab, "NORMAL");
+               break;
+       case AUDIT_TYPE_PARENT:
+               audit_log_format(ab, "PARENT");
+               break;
+       case AUDIT_TYPE_CHILD_DELETE:
+               audit_log_format(ab, "DELETE");
+               break;
+       case AUDIT_TYPE_CHILD_CREATE:
+               audit_log_format(ab, "CREATE");
+               break;
+       default:
+               audit_log_format(ab, "UNKNOWN");
+               break;
+       }
+
         audit_log_fcaps(ab, n);
         audit_log_end(ab);
  }
diff --git a/kernel/audit.h b/kernel/audit.h

index 123c9b7c39795975e2ce18cf913aad6b1fbe5b85..b779642b29af9401fd15246816c376ec86e4fcab 100644 (file)
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -197,6 +197,9 @@ struct audit_context {
                         int                     fd;
                         int                     flags;
                 } mmap;
+               struct {
+                       int                     argc;
+               } execve;
         };
         int fds[2];
  
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c

index f7aee8be7fb286db4a40919bc10ede18f17016dc..51f3fd4c1ed3a71dd1b8cd95e6645221150f5047 100644 (file)
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
         case AUDIT_DEVMINOR:
         case AUDIT_EXIT:
         case AUDIT_SUCCESS:
+       case AUDIT_INODE:
                 /* bit ops are only useful on syscall args */
                 if (f->op == Audit_bitmask || f->op == Audit_bittest)
                         return -EINVAL;
@@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
                 f->lsm_rule = NULL;
  
                 /* Support legacy tests for a valid loginuid */
-               if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
+               if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
                         f->type = AUDIT_LOGINUID_SET;
                         f->val = 0;
                 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c

index 9845cb32b60a77c8f6e3a7f1e94c8aec9dc599b2..90594c9f755213232e5282899c8547cd5c61c823 100644 (file)
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -95,13 +95,6 @@ struct audit_aux_data {
  /* Number of target pids per aux struct. */
  #define AUDIT_AUX_PIDS 16
  
-struct audit_aux_data_execve {
-       struct audit_aux_data   d;
-       int argc;
-       int envc;
-       struct mm_struct *mm;
-};
-
  struct audit_aux_data_pids {
         struct audit_aux_data   d;
         pid_t                   target_pid[AUDIT_AUX_PIDS];
@@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps {
         struct audit_cap_data   new_pcap;
  };
  
-struct audit_aux_data_capset {
-       struct audit_aux_data   d;
-       pid_t                   pid;
-       struct audit_cap_data   cap;
-};
-
  struct audit_tree_refs {
         struct audit_tree_refs *next;
         struct audit_chunk *c[31];
@@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk,
                         break;
                 case AUDIT_INODE:
                         if (name)
-                               result = (name->ino == f->val);
+                               result = audit_comparator(name->ino, f->op, f->val);
                         else if (ctx) {
                                 list_for_each_entry(n, &ctx->names_list, list) {
                                         if (audit_comparator(n->ino, f->op, f->val)) {
@@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk)
                 return 0; /* Return if not auditing. */
  
         state = audit_filter_task(tsk, &key);
-       if (state == AUDIT_DISABLED)
+       if (state == AUDIT_DISABLED) {
+               clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
                 return 0;
+       }
  
         if (!(context = audit_alloc_context(state))) {
                 kfree(key);
@@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context,
  }
  
  static void audit_log_execve_info(struct audit_context *context,
-                                 struct audit_buffer **ab,
-                                 struct audit_aux_data_execve *axi)
+                                 struct audit_buffer **ab)
  {
         int i, len;
         size_t len_sent = 0;
         const char __user *p;
         char *buf;
  
-       if (axi->mm != current->mm)
-               return; /* execve failed, no additional info */
-
-       p = (const char __user *)axi->mm->arg_start;
+       p = (const char __user *)current->mm->arg_start;
  
-       audit_log_format(*ab, "argc=%d", axi->argc);
+       audit_log_format(*ab, "argc=%d", context->execve.argc);
  
         /*
          * we need some kernel buffer to hold the userspace args.  Just
@@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context,
                 return;
         }
  
-       for (i = 0; i < axi->argc; i++) {
+       for (i = 0; i < context->execve.argc; i++) {
                 len = audit_log_single_execve_arg(context, ab, i,
                                                   &len_sent, p, buf);
                 if (len <= 0)
@@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic)
                 audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
                                  context->mmap.flags);
                 break; }
+       case AUDIT_EXECVE: {
+               audit_log_execve_info(context, &ab);
+               break; }
         }
         audit_log_end(ab);
  }
@@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
  
                 switch (aux->type) {
  
-               case AUDIT_EXECVE: {
-                       struct audit_aux_data_execve *axi = (void *)aux;
-                       audit_log_execve_info(context, &ab, axi);
-                       break; }
-
                 case AUDIT_BPRM_FCAPS: {
                         struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
                         audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx,
  /* global counter which is incremented every time something logs in */
  static atomic_t session_id = ATOMIC_INIT(0);
  
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+       /* if we are unset, we don't need privs */
+       if (!audit_loginuid_set(current))
+               return 0;
+       /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
+       if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+               return -EPERM;
+       /* it is set, you need permission */
+       if (!capable(CAP_AUDIT_CONTROL))
+               return -EPERM;
+       /* reject if this is not an unset and we don't allow that */
+       if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
+               return -EPERM;
+       return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+                                  unsigned int oldsessionid, unsigned int sessionid,
+                                  int rc)
+{
+       struct audit_buffer *ab;
+       uid_t uid, ologinuid, nloginuid;
+
+       uid = from_kuid(&init_user_ns, task_uid(current));
+       ologinuid = from_kuid(&init_user_ns, koldloginuid);
+       nloginuid = from_kuid(&init_user_ns, kloginuid),
+
+       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+       if (!ab)
+               return;
+       audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old "
+                        "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid,
+                        nloginuid, oldsessionid, sessionid, !rc);
+       audit_log_end(ab);
+}
+
  /**
   * audit_set_loginuid - set current task's audit_context loginuid
   * @loginuid: loginuid value
@@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0);
  int audit_set_loginuid(kuid_t loginuid)
  {
         struct task_struct *task = current;
-       struct audit_context *context = task->audit_context;
-       unsigned int sessionid;
+       unsigned int oldsessionid, sessionid = (unsigned int)-1;
+       kuid_t oldloginuid;
+       int rc;
  
-#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
-       if (audit_loginuid_set(task))
-               return -EPERM;
-#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
-       if (!capable(CAP_AUDIT_CONTROL))
-               return -EPERM;
-#endif  /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+       oldloginuid = audit_get_loginuid(current);
+       oldsessionid = audit_get_sessionid(current);
  
-       sessionid = atomic_inc_return(&session_id);
-       if (context && context->in_syscall) {
-               struct audit_buffer *ab;
+       rc = audit_set_loginuid_perm(loginuid);
+       if (rc)
+               goto out;
+
+       /* are we setting or clearing? */
+       if (uid_valid(loginuid))
+               sessionid = atomic_inc_return(&session_id);
  
-               ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-               if (ab) {
-                       audit_log_format(ab, "login pid=%d uid=%u "
-                               "old auid=%u new auid=%u"
-                               " old ses=%u new ses=%u",
-                               task->pid,
-                               from_kuid(&init_user_ns, task_uid(task)),
-                               from_kuid(&init_user_ns, task->loginuid),
-                               from_kuid(&init_user_ns, loginuid),
-                               task->sessionid, sessionid);
-                       audit_log_end(ab);
-               }
-       }
         task->sessionid = sessionid;
         task->loginuid = loginuid;
-       return 0;
+out:
+       audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+       return rc;
  }
  
  /**
@@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
         context->ipc.has_perm = 1;
  }
  
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
  {
-       struct audit_aux_data_execve *ax;
         struct audit_context *context = current->audit_context;
  
-       ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-       if (!ax)
-               return -ENOMEM;
-
-       ax->argc = bprm->argc;
-       ax->envc = bprm->envc;
-       ax->mm = bprm->mm;
-       ax->d.type = AUDIT_EXECVE;
-       ax->d.next = context->aux;
-       context->aux = (void *)ax;
-       return 0;
+       context->type = AUDIT_EXECVE;
+       context->execve.argc = bprm->argc;
  }
  
  
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index e0839bcd48c8c2fdce9c13a3bfeba35a8e5f0e75..4c62513fe19fc8c4b938aeb8b3acb5158da2f976 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
         iput(inode);
  }
  
-static int cgroup_delete(const struct dentry *d)
-{
-       return 1;
-}
-
  static void remove_dir(struct dentry *d)
  {
         struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
  {
         static const struct dentry_operations cgroup_dops = {
                 .d_iput = cgroup_diput,
-               .d_delete = cgroup_delete,
+               .d_delete = always_delete_dentry,
         };
  
         struct inode *inode =
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S

deleted file mode 100644 (file)

index 4a9a86d..0000000
--- a/kernel/modsign_certificate.S
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name)   \
-       .globl VMLINUX_SYMBOL(name);    \
-       VMLINUX_SYMBOL(name):
-
-       .section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
-       .incbin "signing_key.x509"
-       .incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c

deleted file mode 100644 (file)

index 7cbd450..0000000
--- a/kernel/modsign_pubkey.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
-       pr_notice("Initialise module verification\n");
-
-       modsign_keyring = keyring_alloc(".module_sign",
-                                       KUIDT_INIT(0), KGIDT_INIT(0),
-                                       current_cred(),
-                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                        KEY_USR_VIEW | KEY_USR_READ),
-                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
-       if (IS_ERR(modsign_keyring))
-               panic("Can't allocate module signing keyring\n");
-
-       return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
-       key_ref_t key;
-       const u8 *p, *end;
-       size_t plen;
-
-       pr_notice("Loading module verification certificates\n");
-
-       end = modsign_certificate_list_end;
-       p = modsign_certificate_list;
-       while (p < end) {
-               /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
-                * than 256 bytes in size.
-                */
-               if (end - p < 4)
-                       goto dodgy_cert;
-               if (p[0] != 0x30 &&
-                   p[1] != 0x82)
-                       goto dodgy_cert;
-               plen = (p[2] << 8) | p[3];
-               plen += 4;
-               if (plen > end - p)
-                       goto dodgy_cert;
-
-               key = key_create_or_update(make_key_ref(modsign_keyring, 1),
-                                          "asymmetric",
-                                          NULL,
-                                          p,
-                                          plen,
-                                          (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                          KEY_USR_VIEW,
-                                          KEY_ALLOC_NOT_IN_QUOTA);
-               if (IS_ERR(key))
-                       pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
-                              PTR_ERR(key));
-               else
-                       pr_notice("MODSIGN: Loaded cert '%s'\n",
-                                 key_ref_to_ptr(key)->description);
-               p += plen;
-       }
-
-       return 0;
-
-dodgy_cert:
-       pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
-       return 0;
-}
-late_initcall(load_module_signing_keys);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h

index 24f9247b7d0214d4b3755bb4396f14d9ef712753..915e123a430fbb0cb6ea9197b90ab6ec3684a191 100644 (file)
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -9,6 +9,4 @@
   * 2 of the Licence, or (at your option) any later version.
   */
  
-extern struct key *modsign_keyring;
-
  extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
diff --git a/kernel/module_signing.c b/kernel/module_signing.c

index f2970bddc5ea6224b8c0357970a543c90ac11da0..be5b8fac4bd0de72aba1f91674a2d0eb7a296d31 100644 (file)
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -14,6 +14,7 @@
  #include <crypto/public_key.h>
  #include <crypto/hash.h>
  #include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
  #include "module-internal.h"
  
  /*
@@ -28,7 +29,7 @@
   */
  struct module_signature {
         u8      algo;           /* Public-key crypto algorithm [enum pkey_algo] */
-       u8      hash;           /* Digest algorithm [enum pkey_hash_algo] */
+       u8      hash;           /* Digest algorithm [enum hash_algo] */
         u8      id_type;        /* Key identifier type [enum pkey_id_type] */
         u8      signer_len;     /* Length of signer's name */
         u8      key_id_len;     /* Length of key identifier */
@@ -39,7 +40,7 @@ struct module_signature {
  /*
   * Digest the module contents.
   */
-static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
+static struct public_key_signature *mod_make_digest(enum hash_algo hash,
                                                     const void *mod,
                                                     unsigned long modlen)
  {
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
         /* Allocate the hashing algorithm we're going to need and find out how
          * big the hash operational data will be.
          */
-       tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+       tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
         if (IS_ERR(tfm))
                 return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
  
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
  
         pr_debug("Look up: \"%s\"\n", id);
  
-       key = keyring_search(make_key_ref(modsign_keyring, 1),
+       key = keyring_search(make_key_ref(system_trusted_keyring, 1),
                              &key_type_asymmetric, id);
         if (IS_ERR(key))
                 pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
                 return -ENOPKG;
  
         if (ms.hash >= PKEY_HASH__LAST ||
-           !pkey_hash_algo[ms.hash])
+           !hash_algo_name[ms.hash])
                 return -ENOPKG;
  
         key = request_asymmetric_key(sig, ms.signer_len,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c

index 10c22cae83a035e43eb54a79272fd1e6ae2fc757..b38109e204aff8e83afb0e15484cdfec8e450575 100644 (file)
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -792,7 +792,8 @@ void free_basic_memory_bitmaps(void)
  {
         struct memory_bitmap *bm1, *bm2;
  
-       BUG_ON(!(forbidden_pages_map && free_pages_map));
+       if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+               return;
  
         bm1 = forbidden_pages_map;
         bm2 = free_pages_map;
diff --git a/kernel/power/user.c b/kernel/power/user.c

index 24850270c8024948d60c0f827457b18d2de2d9a7..98d357584cd6bad7bdc87cbc63e5c296a8393b60 100644 (file)
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -70,6 +70,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                 data->swap = swsusp_resume_device ?
                         swap_type_of(swsusp_resume_device, 0, NULL) : -1;
                 data->mode = O_RDONLY;
+               data->free_bitmaps = false;
                 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
                 if (error)
                         pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S

new file mode 100644 (file)

index 0000000..4aef390
--- /dev/null
+++ b/kernel/system_certificates.S
@@ -0,0 +1,10 @@
+#include <linux/export.h>
+#include <linux/init.h>
+
+       __INITRODATA
+
+       .globl VMLINUX_SYMBOL(system_certificate_list)
+VMLINUX_SYMBOL(system_certificate_list):
+       .incbin "kernel/x509_certificate_list"
+       .globl VMLINUX_SYMBOL(system_certificate_list_end)
+VMLINUX_SYMBOL(system_certificate_list_end):
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c

new file mode 100644 (file)

index 0000000..564dd93
--- /dev/null
+++ b/kernel/system_keyring.c
@@ -0,0 +1,105 @@
+/* System trusted keyring for trusted public keys
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "module-internal.h"
+
+struct key *system_trusted_keyring;
+EXPORT_SYMBOL_GPL(system_trusted_keyring);
+
+extern __initconst const u8 system_certificate_list[];
+extern __initconst const u8 system_certificate_list_end[];
+
+/*
+ * Load the compiled-in keys
+ */
+static __init int system_trusted_keyring_init(void)
+{
+       pr_notice("Initialise system trusted keyring\n");
+
+       system_trusted_keyring =
+               keyring_alloc(".system_keyring",
+                             KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+                             ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                             KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+                             KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(system_trusted_keyring))
+               panic("Can't allocate system trusted keyring\n");
+
+       set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+       return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(system_trusted_keyring_init);
+
+/*
+ * Load the compiled-in list of X.509 certificates.
+ */
+static __init int load_system_certificate_list(void)
+{
+       key_ref_t key;
+       const u8 *p, *end;
+       size_t plen;
+
+       pr_notice("Loading compiled-in X.509 certificates\n");
+
+       end = system_certificate_list_end;
+       p = system_certificate_list;
+       while (p < end) {
+               /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+                * than 256 bytes in size.
+                */
+               if (end - p < 4)
+                       goto dodgy_cert;
+               if (p[0] != 0x30 &&
+                   p[1] != 0x82)
+                       goto dodgy_cert;
+               plen = (p[2] << 8) | p[3];
+               plen += 4;
+               if (plen > end - p)
+                       goto dodgy_cert;
+
+               key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+                                          "asymmetric",
+                                          NULL,
+                                          p,
+                                          plen,
+                                          ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                          KEY_USR_VIEW | KEY_USR_READ),
+                                          KEY_ALLOC_NOT_IN_QUOTA |
+                                          KEY_ALLOC_TRUSTED);
+               if (IS_ERR(key)) {
+                       pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+                              PTR_ERR(key));
+               } else {
+                       pr_notice("Loaded X.509 cert '%s'\n",
+                                 key_ref_to_ptr(key)->description);
+                       key_ref_put(key);
+               }
+               p += plen;
+       }
+
+       return 0;
+
+dodgy_cert:
+       pr_err("Problem parsing in-kernel X.509 certificate list\n");
+       return 0;
+}
+late_initcall(load_system_certificate_list);
diff --git a/kernel/user.c b/kernel/user.c

index 5bbb91988e69278f2cd012896db29688c9234476..a3a0dbfda32957616f143ae2722541a5846c0a62 100644 (file)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
         .owner = GLOBAL_ROOT_UID,
         .group = GLOBAL_ROOT_GID,
         .proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_KEYS_KERBEROS_CACHE
+       .krb_cache_register_sem =
+       __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem),
+#endif
  };
  EXPORT_SYMBOL_GPL(init_user_ns);
  
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index 13fb1134ba582e49c8aa3643feada72a2b0dae8b..240fb62cf3945aa0f7b601b343db65312a42f345 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
  
         set_cred_user_ns(new, ns);
  
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
         return 0;
  }
  
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
  
         do {
                 parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+               key_put(ns->persistent_keyring_register);
+#endif
                 proc_free_inum(ns->proc_inum);
                 kmem_cache_free(user_ns_cachep, ns);
                 ns = parent;
diff --git a/lib/Kconfig b/lib/Kconfig

index 06dc74200a5159c5b5458c36e2b71297e638a194..991c98bc4a3f51e9e7f377274084bec909483ea9 100644 (file)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
  config BTREE
         boolean
  
+config ASSOCIATIVE_ARRAY
+       bool
+       help
+         Generic associative array.  Can be searched and iterated over whilst
+         it is being modified.  It is also reasonably quick to search and
+         modify.  The algorithms are non-recursive, and the trees are highly
+         capacious.
+
+         See:
+
+               Documentation/assoc_array.txt
+
+         for more information.
+
  config HAS_IOMEM
         boolean
         depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile

index d480a8c9238562b144e59217cdf4ecc05b574556..b46065fd67a4741dadeabe1ced0e67ed6269b741 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
  obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
  
  obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
  obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
  obj-$(CONFIG_DEBUG_LIST) += list_debug.o
  obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c

new file mode 100644 (file)

index 0000000..17edeaf
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1746 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array.  The caller must hold the RCU read lock
+ * or better.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+                                      const struct assoc_array_ptr *stop,
+                                      int (*iterator)(const void *leaf,
+                                                      void *iterator_data),
+                                      void *iterator_data)
+{
+       const struct assoc_array_shortcut *shortcut;
+       const struct assoc_array_node *node;
+       const struct assoc_array_ptr *cursor, *ptr, *parent;
+       unsigned long has_meta;
+       int slot, ret;
+
+       cursor = root;
+
+begin_node:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               smp_read_barrier_depends();
+               cursor = ACCESS_ONCE(shortcut->next_node);
+       }
+
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+       slot = 0;
+
+       /* We perform two passes of each node.
+        *
+        * The first pass does all the leaves in this node.  This means we
+        * don't miss any leaves if the node is split up by insertion whilst
+        * we're iterating over the branches rooted here (we may, however, see
+        * some leaves twice).
+        */
+       has_meta = 0;
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               has_meta |= (unsigned long)ptr;
+               if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+                       /* We need a barrier between the read of the pointer
+                        * and dereferencing the pointer - but only if we are
+                        * actually going to dereference it.
+                        */
+                       smp_read_barrier_depends();
+
+                       /* Invoke the callback */
+                       ret = iterator(assoc_array_ptr_to_leaf(ptr),
+                                      iterator_data);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       /* The second pass attends to all the metadata pointers.  If we follow
+        * one of these we may find that we don't come back here, but rather go
+        * back to a replacement node with the leaves in a different layout.
+        *
+        * We are guaranteed to make progress, however, as the slot number for
+        * a particular portion of the key space cannot change - and we
+        * continue at the back pointer + 1.
+        */
+       if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+               goto finished_node;
+       slot = 0;
+
+continue_node:
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       cursor = ptr;
+                       goto begin_node;
+               }
+       }
+
+finished_node:
+       /* Move up to the parent (may need to skip back over a shortcut) */
+       parent = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+       if (parent == stop)
+               return 0;
+
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               smp_read_barrier_depends();
+               cursor = parent;
+               parent = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot;
+               if (parent == stop)
+                       return 0;
+       }
+
+       /* Ascend to next slot in parent node */
+       cursor = parent;
+       slot++;
+       goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Iterate over all the objects in an associative array.  Each one will be
+ * presented to the iterator function.
+ *
+ * If the array is being modified concurrently with the iteration then it is
+ * possible that some objects in the array will be passed to the iterator
+ * callback more than once - though every object should be passed at least
+ * once.  If this is undesirable then the caller must lock against modification
+ * for the duration of this function.
+ *
+ * The function will return 0 if no objects were in the array or else it will
+ * return the result of the last iterator function called.  Iteration stops
+ * immediately if any call to the iteration function results in a non-zero
+ * return.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+                       int (*iterator)(const void *object,
+                                       void *iterator_data),
+                       void *iterator_data)
+{
+       struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
+
+       if (!root)
+               return 0;
+       return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
+}
+
+enum assoc_array_walk_status {
+       assoc_array_walk_tree_empty,
+       assoc_array_walk_found_terminal_node,
+       assoc_array_walk_found_wrong_shortcut,
+} status;
+
+struct assoc_array_walk_result {
+       struct {
+               struct assoc_array_node *node;  /* Node in which leaf might be found */
+               int             level;
+               int             slot;
+       } terminal_node;
+       struct {
+               struct assoc_array_shortcut *shortcut;
+               int             level;
+               int             sc_level;
+               unsigned long   sc_segments;
+               unsigned long   dissimilarity;
+       } wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+                const struct assoc_array_ops *ops,
+                const void *index_key,
+                struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *ptr;
+       unsigned long sc_segments, dissimilarity;
+       unsigned long segments;
+       int level, sc_level, next_sc_level;
+       int slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = ACCESS_ONCE(array->root);
+       if (!cursor)
+               return assoc_array_walk_tree_empty;
+
+       level = 0;
+
+       /* Use segments from the key for the new leaf to navigate through the
+        * internal tree, skipping through nodes and shortcuts that are on
+        * route to the destination.  Eventually we'll come to a slot that is
+        * either empty or contains a leaf at which point we've found a node in
+        * which the leaf we're looking for might be found or into which it
+        * should be inserted.
+        */
+jumped:
+       segments = ops->get_key_chunk(index_key, level);
+       pr_devel("segments[%d]: %lx\n", level, segments);
+
+       if (assoc_array_ptr_is_shortcut(cursor))
+               goto follow_shortcut;
+
+consider_node:
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       slot &= ASSOC_ARRAY_FAN_MASK;
+       ptr = ACCESS_ONCE(node->slots[slot]);
+
+       pr_devel("consider slot %x [ix=%d type=%lu]\n",
+                slot, level, (unsigned long)ptr & 3);
+
+       if (!assoc_array_ptr_is_meta(ptr)) {
+               /* The node doesn't have a node/shortcut pointer in the slot
+                * corresponding to the index key that we have to follow.
+                */
+               result->terminal_node.node = node;
+               result->terminal_node.level = level;
+               result->terminal_node.slot = slot;
+               pr_devel("<--%s() = terminal_node\n", __func__);
+               return assoc_array_walk_found_terminal_node;
+       }
+
+       if (assoc_array_ptr_is_node(ptr)) {
+               /* There is a pointer to a node in the slot corresponding to
+                * this index key segment, so we need to follow it.
+                */
+               cursor = ptr;
+               level += ASSOC_ARRAY_LEVEL_STEP;
+               if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+                       goto consider_node;
+               goto jumped;
+       }
+
+       /* There is a shortcut in the slot corresponding to the index key
+        * segment.  We follow the shortcut if its partial index key matches
+        * this leaf's.  Otherwise we need to split the shortcut.
+        */
+       cursor = ptr;
+follow_shortcut:
+       shortcut = assoc_array_ptr_to_shortcut(cursor);
+       smp_read_barrier_depends();
+       pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+       sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+       BUG_ON(sc_level > shortcut->skip_to_level);
+
+       do {
+               /* Check the leaf against the shortcut's index key a word at a
+                * time, trimming the final word (the shortcut stores the index
+                * key completely from the root to the shortcut's target).
+                */
+               if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+                       segments = ops->get_key_chunk(index_key, sc_level);
+
+               sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+               dissimilarity = segments ^ sc_segments;
+
+               if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+                       /* Trim segments that are beyond the shortcut */
+                       int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+                       dissimilarity &= ~(ULONG_MAX << shift);
+                       next_sc_level = shortcut->skip_to_level;
+               } else {
+                       next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+                       next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               }
+
+               if (dissimilarity != 0) {
+                       /* This shortcut points elsewhere */
+                       result->wrong_shortcut.shortcut = shortcut;
+                       result->wrong_shortcut.level = level;
+                       result->wrong_shortcut.sc_level = sc_level;
+                       result->wrong_shortcut.sc_segments = sc_segments;
+                       result->wrong_shortcut.dissimilarity = dissimilarity;
+                       return assoc_array_walk_found_wrong_shortcut;
+               }
+
+               sc_level = next_sc_level;
+       } while (sc_level < shortcut->skip_to_level);
+
+       /* The shortcut matches the leaf's index to this point. */
+       cursor = ACCESS_ONCE(shortcut->next_node);
+       if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+               level = sc_level;
+               goto jumped;
+       } else {
+               level = sc_level;
+               goto consider_node;
+       }
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Find an object in an associative array by walking through the internal tree
+ * to the node that should contain the object and then searching the leaves
+ * there.  NULL is returned if the requested object was not found in the array.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+                      const struct assoc_array_ops *ops,
+                      const void *index_key)
+{
+       struct assoc_array_walk_result result;
+       const struct assoc_array_node *node;
+       const struct assoc_array_ptr *ptr;
+       const void *leaf;
+       int slot;
+
+       if (assoc_array_walk(array, ops, index_key, &result) !=
+           assoc_array_walk_found_terminal_node)
+               return NULL;
+
+       node = result.terminal_node.node;
+       smp_read_barrier_depends();
+
+       /* If the target key is available to us, it's has to be pointed to by
+        * the terminal node.
+        */
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+                       /* We need a barrier between the read of the pointer
+                        * and dereferencing the pointer - but only if we are
+                        * actually going to dereference it.
+                        */
+                       leaf = assoc_array_ptr_to_leaf(ptr);
+                       smp_read_barrier_depends();
+                       if (ops->compare_object(leaf, index_key))
+                               return (void *)leaf;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array.  The caller must prevent
+ * other simultaneous accesses.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+                                       const struct assoc_array_ops *ops)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *parent = NULL;
+       int slot = -1;
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = root;
+       if (!cursor) {
+               pr_devel("empty\n");
+               return;
+       }
+
+move_to_meta:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               pr_devel("[%d] shortcut\n", slot);
+               BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               BUG_ON(shortcut->back_pointer != parent);
+               BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+               parent = cursor;
+               cursor = shortcut->next_node;
+               slot = -1;
+               BUG_ON(!assoc_array_ptr_is_node(cursor));
+       }
+
+       pr_devel("[%d] node\n", slot);
+       node = assoc_array_ptr_to_node(cursor);
+       BUG_ON(node->back_pointer != parent);
+       BUG_ON(slot != -1 && node->parent_slot != slot);
+       slot = 0;
+
+continue_node:
+       pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_ptr *ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       parent = cursor;
+                       cursor = ptr;
+                       goto move_to_meta;
+               }
+
+               if (ops) {
+                       pr_devel("[%d] free leaf\n", slot);
+                       ops->free_object(assoc_array_ptr_to_leaf(ptr));
+               }
+       }
+
+       parent = node->back_pointer;
+       slot = node->parent_slot;
+       pr_devel("free node\n");
+       kfree(node);
+       if (!parent)
+               return; /* Done */
+
+       /* Move back up to the parent (may need to free a shortcut on
+        * the way up) */
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               BUG_ON(shortcut->next_node != cursor);
+               cursor = parent;
+               parent = shortcut->back_pointer;
+               slot = shortcut->parent_slot;
+               pr_devel("free shortcut\n");
+               kfree(shortcut);
+               if (!parent)
+                       return;
+
+               BUG_ON(!assoc_array_ptr_is_node(parent));
+       }
+
+       /* Ascend to next slot in parent node */
+       pr_devel("ascend to %p[%d]\n", parent, slot);
+       cursor = parent;
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Discard all metadata and free all objects in an associative array.  The
+ * array will be empty and ready to use again upon completion.  This function
+ * cannot fail.
+ *
+ * The caller must prevent all other accesses whilst this takes place as no
+ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
+ * accesses to continue.  On the other hand, no memory allocation is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+                        const struct assoc_array_ops *ops)
+{
+       assoc_array_destroy_subtree(array->root, ops);
+       array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+       struct assoc_array_node *new_n0;
+
+       pr_devel("-->%s()\n", __func__);
+
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       edit->leaf_p = &new_n0->slots[0];
+       edit->adjust_count_on = new_n0;
+       edit->set[0].ptr = &edit->array->root;
+       edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+
+       pr_devel("<--%s() = ok [no root]\n", __func__);
+       return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+                                                 const struct assoc_array_ops *ops,
+                                                 const void *index_key,
+                                                 struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0;
+       struct assoc_array_node *node, *new_n0, *new_n1, *side;
+       struct assoc_array_ptr *ptr;
+       unsigned long dissimilarity, base_seg, blank;
+       size_t keylen;
+       bool have_meta;
+       int level, diff;
+       int slot, next_slot, free_slot, i, j;
+
+       node    = result->terminal_node.node;
+       level   = result->terminal_node.level;
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* We arrived at a node which doesn't have an onward node or shortcut
+        * pointer that we have to follow.  This means that (a) the leaf we
+        * want must go here (either by insertion or replacement) or (b) we
+        * need to split this node and insert in one of the fragments.
+        */
+       free_slot = -1;
+
+       /* Firstly, we have to check the leaves in this node to see if there's
+        * a matching one we should replace in place.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (!ptr) {
+                       free_slot = i;
+                       continue;
+               }
+               if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+                       pr_devel("replace in slot %d\n", i);
+                       edit->leaf_p = &node->slots[i];
+                       edit->dead_leaf = node->slots[i];
+                       pr_devel("<--%s() = ok [replace]\n", __func__);
+                       return true;
+               }
+       }
+
+       /* If there is a free slot in this node then we can just insert the
+        * leaf here.
+        */
+       if (free_slot >= 0) {
+               pr_devel("insert in free slot %d\n", free_slot);
+               edit->leaf_p = &node->slots[free_slot];
+               edit->adjust_count_on = node;
+               pr_devel("<--%s() = ok [insert]\n", __func__);
+               return true;
+       }
+
+       /* The node has no spare slots - so we're either going to have to split
+        * it or insert another node before it.
+        *
+        * Whatever, we're going to need at least two new nodes - so allocate
+        * those now.  We may also need a new shortcut, but we deal with that
+        * when we need it.
+        */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n1)
+               return false;
+       edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+       /* We need to find out how similar the leaves are. */
+       pr_devel("no spare slots\n");
+       have_meta = false;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       edit->segment_cache[i] = 0xff;
+                       have_meta = true;
+                       continue;
+               }
+               base_seg = ops->get_object_key_chunk(
+                       assoc_array_ptr_to_leaf(ptr), level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       if (have_meta) {
+               pr_devel("have meta\n");
+               goto split_node;
+       }
+
+       /* The node contains only leaves */
+       dissimilarity = 0;
+       base_seg = edit->segment_cache[0];
+       for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+               dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+       pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+       if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+               /* The old leaves all cluster in the same slot.  We will need
+                * to insert a shortcut if the new node wants to cluster with them.
+                */
+               if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+                       goto all_leaves_cluster_together;
+
+               /* Otherwise we can just insert a new node ahead of the old
+                * one.
+                */
+               goto present_leaves_cluster_but_not_new_leaf;
+       }
+
+split_node:
+       pr_devel("split node\n");
+
+       /* We need to split the current node; we know that the node doesn't
+        * simply contain a full set of leaves that cluster together (it
+        * contains meta pointers and/or non-clustering leaves).
+        *
+        * We need to expel at least two leaves out of a set consisting of the
+        * leaves in the node and the new leaf.
+        *
+        * We need a new node (n0) to replace the current one and a new node to
+        * take the expelled nodes (n1).
+        */
+       edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = node->back_pointer;
+       new_n0->parent_slot = node->parent_slot;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+       pr_devel("do_split_node\n");
+
+       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+       new_n1->nr_leaves_on_branch = 0;
+
+       /* Begin by finding two matching leaves.  There have to be at least two
+        * that match - even if there are meta pointers - because any leaf that
+        * would match a slot with a meta pointer in it must be somewhere
+        * behind that meta pointer and cannot be here.  Further, given N
+        * remaining leaf slots, we now have N+1 leaves to go in them.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               slot = edit->segment_cache[i];
+               if (slot != 0xff)
+                       for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+                               if (edit->segment_cache[j] == slot)
+                                       goto found_slot_for_multiple_occupancy;
+       }
+found_slot_for_multiple_occupancy:
+       pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+       BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+       BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+       BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+       new_n1->parent_slot = slot;
+
+       /* Metadata pointers cannot change slot */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       new_n0->slots[i] = node->slots[i];
+               else
+                       new_n0->slots[i] = NULL;
+       BUG_ON(new_n0->slots[slot] != NULL);
+       new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+       /* Filter the leaf pointers between the new nodes */
+       free_slot = -1;
+       next_slot = 0;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       continue;
+               if (edit->segment_cache[i] == slot) {
+                       new_n1->slots[next_slot++] = node->slots[i];
+                       new_n1->nr_leaves_on_branch++;
+               } else {
+                       do {
+                               free_slot++;
+                       } while (new_n0->slots[free_slot] != NULL);
+                       new_n0->slots[free_slot] = node->slots[i];
+               }
+       }
+
+       pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+       if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+               do {
+                       free_slot++;
+               } while (new_n0->slots[free_slot] != NULL);
+               edit->leaf_p = &new_n0->slots[free_slot];
+               edit->adjust_count_on = new_n0;
+       } else {
+               edit->leaf_p = &new_n1->slots[next_slot++];
+               edit->adjust_count_on = new_n1;
+       }
+
+       BUG_ON(next_slot <= 1);
+
+       edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (edit->segment_cache[i] == 0xff) {
+                       ptr = node->slots[i];
+                       BUG_ON(assoc_array_ptr_is_leaf(ptr));
+                       if (assoc_array_ptr_is_node(ptr)) {
+                               side = assoc_array_ptr_to_node(ptr);
+                               edit->set_backpointers[i] = &side->back_pointer;
+                       } else {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               edit->set_backpointers[i] = &shortcut->back_pointer;
+                       }
+               }
+       }
+
+       ptr = node->back_pointer;
+       if (!ptr)
+               edit->set[0].ptr = &edit->array->root;
+       else if (assoc_array_ptr_is_node(ptr))
+               edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+       else
+               edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+       edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+       pr_devel("<--%s() = ok [split node]\n", __func__);
+       return true;
+
+present_leaves_cluster_but_not_new_leaf:
+       /* All the old leaves cluster in the same slot, but the new leaf wants
+        * to go into a different slot, so we create a new node to hold the new
+        * leaf and a pointer to a new node holding all the old leaves.
+        */
+       pr_devel("present leaves cluster but not new leaf\n");
+
+       new_n0->back_pointer = node->back_pointer;
+       new_n0->parent_slot = node->parent_slot;
+       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = edit->segment_cache[0];
+       new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
+       edit->adjust_count_on = new_n0;
+
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+               new_n1->slots[i] = node->slots[i];
+
+       new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
+       edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
+
+       edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
+       edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+       edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+       pr_devel("<--%s() = ok [insert node before]\n", __func__);
+       return true;
+
+all_leaves_cluster_together:
+       /* All the leaves, new and old, want to cluster together in this node
+        * in the same slot, so we have to replace this node with a shortcut to
+        * skip over the identical parts of the key and then place a pair of
+        * nodes, one inside the other, at the end of the shortcut and
+        * distribute the keys between them.
+        *
+        * Firstly we need to work out where the leaves start diverging as a
+        * bit position into their keys so that we know how big the shortcut
+        * needs to be.
+        *
+        * We only need to make a single pass of N of the N+1 leaves because if
+        * any keys differ between themselves at bit X then at least one of
+        * them must also differ with the base key at bit X or before.
+        */
+       pr_devel("all leaves cluster together\n");
+       diff = INT_MAX;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+                                         assoc_array_ptr_to_leaf(node->slots[i]));
+               if (x < diff) {
+                       BUG_ON(x < 0);
+                       diff = x;
+               }
+       }
+       BUG_ON(diff == INT_MAX);
+       BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+       keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+       keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+       new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                        keylen * sizeof(unsigned long), GFP_KERNEL);
+       if (!new_s0)
+               return false;
+       edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+       edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+       new_s0->back_pointer = node->back_pointer;
+       new_s0->parent_slot = node->parent_slot;
+       new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+       new_n0->parent_slot = 0;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+       new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+       BUG_ON(level <= 0);
+
+       for (i = 0; i < keylen; i++)
+               new_s0->index_key[i] =
+                       ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+       blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+       new_s0->index_key[keylen - 1] &= ~blank;
+
+       /* This now reduces to a node splitting exercise for which we'll need
+        * to regenerate the disparity table.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+                                                    level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       base_seg = ops->get_key_chunk(index_key, level);
+       base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+                                           const struct assoc_array_ops *ops,
+                                           struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+       struct assoc_array_node *node, *new_n0, *side;
+       unsigned long sc_segments, dissimilarity, blank;
+       size_t keylen;
+       int level, sc_level, diff;
+       int sc_slot;
+
+       shortcut        = result->wrong_shortcut.shortcut;
+       level           = result->wrong_shortcut.level;
+       sc_level        = result->wrong_shortcut.sc_level;
+       sc_segments     = result->wrong_shortcut.sc_segments;
+       dissimilarity   = result->wrong_shortcut.dissimilarity;
+
+       pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+                __func__, level, dissimilarity, sc_level);
+
+       /* We need to split a shortcut and insert a node between the two
+        * pieces.  Zero-length pieces will be dispensed with entirely.
+        *
+        * First of all, we need to find out in which level the first
+        * difference was.
+        */
+       diff = __ffs(dissimilarity);
+       diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+       pr_devel("diff=%d\n", diff);
+
+       if (!shortcut->back_pointer) {
+               edit->set[0].ptr = &edit->array->root;
+       } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+               node = assoc_array_ptr_to_node(shortcut->back_pointer);
+               edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+       } else {
+               BUG();
+       }
+
+       edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+       /* Create a new node now since we're going to need it anyway */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       edit->adjust_count_on = new_n0;
+
+       /* Insert a new shortcut before the new node if this segment isn't of
+        * zero length - otherwise we just connect the new node directly to the
+        * parent.
+        */
+       level += ASSOC_ARRAY_LEVEL_STEP;
+       if (diff > level) {
+               pr_devel("pre-shortcut %d...%d\n", level, diff);
+               keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s0)
+                       return false;
+               edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+               edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+               new_s0->back_pointer = shortcut->back_pointer;
+               new_s0->parent_slot = shortcut->parent_slot;
+               new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+               new_s0->skip_to_level = diff;
+
+               new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+               new_n0->parent_slot = 0;
+
+               memcpy(new_s0->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+               pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+               new_s0->index_key[keylen - 1] &= ~blank;
+       } else {
+               pr_devel("no pre-shortcut\n");
+               edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+               new_n0->back_pointer = shortcut->back_pointer;
+               new_n0->parent_slot = shortcut->parent_slot;
+       }
+
+       side = assoc_array_ptr_to_node(shortcut->next_node);
+       new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+       /* We need to know which slot in the new node is going to take a
+        * metadata pointer.
+        */
+       sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+       pr_devel("new slot %lx >> %d -> %d\n",
+                sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+       /* Determine whether we need to follow the new node with a replacement
+        * for the current shortcut.  We could in theory reuse the current
+        * shortcut if its parent slot number doesn't change - but that's a
+        * 1-in-16 chance so not worth expending the code upon.
+        */
+       level = diff + ASSOC_ARRAY_LEVEL_STEP;
+       if (level < shortcut->skip_to_level) {
+               pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s1)
+                       return false;
+               edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+               new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+               new_s1->parent_slot = sc_slot;
+               new_s1->next_node = shortcut->next_node;
+               new_s1->skip_to_level = shortcut->skip_to_level;
+
+               new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+               memcpy(new_s1->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+       } else {
+               pr_devel("no post-shortcut\n");
+
+               /* We don't have to replace the pointed-to node as long as we
+                * use memory barriers to make sure the parent slot number is
+                * changed before the back pointer (the parent slot number is
+                * irrelevant to the old parent shortcut).
+                */
+               new_n0->slots[sc_slot] = shortcut->next_node;
+               edit->set_parent_slot[0].p = &side->parent_slot;
+               edit->set_parent_slot[0].to = sc_slot;
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+       }
+
+       /* Install the new leaf in a spare slot in the new node. */
+       if (sc_slot == 0)
+               edit->leaf_p = &new_n0->slots[1];
+       else
+               edit->leaf_p = &new_n0->slots[0];
+
+       pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+       return edit;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Precalculate and preallocate a script for the insertion or replacement of an
+ * object in an associative array.  This results in an edit script that can
+ * either be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key,
+                                           void *object)
+{
+       struct assoc_array_walk_result result;
+       struct assoc_array_edit *edit;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* The leaf pointer we're given must not have the bottom bit set as we
+        * use those for type-marking the pointer.  NULL pointers are also not
+        * allowed as they indicate an empty slot but we have to allow them
+        * here as they can be updated later.
+        */
+       BUG_ON(assoc_array_ptr_is_meta(object));
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+       edit->adjust_count_by = 1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_tree_empty:
+               /* Allocate a root node if there isn't one yet */
+               if (!assoc_array_insert_in_empty_tree(edit))
+                       goto enomem;
+               return edit;
+
+       case assoc_array_walk_found_terminal_node:
+               /* We found a node that doesn't have a node/shortcut pointer in
+                * the slot corresponding to the index key that we have to
+                * follow.
+                */
+               if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
+                                                          &result))
+                       goto enomem;
+               return edit;
+
+       case assoc_array_walk_found_wrong_shortcut:
+               /* We found a shortcut that didn't match our key in a slot we
+                * needed to follow.
+                */
+               if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
+                       goto enomem;
+               return edit;
+       }
+
+enomem:
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Change the object to be inserted in an edit script.  The object pointed to
+ * by the old object is not freed.  This must be done prior to applying the
+ * script.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+       BUG_ON(!object);
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+struct assoc_array_delete_collapse_context {
+       struct assoc_array_node *node;
+       const void              *skip_leaf;
+       int                     slot;
+};
+
+/*
+ * Subtree collapse to node iterator.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+                                               void *iterator_data)
+{
+       struct assoc_array_delete_collapse_context *collapse = iterator_data;
+
+       if (leaf == collapse->skip_leaf)
+               return 0;
+
+       BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
+
+       collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
+       return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array.  This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key)
+{
+       struct assoc_array_delete_collapse_context collapse;
+       struct assoc_array_walk_result result;
+       struct assoc_array_node *node, *new_n0;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *ptr;
+       bool has_meta;
+       int slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->adjust_count_by = -1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_found_terminal_node:
+               /* We found a node that should contain the leaf we've been
+                * asked to remove - *if* it's in the tree.
+                */
+               pr_devel("terminal_node\n");
+               node = result.terminal_node.node;
+
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+                       ptr = node->slots[slot];
+                       if (ptr &&
+                           assoc_array_ptr_is_leaf(ptr) &&
+                           ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+                                               index_key))
+                               goto found_leaf;
+               }
+       case assoc_array_walk_tree_empty:
+       case assoc_array_walk_found_wrong_shortcut:
+       default:
+               assoc_array_cancel_edit(edit);
+               pr_devel("not found\n");
+               return NULL;
+       }
+
+found_leaf:
+       BUG_ON(array->nr_leaves_on_tree <= 0);
+
+       /* In the simplest form of deletion we just clear the slot and release
+        * the leaf after a suitable interval.
+        */
+       edit->dead_leaf = node->slots[slot];
+       edit->set[0].ptr = &node->slots[slot];
+       edit->set[0].to = NULL;
+       edit->adjust_count_on = node;
+
+       /* If that concludes erasure of the last leaf, then delete the entire
+        * internal array.
+        */
+       if (array->nr_leaves_on_tree == 1) {
+               edit->set[1].ptr = &array->root;
+               edit->set[1].to = NULL;
+               edit->adjust_count_on = NULL;
+               edit->excised_subtree = array->root;
+               pr_devel("all gone\n");
+               return edit;
+       }
+
+       /* However, we'd also like to clear up some metadata blocks if we
+        * possibly can.
+        *
+        * We go for a simple algorithm of: if this node has FAN_OUT or fewer
+        * leaves in it, then attempt to collapse it - and attempt to
+        * recursively collapse up the tree.
+        *
+        * We could also try and collapse in partially filled subtrees to take
+        * up space in this node.
+        */
+       if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+               struct assoc_array_node *parent, *grandparent;
+               struct assoc_array_ptr *ptr;
+
+               /* First of all, we need to know if this node has metadata so
+                * that we don't try collapsing if all the leaves are already
+                * here.
+                */
+               has_meta = false;
+               for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                       ptr = node->slots[i];
+                       if (assoc_array_ptr_is_meta(ptr)) {
+                               has_meta = true;
+                               break;
+                       }
+               }
+
+               pr_devel("leaves: %ld [m=%d]\n",
+                        node->nr_leaves_on_branch - 1, has_meta);
+
+               /* Look further up the tree to see if we can collapse this node
+                * into a more proximal node too.
+                */
+               parent = node;
+       collapse_up:
+               pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+               ptr = parent->back_pointer;
+               if (!ptr)
+                       goto do_collapse;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->back_pointer;
+                       if (!ptr)
+                               goto do_collapse;
+               }
+
+               grandparent = assoc_array_ptr_to_node(ptr);
+               if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+                       parent = grandparent;
+                       goto collapse_up;
+               }
+
+       do_collapse:
+               /* There's no point collapsing if the original node has no meta
+                * pointers to discard and if we didn't merge into one of that
+                * node's ancestry.
+                */
+               if (has_meta || parent != node) {
+                       node = parent;
+
+                       /* Create a new node to collapse into */
+                       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+                       if (!new_n0)
+                               goto enomem;
+                       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+                       new_n0->back_pointer = node->back_pointer;
+                       new_n0->parent_slot = node->parent_slot;
+                       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+                       edit->adjust_count_on = new_n0;
+
+                       collapse.node = new_n0;
+                       collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+                       collapse.slot = 0;
+                       assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+                                                   node->back_pointer,
+                                                   assoc_array_delete_collapse_iterator,
+                                                   &collapse);
+                       pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+                       BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+                       if (!node->back_pointer) {
+                               edit->set[1].ptr = &array->root;
+                       } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+                               BUG();
+                       } else if (assoc_array_ptr_is_node(node->back_pointer)) {
+                               struct assoc_array_node *p =
+                                       assoc_array_ptr_to_node(node->back_pointer);
+                               edit->set[1].ptr = &p->slots[node->parent_slot];
+                       } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(node->back_pointer);
+                               edit->set[1].ptr = &s->next_node;
+                       }
+                       edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+                       edit->excised_subtree = assoc_array_node_to_ptr(node);
+               }
+       }
+
+       return edit;
+
+enomem:
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Precalculate and preallocate a script for the deletion of all the objects
+ * from an associative array.  This results in an edit script that can either
+ * be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if there are objects to be
+ * deleted, NULL if there are no objects in the array or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                          const struct assoc_array_ops *ops)
+{
+       struct assoc_array_edit *edit;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (!array->root)
+               return NULL;
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->set[1].ptr = &array->root;
+       edit->set[1].to = NULL;
+       edit->excised_subtree = array->root;
+       edit->ops_for_excised_subtree = ops;
+       pr_devel("all gone\n");
+       return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+       struct assoc_array_edit *edit =
+               container_of(head, struct assoc_array_edit, rcu);
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (edit->dead_leaf)
+               edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+       for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
+               if (edit->excised_meta[i])
+                       kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+
+       if (edit->excised_subtree) {
+               BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
+               if (assoc_array_ptr_is_node(edit->excised_subtree)) {
+                       struct assoc_array_node *n =
+                               assoc_array_ptr_to_node(edit->excised_subtree);
+                       n->back_pointer = NULL;
+               } else {
+                       struct assoc_array_shortcut *s =
+                               assoc_array_ptr_to_shortcut(edit->excised_subtree);
+                       s->back_pointer = NULL;
+               }
+               assoc_array_destroy_subtree(edit->excised_subtree,
+                                           edit->ops_for_excised_subtree);
+       }
+
+       kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance.  As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       smp_wmb();
+       if (edit->leaf_p)
+               *edit->leaf_p = edit->leaf;
+
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+               if (edit->set_parent_slot[i].p)
+                       *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+               if (edit->set_backpointers[i])
+                       *edit->set_backpointers[i] = edit->set_backpointers_to;
+
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+               if (edit->set[i].ptr)
+                       *edit->set[i].ptr = edit->set[i].to;
+
+       if (edit->array->root == NULL) {
+               edit->array->nr_leaves_on_tree = 0;
+       } else if (edit->adjust_count_on) {
+               node = edit->adjust_count_on;
+               for (;;) {
+                       node->nr_leaves_on_branch += edit->adjust_count_by;
+
+                       ptr = node->back_pointer;
+                       if (!ptr)
+                               break;
+                       if (assoc_array_ptr_is_shortcut(ptr)) {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               ptr = shortcut->back_pointer;
+                               if (!ptr)
+                                       break;
+                       }
+                       BUG_ON(!assoc_array_ptr_is_node(ptr));
+                       node = assoc_array_ptr_to_node(ptr);
+               }
+
+               edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+       }
+
+       call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Free an edit script and all the preallocated data it holds without making
+ * any changes to the associative array it was intended for.
+ *
+ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted.  That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_ptr *ptr;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* Clean up after an out of memory error */
+       for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+               ptr = edit->new_meta[i];
+               if (ptr) {
+                       if (assoc_array_ptr_is_node(ptr))
+                               kfree(assoc_array_ptr_to_node(ptr));
+                       else
+                               kfree(assoc_array_ptr_to_shortcut(ptr));
+               }
+       }
+       kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array.  If it returns false, the object is discard and if it returns true,
+ * the object is kept.  If it returns true, it must increment the object's
+ * usage count (or whatever it needs to do to retain it) before returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+                  const struct assoc_array_ops *ops,
+                  bool (*iterator)(void *object, void *iterator_data),
+                  void *iterator_data)
+{
+       struct assoc_array_shortcut *shortcut, *new_s;
+       struct assoc_array_node *node, *new_n;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *cursor, *ptr;
+       struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+       unsigned long nr_leaves_on_tree;
+       int keylen, slot, nr_free, next_slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (!array->root)
+               return 0;
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return -ENOMEM;
+       edit->array = array;
+       edit->ops = ops;
+       edit->ops_for_excised_subtree = ops;
+       edit->set[0].ptr = &array->root;
+       edit->excised_subtree = array->root;
+
+       new_root = new_parent = NULL;
+       new_ptr_pp = &new_root;
+       cursor = array->root;
+
+descend:
+       /* If this point is a shortcut, then we need to duplicate it and
+        * advance the target cursor.
+        */
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+               new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+                               keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s)
+                       goto enomem;
+               pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+               memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+                                        keylen * sizeof(unsigned long)));
+               new_s->back_pointer = new_parent;
+               new_s->parent_slot = shortcut->parent_slot;
+               *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+               new_ptr_pp = &new_s->next_node;
+               cursor = shortcut->next_node;
+       }
+
+       /* Duplicate the node at this position */
+       node = assoc_array_ptr_to_node(cursor);
+       new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n)
+               goto enomem;
+       pr_devel("dup node %p -> %p\n", node, new_n);
+       new_n->back_pointer = new_parent;
+       new_n->parent_slot = node->parent_slot;
+       *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+       new_ptr_pp = NULL;
+       slot = 0;
+
+continue_node:
+       /* Filter across any leaves and gc any subtrees */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+
+               if (assoc_array_ptr_is_leaf(ptr)) {
+                       if (iterator(assoc_array_ptr_to_leaf(ptr),
+                                    iterator_data))
+                               /* The iterator will have done any reference
+                                * counting on the object for us.
+                                */
+                               new_n->slots[slot] = ptr;
+                       continue;
+               }
+
+               new_ptr_pp = &new_n->slots[slot];
+               cursor = ptr;
+               goto descend;
+       }
+
+       pr_devel("-- compress node %p --\n", new_n);
+
+       /* Count up the number of empty slots in this node and work out the
+        * subtree leaf count.
+        */
+       new_n->nr_leaves_on_branch = 0;
+       nr_free = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = new_n->slots[slot];
+               if (!ptr)
+                       nr_free++;
+               else if (assoc_array_ptr_is_leaf(ptr))
+                       new_n->nr_leaves_on_branch++;
+       }
+       pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+       /* See what we can fold in */
+       next_slot = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_shortcut *s;
+               struct assoc_array_node *child;
+
+               ptr = new_n->slots[slot];
+               if (!ptr || assoc_array_ptr_is_leaf(ptr))
+                       continue;
+
+               s = NULL;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->next_node;
+               }
+
+               child = assoc_array_ptr_to_node(ptr);
+               new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+               if (child->nr_leaves_on_branch <= nr_free + 1) {
+                       /* Fold the child node into this one */
+                       pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+
+                       /* We would already have reaped an intervening shortcut
+                        * on the way back up the tree.
+                        */
+                       BUG_ON(s);
+
+                       new_n->slots[slot] = NULL;
+                       nr_free++;
+                       if (slot < next_slot)
+                               next_slot = slot;
+                       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                               struct assoc_array_ptr *p = child->slots[i];
+                               if (!p)
+                                       continue;
+                               BUG_ON(assoc_array_ptr_is_meta(p));
+                               while (new_n->slots[next_slot])
+                                       next_slot++;
+                               BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+                               new_n->slots[next_slot++] = p;
+                               nr_free--;
+                       }
+                       kfree(child);
+               } else {
+                       pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+               }
+       }
+
+       pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+       nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+       /* Excise this node if it is singly occupied by a shortcut */
+       if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+                       if ((ptr = new_n->slots[slot]))
+                               break;
+
+               if (assoc_array_ptr_is_meta(ptr) &&
+                   assoc_array_ptr_is_shortcut(ptr)) {
+                       pr_devel("excise node %p with 1 shortcut\n", new_n);
+                       new_s = assoc_array_ptr_to_shortcut(ptr);
+                       new_parent = new_n->back_pointer;
+                       slot = new_n->parent_slot;
+                       kfree(new_n);
+                       if (!new_parent) {
+                               new_s->back_pointer = NULL;
+                               new_s->parent_slot = 0;
+                               new_root = ptr;
+                               goto gc_complete;
+                       }
+
+                       if (assoc_array_ptr_is_shortcut(new_parent)) {
+                               /* We can discard any preceding shortcut also */
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(new_parent);
+
+                               pr_devel("excise preceding shortcut\n");
+
+                               new_parent = new_s->back_pointer = s->back_pointer;
+                               slot = new_s->parent_slot = s->parent_slot;
+                               kfree(s);
+                               if (!new_parent) {
+                                       new_s->back_pointer = NULL;
+                                       new_s->parent_slot = 0;
+                                       new_root = ptr;
+                                       goto gc_complete;
+                               }
+                       }
+
+                       new_s->back_pointer = new_parent;
+                       new_s->parent_slot = slot;
+                       new_n = assoc_array_ptr_to_node(new_parent);
+                       new_n->slots[slot] = ptr;
+                       goto ascend_old_tree;
+               }
+       }
+
+       /* Excise any shortcuts we might encounter that point to nodes that
+        * only contain leaves.
+        */
+       ptr = new_n->back_pointer;
+       if (!ptr)
+               goto gc_complete;
+
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               new_s = assoc_array_ptr_to_shortcut(ptr);
+               new_parent = new_s->back_pointer;
+               slot = new_s->parent_slot;
+
+               if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+                       struct assoc_array_node *n;
+
+                       pr_devel("excise shortcut\n");
+                       new_n->back_pointer = new_parent;
+                       new_n->parent_slot = slot;
+                       kfree(new_s);
+                       if (!new_parent) {
+                               new_root = assoc_array_node_to_ptr(new_n);
+                               goto gc_complete;
+                       }
+
+                       n = assoc_array_ptr_to_node(new_parent);
+                       n->slots[slot] = assoc_array_node_to_ptr(new_n);
+               }
+       } else {
+               new_parent = ptr;
+       }
+       new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+       ptr = node->back_pointer;
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               slot = shortcut->parent_slot;
+               cursor = shortcut->back_pointer;
+       } else {
+               slot = node->parent_slot;
+               cursor = ptr;
+       }
+       BUG_ON(!ptr);
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+
+gc_complete:
+       edit->set[0].to = new_root;
+       assoc_array_apply_edit(edit);
+       edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+       return 0;
+
+enomem:
+       pr_devel("enomem\n");
+       assoc_array_destroy_subtree(new_root, edit->ops);
+       kfree(edit);
+       return -ENOMEM;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c

index 657979f71bef0a0b3331eb804c0921d1f7afe6a8..bf076d281d4045da0b4b780ecd19a19db3987485 100644 (file)
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
         kfree(a);
  }
  EXPORT_SYMBOL_GPL(mpi_free);
+
+MODULE_DESCRIPTION("Multiprecision maths library");
+MODULE_LICENSE("GPL");
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 7d57af21f49e920776979dfe11023780ebff6afb..dee6cf4e6d34135e1880c5c01c7627aa1a33c69a 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
         return 0;
  }
  
-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
-       int i;
-       struct hstate *h = page_hstate(src);
-       struct page *dst_base = dst;
-       struct page *src_base = src;
-
-       for (i = 0; i < pages_per_huge_page(h); ) {
-               cond_resched();
-               copy_highpage(dst, src);
-
-               i++;
-               dst = mem_map_next(dst, dst_base, i);
-               src = mem_map_next(src, src_base, i);
-       }
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
-       int i;
-       struct hstate *h = page_hstate(src);
-
-       if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
-               copy_gigantic_page(dst, src);
-               return;
-       }
-
-       might_sleep();
-       for (i = 0; i < pages_per_huge_page(h); i++) {
-               cond_resched();
-               copy_highpage(dst + i, src + i);
-       }
-}
-
  static void enqueue_huge_page(struct hstate *h, struct page *page)
  {
         int nid = page_to_nid(page);
@@ -736,6 +702,23 @@ int PageHuge(struct page *page)
  }
  EXPORT_SYMBOL_GPL(PageHuge);
  
+/*
+ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
+ * normal or transparent huge pages.
+ */
+int PageHeadHuge(struct page *page_head)
+{
+       compound_page_dtor *dtor;
+
+       if (!PageHead(page_head))
+               return 0;
+
+       dtor = get_compound_page_dtor(page_head);
+
+       return dtor == free_huge_page;
+}
+EXPORT_SYMBOL_GPL(PageHeadHuge);
+
  pgoff_t __basepage_index(struct page *page)
  {
         struct page *page_head = compound_head(page);
diff --git a/mm/memory.c b/mm/memory.c

index 0409e8f43fa0f3719fadf3f19afda45164bbaa94..5d9025f3b3e1cd65bd97655ee95d6cd2f390ce5b 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4272,13 +4272,6 @@ void copy_user_huge_page(struct page *dst, struct page *src,
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
  
  #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-static struct kmem_cache *page_ptl_cachep;
-void __init ptlock_cache_init(void)
-{
-       page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
-                       SLAB_PANIC, NULL);
-}
-
  bool ptlock_alloc(struct page *page)
  {
         spinlock_t *ptl;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index c4403cdf3433ddc79515dac25e5eedd0b39fa320..eca4a3129129751208b41cfe808e9e31e5dc7b5f 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
                 return;
         }
  
-       p += snprintf(p, maxlen, policy_modes[mode]);
+       p += snprintf(p, maxlen, "%s", policy_modes[mode]);
  
         if (flags & MPOL_MODE_FLAGS) {
                 p += snprintf(p, buffer + maxlen - p, "=");
diff --git a/mm/migrate.c b/mm/migrate.c

index 316e720a2023c260b0205cdd2cd6be7d257986ac..bb940045fe8595842ed58f2e32f87b83d40485e1 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,6 +441,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
         return MIGRATEPAGE_SUCCESS;
  }
  
+/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+                               int nr_pages)
+{
+       int i;
+       struct page *dst_base = dst;
+       struct page *src_base = src;
+
+       for (i = 0; i < nr_pages; ) {
+               cond_resched();
+               copy_highpage(dst, src);
+
+               i++;
+               dst = mem_map_next(dst, dst_base, i);
+               src = mem_map_next(src, src_base, i);
+       }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+       int i;
+       int nr_pages;
+
+       if (PageHuge(src)) {
+               /* hugetlbfs page */
+               struct hstate *h = page_hstate(src);
+               nr_pages = pages_per_huge_page(h);
+
+               if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+                       __copy_gigantic_page(dst, src, nr_pages);
+                       return;
+               }
+       } else {
+               /* thp page */
+               BUG_ON(!PageTransHuge(src));
+               nr_pages = hpage_nr_pages(src);
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               cond_resched();
+               copy_highpage(dst + i, src + i);
+       }
+}
+
  /*
   * Copy the page to its new location
   */
diff --git a/mm/slab.c b/mm/slab.c

index 0c8967bb201878e567c4d949ae1d9ee57cf04d0b..eb043bf05f4c57687c0644f11c12bd6b3a654795 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -163,72 +163,6 @@
   */
  static bool pfmemalloc_active __read_mostly;
  
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END     (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE    (((kmem_bufctl_t)(~0U))-1)
-#define        BUFCTL_ACTIVE   (((kmem_bufctl_t)(~0U))-2)
-#define        SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- */
-struct slab_rcu {
-       struct rcu_head head;
-       struct kmem_cache *cachep;
-       void *addr;
-};
-
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-       union {
-               struct {
-                       struct list_head list;
-                       unsigned long colouroff;
-                       void *s_mem;            /* including colour offset */
-                       unsigned int inuse;     /* num of objs active in slab */
-                       kmem_bufctl_t free;
-                       unsigned short nodeid;
-               };
-               struct slab_rcu __slab_cover_slab_rcu;
-       };
-};
-
  /*
   * struct array_cache
   *
@@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
         return page->slab_cache;
  }
  
-static inline struct slab *virt_to_slab(const void *obj)
-{
-       struct page *page = virt_to_head_page(obj);
-
-       VM_BUG_ON(!PageSlab(page));
-       return page->slab_page;
-}
-
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
                                  unsigned int idx)
  {
-       return slab->s_mem + cache->size * idx;
+       return page->s_mem + cache->size * idx;
  }
  
  /*
@@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
   *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
   */
  static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-                                       const struct slab *slab, void *obj)
+                                       const struct page *page, void *obj)
  {
-       u32 offset = (obj - slab->s_mem);
+       u32 offset = (obj - page->s_mem);
         return reciprocal_divide(offset, cache->reciprocal_buffer_size);
  }
  
@@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
  
  static size_t slab_mgmt_size(size_t nr_objs, size_t align)
  {
-       return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+       return ALIGN(nr_objs * sizeof(unsigned int), align);
  }
  
  /*
@@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
          * on it. For the latter case, the memory allocated for a
          * slab is used for:
          *
-        * - The struct slab
-        * - One kmem_bufctl_t for each object
+        * - One unsigned int for each object
          * - Padding to respect alignment of @align
          * - @buffer_size bytes for each object
          *
@@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                 mgmt_size = 0;
                 nr_objs = slab_size / buffer_size;
  
-               if (nr_objs > SLAB_LIMIT)
-                       nr_objs = SLAB_LIMIT;
         } else {
                 /*
                  * Ignore padding for the initial guess. The padding
@@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                  * into the memory allocation when taking the padding
                  * into account.
                  */
-               nr_objs = (slab_size - sizeof(struct slab)) /
-                         (buffer_size + sizeof(kmem_bufctl_t));
+               nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
  
                 /*
                  * This calculated number will be either the right
@@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                        > slab_size)
                         nr_objs--;
  
-               if (nr_objs > SLAB_LIMIT)
-                       nr_objs = SLAB_LIMIT;
-
                 mgmt_size = slab_mgmt_size(nr_objs, align);
         }
         *num = nr_objs;
@@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         return nc;
  }
  
-static inline bool is_slab_pfmemalloc(struct slab *slabp)
+static inline bool is_slab_pfmemalloc(struct page *page)
  {
-       struct page *page = virt_to_page(slabp->s_mem);
-
         return PageSlabPfmemalloc(page);
  }
  
@@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
                                                 struct array_cache *ac)
  {
         struct kmem_cache_node *n = cachep->node[numa_mem_id()];
-       struct slab *slabp;
+       struct page *page;
         unsigned long flags;
  
         if (!pfmemalloc_active)
                 return;
  
         spin_lock_irqsave(&n->list_lock, flags);
-       list_for_each_entry(slabp, &n->slabs_full, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_full, lru)
+               if (is_slab_pfmemalloc(page))
                         goto out;
  
-       list_for_each_entry(slabp, &n->slabs_partial, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_partial, lru)
+               if (is_slab_pfmemalloc(page))
                         goto out;
  
-       list_for_each_entry(slabp, &n->slabs_free, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_free, lru)
+               if (is_slab_pfmemalloc(page))
                         goto out;
  
         pfmemalloc_active = false;
@@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
                  */
                 n = cachep->node[numa_mem_id()];
                 if (!list_empty(&n->slabs_free) && force_refill) {
-                       struct slab *slabp = virt_to_slab(objp);
-                       ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
+                       struct page *page = virt_to_head_page(objp);
+                       ClearPageSlabPfmemalloc(page);
                         clear_obj_pfmemalloc(&objp);
                         recheck_pfmemalloc_active(cachep, ac);
                         return objp;
@@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
  
  static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  {
-       struct slab *slabp = virt_to_slab(objp);
-       int nodeid = slabp->nodeid;
+       int nodeid = page_to_nid(virt_to_page(objp));
         struct kmem_cache_node *n;
         struct array_cache *alien = NULL;
         int node;
@@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
          * Make sure we are not freeing a object from another node to the array
          * cache on this cpu.
          */
-       if (likely(slabp->nodeid == node))
+       if (likely(nodeid == node))
                 return 0;
  
         n = cachep->node[node];
@@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void)
  {
         int i;
  
+       BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
+                                       sizeof(struct rcu_head));
         kmem_cache = &kmem_cache_boot;
         setup_node_pointer(kmem_cache);
  
@@ -1687,7 +1605,7 @@ static noinline void
  slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
  {
         struct kmem_cache_node *n;
-       struct slab *slabp;
+       struct page *page;
         unsigned long flags;
         int node;
  
@@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
                         continue;
  
                 spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(slabp, &n->slabs_full, list) {
+               list_for_each_entry(page, &n->slabs_full, lru) {
                         active_objs += cachep->num;
                         active_slabs++;
                 }
-               list_for_each_entry(slabp, &n->slabs_partial, list) {
-                       active_objs += slabp->inuse;
+               list_for_each_entry(page, &n->slabs_partial, lru) {
+                       active_objs += page->active;
                         active_slabs++;
                 }
-               list_for_each_entry(slabp, &n->slabs_free, list)
+               list_for_each_entry(page, &n->slabs_free, lru)
                         num_slabs++;
  
                 free_objects += n->free_objects;
@@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
   * did not request dmaable memory, we might get it, but that
   * would be relatively rare and ignorable.
   */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
+                                                               int nodeid)
  {
         struct page *page;
         int nr_pages;
-       int i;
-
-#ifndef CONFIG_MMU
-       /*
-        * Nommu uses slab's for process anonymous memory allocations, and thus
-        * requires __GFP_COMP to properly refcount higher order allocations
-        */
-       flags |= __GFP_COMP;
-#endif
  
         flags |= cachep->allocflags;
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
         else
                 add_zone_page_state(page_zone(page),
                         NR_SLAB_UNRECLAIMABLE, nr_pages);
-       for (i = 0; i < nr_pages; i++) {
-               __SetPageSlab(page + i);
-
-               if (page->pfmemalloc)
-                       SetPageSlabPfmemalloc(page + i);
-       }
+       __SetPageSlab(page);
+       if (page->pfmemalloc)
+               SetPageSlabPfmemalloc(page);
         memcg_bind_pages(cachep, cachep->gfporder);
  
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
@@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                         kmemcheck_mark_unallocated_pages(page, nr_pages);
         }
  
-       return page_address(page);
+       return page;
  }
  
  /*
   * Interface to system's page release.
   */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
  {
-       unsigned long i = (1 << cachep->gfporder);
-       struct page *page = virt_to_page(addr);
-       const unsigned long nr_freed = i;
+       const unsigned long nr_freed = (1 << cachep->gfporder);
  
         kmemcheck_free_shadow(page, cachep->gfporder);
  
@@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
         else
                 sub_zone_page_state(page_zone(page),
                                 NR_SLAB_UNRECLAIMABLE, nr_freed);
-       while (i--) {
-               BUG_ON(!PageSlab(page));
-               __ClearPageSlabPfmemalloc(page);
-               __ClearPageSlab(page);
-               page++;
-       }
+
+       BUG_ON(!PageSlab(page));
+       __ClearPageSlabPfmemalloc(page);
+       __ClearPageSlab(page);
+       page_mapcount_reset(page);
+       page->mapping = NULL;
  
         memcg_release_pages(cachep, cachep->gfporder);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += nr_freed;
-       free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+       __free_memcg_kmem_pages(page, cachep->gfporder);
  }
  
  static void kmem_rcu_free(struct rcu_head *head)
  {
-       struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
-       struct kmem_cache *cachep = slab_rcu->cachep;
+       struct kmem_cache *cachep;
+       struct page *page;
  
-       kmem_freepages(cachep, slab_rcu->addr);
-       if (OFF_SLAB(cachep))
-               kmem_cache_free(cachep->slabp_cache, slab_rcu);
+       page = container_of(head, struct page, rcu_head);
+       cachep = page->slab_cache;
+
+       kmem_freepages(cachep, page);
  }
  
  #if DEBUG
@@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                 /* Print some data about the neighboring objects, if they
                  * exist:
                  */
-               struct slab *slabp = virt_to_slab(objp);
+               struct page *page = virt_to_head_page(objp);
                 unsigned int objnr;
  
-               objnr = obj_to_index(cachep, slabp, objp);
+               objnr = obj_to_index(cachep, page, objp);
                 if (objnr) {
-                       objp = index_to_obj(cachep, slabp, objnr - 1);
+                       objp = index_to_obj(cachep, page, objnr - 1);
                         realobj = (char *)objp + obj_offset(cachep);
                         printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
                                realobj, size);
                         print_objinfo(cachep, objp, 2);
                 }
                 if (objnr + 1 < cachep->num) {
-                       objp = index_to_obj(cachep, slabp, objnr + 1);
+                       objp = index_to_obj(cachep, page, objnr + 1);
                         realobj = (char *)objp + obj_offset(cachep);
                         printk(KERN_ERR "Next obj: start=%p, len=%d\n",
                                realobj, size);
@@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
  #endif
  
  #if DEBUG
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+                                               struct page *page)
  {
         int i;
         for (i = 0; i < cachep->num; i++) {
-               void *objp = index_to_obj(cachep, slabp, i);
+               void *objp = index_to_obj(cachep, page, i);
  
                 if (cachep->flags & SLAB_POISON) {
  #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
         }
  }
  #else
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+                                               struct page *page)
  {
  }
  #endif
@@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
   * Before calling the slab must have been unlinked from the cache.  The
   * cache-lock is not held/needed.
   */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct page *page)
  {
-       void *addr = slabp->s_mem - slabp->colouroff;
+       void *freelist;
  
-       slab_destroy_debugcheck(cachep, slabp);
+       freelist = page->freelist;
+       slab_destroy_debugcheck(cachep, page);
         if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
-               struct slab_rcu *slab_rcu;
+               struct rcu_head *head;
+
+               /*
+                * RCU free overloads the RCU head over the LRU.
+                * slab_page has been overloeaded over the LRU,
+                * however it is not used from now on so that
+                * we can use it safely.
+                */
+               head = (void *)&page->rcu_head;
+               call_rcu(head, kmem_rcu_free);
  
-               slab_rcu = (struct slab_rcu *)slabp;
-               slab_rcu->cachep = cachep;
-               slab_rcu->addr = addr;
-               call_rcu(&slab_rcu->head, kmem_rcu_free);
         } else {
-               kmem_freepages(cachep, addr);
-               if (OFF_SLAB(cachep))
-                       kmem_cache_free(cachep->slabp_cache, slabp);
+               kmem_freepages(cachep, page);
         }
+
+       /*
+        * From now on, we don't use freelist
+        * although actual page can be freed in rcu context
+        */
+       if (OFF_SLAB(cachep))
+               kmem_cache_free(cachep->freelist_cache, freelist);
  }
  
  /**
@@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
                          * use off-slab slabs. Needed to avoid a possible
                          * looping condition in cache_grow().
                          */
-                       offslab_limit = size - sizeof(struct slab);
-                       offslab_limit /= sizeof(kmem_bufctl_t);
+                       offslab_limit = size;
+                       offslab_limit /= sizeof(unsigned int);
  
                         if (num > offslab_limit)
                                 break;
@@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  int
  __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
  {
-       size_t left_over, slab_size, ralign;
+       size_t left_over, freelist_size, ralign;
         gfp_t gfp;
         int err;
         size_t size = cachep->size;
@@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
         if (!cachep->num)
                 return -E2BIG;
  
-       slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-                         + sizeof(struct slab), cachep->align);
+       freelist_size =
+               ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
  
         /*
          * If the slab has been placed off-slab, and we have enough space then
          * move it on-slab. This is at the expense of any extra colouring.
          */
-       if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+       if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
                 flags &= ~CFLGS_OFF_SLAB;
-               left_over -= slab_size;
+               left_over -= freelist_size;
         }
  
         if (flags & CFLGS_OFF_SLAB) {
                 /* really off slab. No need for manual alignment */
-               slab_size =
-                   cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+               freelist_size = cachep->num * sizeof(unsigned int);
  
  #ifdef CONFIG_PAGE_POISONING
                 /* If we're going to use the generic kernel_map_pages()
@@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
         if (cachep->colour_off < cachep->align)
                 cachep->colour_off = cachep->align;
         cachep->colour = left_over / cachep->colour_off;
-       cachep->slab_size = slab_size;
+       cachep->freelist_size = freelist_size;
         cachep->flags = flags;
-       cachep->allocflags = 0;
+       cachep->allocflags = __GFP_COMP;
         if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
                 cachep->allocflags |= GFP_DMA;
         cachep->size = size;
         cachep->reciprocal_buffer_size = reciprocal_value(size);
  
         if (flags & CFLGS_OFF_SLAB) {
-               cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
+               cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
                 /*
                  * This is a possibility for one of the malloc_sizes caches.
                  * But since we go off slab only for object size greater than
@@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                  * this should not happen at all.
                  * But leave a BUG_ON for some lucky dude.
                  */
-               BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
+               BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
         }
  
         err = setup_cpu_cache(cachep, gfp);
@@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache,
  {
         struct list_head *p;
         int nr_freed;
-       struct slab *slabp;
+       struct page *page;
  
         nr_freed = 0;
         while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache,
                         goto out;
                 }
  
-               slabp = list_entry(p, struct slab, list);
+               page = list_entry(p, struct page, lru);
  #if DEBUG
-               BUG_ON(slabp->inuse);
+               BUG_ON(page->active);
  #endif
-               list_del(&slabp->list);
+               list_del(&page->lru);
                 /*
                  * Safe to drop the lock. The slab is no longer linked
                  * to the cache.
                  */
                 n->free_objects -= cache->num;
                 spin_unlock_irq(&n->list_lock);
-               slab_destroy(cache, slabp);
+               slab_destroy(cache, page);
                 nr_freed++;
         }
  out:
@@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
   * descriptors in kmem_cache_create, we search through the malloc_sizes array.
   * If we are creating a malloc_sizes cache here it would not be visible to
   * kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have slabp_cache same as the original cache.
+ * Hence we cannot have freelist_cache same as the original cache.
   */
-static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-                                  int colour_off, gfp_t local_flags,
-                                  int nodeid)
+static void *alloc_slabmgmt(struct kmem_cache *cachep,
+                                  struct page *page, int colour_off,
+                                  gfp_t local_flags, int nodeid)
  {
-       struct slab *slabp;
+       void *freelist;
+       void *addr = page_address(page);
  
         if (OFF_SLAB(cachep)) {
                 /* Slab management obj is off-slab. */
-               slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+               freelist = kmem_cache_alloc_node(cachep->freelist_cache,
                                               local_flags, nodeid);
-               /*
-                * If the first object in the slab is leaked (it's allocated
-                * but no one has a reference to it), we want to make sure
-                * kmemleak does not treat the ->s_mem pointer as a reference
-                * to the object. Otherwise we will not report the leak.
-                */
-               kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
-                                  local_flags);
-               if (!slabp)
+               if (!freelist)
                         return NULL;
         } else {
-               slabp = objp + colour_off;
-               colour_off += cachep->slab_size;
+               freelist = addr + colour_off;
+               colour_off += cachep->freelist_size;
         }
-       slabp->inuse = 0;
-       slabp->colouroff = colour_off;
-       slabp->s_mem = objp + colour_off;
-       slabp->nodeid = nodeid;
-       slabp->free = 0;
-       return slabp;
+       page->active = 0;
+       page->s_mem = addr + colour_off;
+       return freelist;
  }
  
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+static inline unsigned int *slab_freelist(struct page *page)
  {
-       return (kmem_bufctl_t *) (slabp + 1);
+       return (unsigned int *)(page->freelist);
  }
  
  static void cache_init_objs(struct kmem_cache *cachep,
-                           struct slab *slabp)
+                           struct page *page)
  {
         int i;
  
         for (i = 0; i < cachep->num; i++) {
-               void *objp = index_to_obj(cachep, slabp, i);
+               void *objp = index_to_obj(cachep, page, i);
  #if DEBUG
                 /* need to poison the objs? */
                 if (cachep->flags & SLAB_POISON)
@@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
                 if (cachep->ctor)
                         cachep->ctor(objp);
  #endif
-               slab_bufctl(slabp)[i] = i + 1;
+               slab_freelist(page)[i] = i;
         }
-       slab_bufctl(slabp)[i - 1] = BUFCTL_END;
  }
  
  static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
@@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
         }
  }
  
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
                                 int nodeid)
  {
-       void *objp = index_to_obj(cachep, slabp, slabp->free);
-       kmem_bufctl_t next;
+       void *objp;
  
-       slabp->inuse++;
-       next = slab_bufctl(slabp)[slabp->free];
+       objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
+       page->active++;
  #if DEBUG
-       slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
-       WARN_ON(slabp->nodeid != nodeid);
+       WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
  #endif
-       slabp->free = next;
  
         return objp;
  }
  
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
                                 void *objp, int nodeid)
  {
-       unsigned int objnr = obj_to_index(cachep, slabp, objp);
-
+       unsigned int objnr = obj_to_index(cachep, page, objp);
  #if DEBUG
+       unsigned int i;
+
         /* Verify that the slab belongs to the intended node */
-       WARN_ON(slabp->nodeid != nodeid);
+       WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
  
-       if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
-               printk(KERN_ERR "slab: double free detected in cache "
-                               "'%s', objp %p\n", cachep->name, objp);
-               BUG();
+       /* Verify double free bug */
+       for (i = page->active; i < cachep->num; i++) {
+               if (slab_freelist(page)[i] == objnr) {
+                       printk(KERN_ERR "slab: double free detected in cache "
+                                       "'%s', objp %p\n", cachep->name, objp);
+                       BUG();
+               }
         }
  #endif
-       slab_bufctl(slabp)[objnr] = slabp->free;
-       slabp->free = objnr;
-       slabp->inuse--;
+       page->active--;
+       slab_freelist(page)[page->active] = objnr;
  }
  
  /*
@@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
   * for the slab allocator to be able to lookup the cache and slab of a
   * virtual address for kfree, ksize, and slab debugging.
   */
-static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
-                          void *addr)
+static void slab_map_pages(struct kmem_cache *cache, struct page *page,
+                          void *freelist)
  {
-       int nr_pages;
-       struct page *page;
-
-       page = virt_to_page(addr);
-
-       nr_pages = 1;
-       if (likely(!PageCompound(page)))
-               nr_pages <<= cache->gfporder;
-
-       do {
-               page->slab_cache = cache;
-               page->slab_page = slab;
-               page++;
-       } while (--nr_pages);
+       page->slab_cache = cache;
+       page->freelist = freelist;
  }
  
  /*
@@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
   * kmem_cache_alloc() when there are no active objs left in a cache.
   */
  static int cache_grow(struct kmem_cache *cachep,
-               gfp_t flags, int nodeid, void *objp)
+               gfp_t flags, int nodeid, struct page *page)
  {
-       struct slab *slabp;
+       void *freelist;
         size_t offset;
         gfp_t local_flags;
         struct kmem_cache_node *n;
@@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep,
          * Get mem for the objs.  Attempt to allocate a physical page from
          * 'nodeid'.
          */
-       if (!objp)
-               objp = kmem_getpages(cachep, local_flags, nodeid);
-       if (!objp)
+       if (!page)
+               page = kmem_getpages(cachep, local_flags, nodeid);
+       if (!page)
                 goto failed;
  
         /* Get slab management. */
-       slabp = alloc_slabmgmt(cachep, objp, offset,
+       freelist = alloc_slabmgmt(cachep, page, offset,
                         local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
-       if (!slabp)
+       if (!freelist)
                 goto opps1;
  
-       slab_map_pages(cachep, slabp, objp);
+       slab_map_pages(cachep, page, freelist);
  
-       cache_init_objs(cachep, slabp);
+       cache_init_objs(cachep, page);
  
         if (local_flags & __GFP_WAIT)
                 local_irq_disable();
@@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep,
         spin_lock(&n->list_lock);
  
         /* Make slab active. */
-       list_add_tail(&slabp->list, &(n->slabs_free));
+       list_add_tail(&page->lru, &(n->slabs_free));
         STATS_INC_GROWN(cachep);
         n->free_objects += cachep->num;
         spin_unlock(&n->list_lock);
         return 1;
  opps1:
-       kmem_freepages(cachep, objp);
+       kmem_freepages(cachep, page);
  failed:
         if (local_flags & __GFP_WAIT)
                 local_irq_disable();
@@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
  static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
                                    unsigned long caller)
  {
-       struct page *page;
         unsigned int objnr;
-       struct slab *slabp;
+       struct page *page;
  
         BUG_ON(virt_to_cache(objp) != cachep);
  
@@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
         kfree_debugcheck(objp);
         page = virt_to_head_page(objp);
  
-       slabp = page->slab_page;
-
         if (cachep->flags & SLAB_RED_ZONE) {
                 verify_redzone_free(cachep, objp);
                 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
@@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
         if (cachep->flags & SLAB_STORE_USER)
                 *dbg_userword(cachep, objp) = (void *)caller;
  
-       objnr = obj_to_index(cachep, slabp, objp);
+       objnr = obj_to_index(cachep, page, objp);
  
         BUG_ON(objnr >= cachep->num);
-       BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
+       BUG_ON(objp != index_to_obj(cachep, page, objnr));
  
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-       slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
-#endif
         if (cachep->flags & SLAB_POISON) {
  #ifdef CONFIG_DEBUG_PAGEALLOC
                 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
         return objp;
  }
  
-static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
-{
-       kmem_bufctl_t i;
-       int entries = 0;
-
-       /* Check slab's freelist to see if this obj is there. */
-       for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
-               entries++;
-               if (entries > cachep->num || i >= cachep->num)
-                       goto bad;
-       }
-       if (entries != cachep->num - slabp->inuse) {
-bad:
-               printk(KERN_ERR "slab: Internal list corruption detected in "
-                       "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
-                       cachep->name, cachep->num, slabp, slabp->inuse,
-                       print_tainted());
-               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
-                       sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
-                       1);
-               BUG();
-       }
-}
  #else
  #define kfree_debugcheck(x) do { } while(0)
  #define cache_free_debugcheck(x,objp,z) (objp)
-#define check_slabp(x,y) do { } while(0)
  #endif
  
  static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
@@ -2989,7 +2854,7 @@ retry:
  
         while (batchcount > 0) {
                 struct list_head *entry;
-               struct slab *slabp;
+               struct page *page;
                 /* Get slab alloc is to come from. */
                 entry = n->slabs_partial.next;
                 if (entry == &n->slabs_partial) {
@@ -2999,8 +2864,7 @@ retry:
                                 goto must_grow;
                 }
  
-               slabp = list_entry(entry, struct slab, list);
-               check_slabp(cachep, slabp);
+               page = list_entry(entry, struct page, lru);
                 check_spinlock_acquired(cachep);
  
                 /*
@@ -3008,24 +2872,23 @@ retry:
                  * there must be at least one object available for
                  * allocation.
                  */
-               BUG_ON(slabp->inuse >= cachep->num);
+               BUG_ON(page->active >= cachep->num);
  
-               while (slabp->inuse < cachep->num && batchcount--) {
+               while (page->active < cachep->num && batchcount--) {
                         STATS_INC_ALLOCED(cachep);
                         STATS_INC_ACTIVE(cachep);
                         STATS_SET_HIGH(cachep);
  
-                       ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+                       ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
                                                                         node));
                 }
-               check_slabp(cachep, slabp);
  
                 /* move slabp to correct slabp list: */
-               list_del(&slabp->list);
-               if (slabp->free == BUFCTL_END)
-                       list_add(&slabp->list, &n->slabs_full);
+               list_del(&page->lru);
+               if (page->active == cachep->num)
+                       list_add(&page->list, &n->slabs_full);
                 else
-                       list_add(&slabp->list, &n->slabs_partial);
+                       list_add(&page->list, &n->slabs_partial);
         }
  
  must_grow:
@@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
                 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
         }
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-       {
-               struct slab *slabp;
-               unsigned objnr;
-
-               slabp = virt_to_head_page(objp)->slab_page;
-               objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
-               slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
-       }
-#endif
         objp += obj_offset(cachep);
         if (cachep->ctor && cachep->flags & SLAB_POISON)
                 cachep->ctor(objp);
@@ -3248,18 +3101,20 @@ retry:
                  * We may trigger various forms of reclaim on the allowed
                  * set and go into memory reserves if necessary.
                  */
+               struct page *page;
+
                 if (local_flags & __GFP_WAIT)
                         local_irq_enable();
                 kmem_flagcheck(cache, flags);
-               obj = kmem_getpages(cache, local_flags, numa_mem_id());
+               page = kmem_getpages(cache, local_flags, numa_mem_id());
                 if (local_flags & __GFP_WAIT)
                         local_irq_disable();
-               if (obj) {
+               if (page) {
                         /*
                          * Insert into the appropriate per node queues
                          */
-                       nid = page_to_nid(virt_to_page(obj));
-                       if (cache_grow(cache, flags, nid, obj)) {
+                       nid = page_to_nid(page);
+                       if (cache_grow(cache, flags, nid, page)) {
                                 obj = ____cache_alloc_node(cache,
                                         flags | GFP_THISNODE, nid);
                                 if (!obj)
@@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
                                 int nodeid)
  {
         struct list_head *entry;
-       struct slab *slabp;
+       struct page *page;
         struct kmem_cache_node *n;
         void *obj;
         int x;
@@ -3308,26 +3163,24 @@ retry:
                         goto must_grow;
         }
  
-       slabp = list_entry(entry, struct slab, list);
+       page = list_entry(entry, struct page, lru);
         check_spinlock_acquired_node(cachep, nodeid);
-       check_slabp(cachep, slabp);
  
         STATS_INC_NODEALLOCS(cachep);
         STATS_INC_ACTIVE(cachep);
         STATS_SET_HIGH(cachep);
  
-       BUG_ON(slabp->inuse == cachep->num);
+       BUG_ON(page->active == cachep->num);
  
-       obj = slab_get_obj(cachep, slabp, nodeid);
-       check_slabp(cachep, slabp);
+       obj = slab_get_obj(cachep, page, nodeid);
         n->free_objects--;
         /* move slabp to correct slabp list: */
-       list_del(&slabp->list);
+       list_del(&page->lru);
  
-       if (slabp->free == BUFCTL_END)
-               list_add(&slabp->list, &n->slabs_full);
+       if (page->active == cachep->num)
+               list_add(&page->lru, &n->slabs_full);
         else
-               list_add(&slabp->list, &n->slabs_partial);
+               list_add(&page->lru, &n->slabs_partial);
  
         spin_unlock(&n->list_lock);
         goto done;
@@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
  
         for (i = 0; i < nr_objects; i++) {
                 void *objp;
-               struct slab *slabp;
+               struct page *page;
  
                 clear_obj_pfmemalloc(&objpp[i]);
                 objp = objpp[i];
  
-               slabp = virt_to_slab(objp);
+               page = virt_to_head_page(objp);
                 n = cachep->node[node];
-               list_del(&slabp->list);
+               list_del(&page->lru);
                 check_spinlock_acquired_node(cachep, node);
-               check_slabp(cachep, slabp);
-               slab_put_obj(cachep, slabp, objp, node);
+               slab_put_obj(cachep, page, objp, node);
                 STATS_DEC_ACTIVE(cachep);
                 n->free_objects++;
-               check_slabp(cachep, slabp);
  
                 /* fixup slab chains */
-               if (slabp->inuse == 0) {
+               if (page->active == 0) {
                         if (n->free_objects > n->free_limit) {
                                 n->free_objects -= cachep->num;
                                 /* No need to drop any previously held
@@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                                  * a different cache, refer to comments before
                                  * alloc_slabmgmt.
                                  */
-                               slab_destroy(cachep, slabp);
+                               slab_destroy(cachep, page);
                         } else {
-                               list_add(&slabp->list, &n->slabs_free);
+                               list_add(&page->lru, &n->slabs_free);
                         }
                 } else {
                         /* Unconditionally move a slab to the end of the
                          * partial list on free - maximum time for the
                          * other objects to be freed, too.
                          */
-                       list_add_tail(&slabp->list, &n->slabs_partial);
+                       list_add_tail(&page->lru, &n->slabs_partial);
                 }
         }
  }
@@ -3551,10 +3402,10 @@ free_done:
  
                 p = n->slabs_free.next;
                 while (p != &(n->slabs_free)) {
-                       struct slab *slabp;
+                       struct page *page;
  
-                       slabp = list_entry(p, struct slab, list);
-                       BUG_ON(slabp->inuse);
+                       page = list_entry(p, struct page, lru);
+                       BUG_ON(page->active);
  
                         i++;
                         p = p->next;
@@ -4158,7 +4009,7 @@ out:
  #ifdef CONFIG_SLABINFO
  void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
  {
-       struct slab *slabp;
+       struct page *page;
         unsigned long active_objs;
         unsigned long num_objs;
         unsigned long active_slabs = 0;
@@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
                 check_irq_on();
                 spin_lock_irq(&n->list_lock);
  
-               list_for_each_entry(slabp, &n->slabs_full, list) {
-                       if (slabp->inuse != cachep->num && !error)
+               list_for_each_entry(page, &n->slabs_full, lru) {
+                       if (page->active != cachep->num && !error)
                                 error = "slabs_full accounting error";
                         active_objs += cachep->num;
                         active_slabs++;
                 }
-               list_for_each_entry(slabp, &n->slabs_partial, list) {
-                       if (slabp->inuse == cachep->num && !error)
-                               error = "slabs_partial inuse accounting error";
-                       if (!slabp->inuse && !error)
-                               error = "slabs_partial/inuse accounting error";
-                       active_objs += slabp->inuse;
+               list_for_each_entry(page, &n->slabs_partial, lru) {
+                       if (page->active == cachep->num && !error)
+                               error = "slabs_partial accounting error";
+                       if (!page->active && !error)
+                               error = "slabs_partial accounting error";
+                       active_objs += page->active;
                         active_slabs++;
                 }
-               list_for_each_entry(slabp, &n->slabs_free, list) {
-                       if (slabp->inuse && !error)
-                               error = "slabs_free/inuse accounting error";
+               list_for_each_entry(page, &n->slabs_free, lru) {
+                       if (page->active && !error)
+                               error = "slabs_free accounting error";
                         num_slabs++;
                 }
                 free_objects += n->free_objects;
@@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v)
         return 1;
  }
  
-static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+static void handle_slab(unsigned long *n, struct kmem_cache *c,
+                                               struct page *page)
  {
         void *p;
-       int i;
+       int i, j;
+
         if (n[0] == n[1])
                 return;
-       for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
-               if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
+       for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
+               bool active = true;
+
+               for (j = page->active; j < c->num; j++) {
+                       /* Skip freed item */
+                       if (slab_freelist(page)[j] == i) {
+                               active = false;
+                               break;
+                       }
+               }
+               if (!active)
                         continue;
+
                 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
                         return;
         }
@@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
  static int leaks_show(struct seq_file *m, void *p)
  {
         struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
-       struct slab *slabp;
+       struct page *page;
         struct kmem_cache_node *n;
         const char *name;
         unsigned long *x = m->private;
@@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p)
                 check_irq_on();
                 spin_lock_irq(&n->list_lock);
  
-               list_for_each_entry(slabp, &n->slabs_full, list)
-                       handle_slab(x, cachep, slabp);
-               list_for_each_entry(slabp, &n->slabs_partial, list)
-                       handle_slab(x, cachep, slabp);
+               list_for_each_entry(page, &n->slabs_full, lru)
+                       handle_slab(x, cachep, page);
+               list_for_each_entry(page, &n->slabs_partial, lru)
+                       handle_slab(x, cachep, page);
                 spin_unlock_irq(&n->list_lock);
         }
         name = cachep->name;
diff --git a/mm/slub.c b/mm/slub.c

index 7e8bd8d828bc0e5c7e96919c6e24186d8d5382df..545a170ebf9f66cf0e3716c9cd6f4cb7eef0eda6 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
  /*
   * Maximum number of desirable partial slabs.
   * The existence of more partial slabs makes kmem_cache_shrink
- * sort the partial list by the number of objects in the.
+ * sort the partial list by the number of objects in use.
   */
  #define MAX_PARTIAL 10
  
@@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
   * Hooks for other subsystems that check memory allocations. In a typical
   * production configuration these hooks all should produce no code at all.
   */
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+       kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+       kmemleak_free(x);
+}
+
  static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
  {
         flags &= gfp_allowed_mask;
@@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size,
         /*
          * Enable debugging if selected on the kernel commandline.
          */
-       if (slub_debug && (!slub_debug_slabs ||
-               !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+       if (slub_debug && (!slub_debug_slabs || (name &&
+               !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
                 flags |= slub_debug;
  
         return flags;
@@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
  static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                         int objects) {}
  
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+       kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+       kmemleak_free(x);
+}
+
  static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
                                                         { return 0; }
  
  static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-               void *object) {}
+               void *object)
+{
+       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
+               flags & gfp_allowed_mask);
+}
  
-static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
+static inline void slab_free_hook(struct kmem_cache *s, void *x)
+{
+       kmemleak_free_recursive(x, s->flags);
+}
  
  #endif /* CONFIG_SLUB_DEBUG */
  
@@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node;
   * slab on the node for this slabcache. There are no concurrent accesses
   * possible.
   *
- * Note that this function only works on the kmalloc_node_cache
- * when allocating for the kmalloc_node_cache. This is used for bootstrapping
+ * Note that this function only works on the kmem_cache_node
+ * when allocating for the kmem_cache_node. This is used for bootstrapping
   * memory on a fresh node that has no slab structures yet.
   */
  static void early_kmem_cache_node_alloc(int node)
@@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
         if (page)
                 ptr = page_address(page);
  
-       kmemleak_alloc(ptr, size, 1, flags);
+       kmalloc_large_node_hook(ptr, size, flags);
         return ptr;
  }
  
@@ -3336,7 +3363,7 @@ void kfree(const void *x)
         page = virt_to_head_page(x);
         if (unlikely(!PageSlab(page))) {
                 BUG_ON(!PageCompound(page));
-               kmemleak_free(x);
+               kfree_hook(x);
                 __free_memcg_kmem_pages(page, compound_order(page));
                 return;
         }
diff --git a/mm/swap.c b/mm/swap.c

index 7a9f80d451f548cd749331bc6143354eab53814d..84b26aaabd03b6d236ba82b84c713844916fd521 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page)
  
  static void put_compound_page(struct page *page)
  {
-       /*
-        * hugetlbfs pages cannot be split from under us.  If this is a
-        * hugetlbfs page, check refcount on head page and release the page if
-        * the refcount becomes zero.
-        */
-       if (PageHuge(page)) {
-               page = compound_head(page);
-               if (put_page_testzero(page))
-                       __put_compound_page(page);
-
-               return;
-       }
-
         if (unlikely(PageTail(page))) {
                 /* __split_huge_page_refcount can run under us */
                 struct page *page_head = compound_trans_head(page);
@@ -111,14 +98,31 @@ static void put_compound_page(struct page *page)
                          * still hot on arches that do not support
                          * this_cpu_cmpxchg_double().
                          */
-                       if (PageSlab(page_head)) {
-                               if (PageTail(page)) {
+                       if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+                               if (likely(PageTail(page))) {
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * cannot race here.
+                                        */
+                                       VM_BUG_ON(!PageHead(page_head));
+                                       atomic_dec(&page->_mapcount);
                                         if (put_page_testzero(page_head))
                                                 VM_BUG_ON(1);
-
-                                       atomic_dec(&page->_mapcount);
-                                       goto skip_lock_tail;
+                                       if (put_page_testzero(page_head))
+                                               __put_compound_page(page_head);
+                                       return;
                                 } else
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * run before us, "page" was a
+                                        * THP tail. The split
+                                        * page_head has been freed
+                                        * and reallocated as slab or
+                                        * hugetlbfs page of smaller
+                                        * order (only possible if
+                                        * reallocated as slab on
+                                        * x86).
+                                        */
                                         goto skip_lock;
                         }
                         /*
@@ -132,8 +136,27 @@ static void put_compound_page(struct page *page)
                                 /* __split_huge_page_refcount run before us */
                                 compound_unlock_irqrestore(page_head, flags);
  skip_lock:
-                               if (put_page_testzero(page_head))
-                                       __put_single_page(page_head);
+                               if (put_page_testzero(page_head)) {
+                                       /*
+                                        * The head page may have been
+                                        * freed and reallocated as a
+                                        * compound page of smaller
+                                        * order and then freed again.
+                                        * All we know is that it
+                                        * cannot have become: a THP
+                                        * page, a compound page of
+                                        * higher order, a tail page.
+                                        * That is because we still
+                                        * hold the refcount of the
+                                        * split THP tail and
+                                        * page_head was the THP head
+                                        * before the split.
+                                        */
+                                       if (PageHead(page_head))
+                                               __put_compound_page(page_head);
+                                       else
+                                               __put_single_page(page_head);
+                               }
  out_put_single:
                                 if (put_page_testzero(page))
                                         __put_single_page(page);
@@ -155,7 +178,6 @@ out_put_single:
                         VM_BUG_ON(atomic_read(&page->_count) != 0);
                         compound_unlock_irqrestore(page_head, flags);
  
-skip_lock_tail:
                         if (put_page_testzero(page_head)) {
                                 if (PageHead(page_head))
                                         __put_compound_page(page_head);
@@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page)
          * proper PT lock that already serializes against
          * split_huge_page().
          */
+       unsigned long flags;
         bool got = false;
-       struct page *page_head;
-
-       /*
-        * If this is a hugetlbfs page it cannot be split under us.  Simply
-        * increment refcount for the head page.
-        */
-       if (PageHuge(page)) {
-               page_head = compound_head(page);
-               atomic_inc(&page_head->_count);
-               got = true;
-       } else {
-               unsigned long flags;
+       struct page *page_head = compound_trans_head(page);
  
-               page_head = compound_trans_head(page);
-               if (likely(page != page_head &&
-                                       get_page_unless_zero(page_head))) {
-
-                       /* Ref to put_compound_page() comment. */
-                       if (PageSlab(page_head)) {
-                               if (likely(PageTail(page))) {
-                                       __get_page_tail_foll(page, false);
-                                       return true;
-                               } else {
-                                       put_page(page_head);
-                                       return false;
-                               }
-                       }
-
-                       /*
-                        * page_head wasn't a dangling pointer but it
-                        * may not be a head page anymore by the time
-                        * we obtain the lock. That is ok as long as it
-                        * can't be freed from under us.
-                        */
-                       flags = compound_lock_irqsave(page_head);
-                       /* here __split_huge_page_refcount won't run anymore */
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /* Ref to put_compound_page() comment. */
+               if (PageSlab(page_head) || PageHeadHuge(page_head)) {
                         if (likely(PageTail(page))) {
+                               /*
+                                * This is a hugetlbfs page or a slab
+                                * page. __split_huge_page_refcount
+                                * cannot race here.
+                                */
+                               VM_BUG_ON(!PageHead(page_head));
                                 __get_page_tail_foll(page, false);
-                               got = true;
-                       }
-                       compound_unlock_irqrestore(page_head, flags);
-                       if (unlikely(!got))
+                               return true;
+                       } else {
+                               /*
+                                * __split_huge_page_refcount run
+                                * before us, "page" was a THP
+                                * tail. The split page_head has been
+                                * freed and reallocated as slab or
+                                * hugetlbfs page of smaller order
+                                * (only possible if reallocated as
+                                * slab on x86).
+                                */
                                 put_page(page_head);
+                               return false;
+                       }
+               }
+
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
                 }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
         }
         return got;
  }
diff --git a/net/Kconfig b/net/Kconfig

index 0715db64a5c3dd7ea0cd12f09c3fe62ee8385f3a..d334678c0bd8706d7c3efb6c0f35d0d62102c360 100644 (file)
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -224,7 +224,7 @@ source "net/hsr/Kconfig"
  
  config RPS
         boolean
-       depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+       depends on SMP && SYSFS
         default y
  
  config RFS_ACCEL
@@ -235,7 +235,7 @@ config RFS_ACCEL
  
  config XPS
         boolean
-       depends on SMP && USE_GENERIC_SMP_HELPERS
+       depends on SMP
         default y
  
  config NETPRIO_CGROUP
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index 3dc0c6cf02a896e66071cd5f66b0a93f0dd3fc06..c4638e6f023843bedbceb91f1bef3c424ad3b666 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1425,7 +1425,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
         do {
                 if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
                                               last_issued, &done,
-                                             &used) == DMA_SUCCESS) {
+                                             &used) == DMA_COMPLETE) {
                         /* Safe to free early-copied skbs now */
                         __skb_queue_purge(&sk->sk_async_wait_queue);
                         break;
@@ -1433,7 +1433,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
                         struct sk_buff *skb;
                         while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
                                (dma_async_is_complete(skb->dma_cookie, done,
-                                                     used) == DMA_SUCCESS)) {
+                                                     used) == DMA_COMPLETE)) {
                                 __skb_dequeue(&sk->sk_async_wait_queue);
                                 kfree_skb(skb);
                         }
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index d0d14a04dce1eb2e4274e3111d9008b71da918aa..bf04b30a788a5425b28ba70c11a50b77bfc9c697 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -471,15 +471,6 @@ struct rpc_filelist {
         umode_t mode;
  };
  
-static int rpc_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations rpc_dentry_operations = {
-       .d_delete = rpc_delete_dentry,
-};
-
  static struct inode *
  rpc_get_inode(struct super_block *sb, umode_t mode)
  {
@@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
         sb->s_magic = RPCAUTH_GSSMAGIC;
         sb->s_op = &s_ops;
-       sb->s_d_op = &rpc_dentry_operations;
+       sb->s_d_op = &simple_dentry_operations;
         sb->s_time_gran = 1;
  
         inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c

index db0e5cd34c70866e097e0af91d745d1d96d579fe..91c4117637ae1fdf33d385ea5c3f9eba601e03a8 100644 (file)
--- a/scripts/asn1_compiler.c
+++ b/scripts/asn1_compiler.c
@@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out)
                         render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act);
                         render_opcode(out, "_jump_target(%u),\n", entry);
                         break;
+               default:
+                       break;
                 }
                 if (e->action)
                         render_opcode(out, "_action(ACT_%s),\n",
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl

index 61090e0ff613aefeb5429c26d4efc7775ad97739..9c98100303774cd228ad83f3ff7bfcaeeb7af6fd 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3289,6 +3289,7 @@ sub process {
                         }
                 }
                 if (!defined $suppress_whiletrailers{$linenr} &&
+                   defined($stat) && defined($cond) &&
                     $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
                         my ($s, $c) = ($stat, $cond);
  
diff --git a/security/Makefile b/security/Makefile

index c26c81e925712fbc2ba38264f477bdd0a02f548d..a5918e01a4f71a6e97abc664682bd23f75bc544f 100644 (file)
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_MMU)                     += min_addr.o
  # Object file lists
  obj-$(CONFIG_SECURITY)                 += security.o capability.o
  obj-$(CONFIG_SECURITYFS)               += inode.o
-# Must precede capability.o in order to stack properly.
  obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
  obj-$(CONFIG_SECURITY_SMACK)           += smack/built-in.o
  obj-$(CONFIG_AUDIT)                    += lsm_audit.o
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c

index 031d2d9dd6950b7e6c1bf6f3b16f2a37a9ab9e22..89c78658031f10bfc0527030ed970aebe3ff5471 100644 (file)
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -111,7 +111,6 @@ static const char *const aa_audit_type[] = {
  static void audit_pre(struct audit_buffer *ab, void *ca)
  {
         struct common_audit_data *sa = ca;
-       struct task_struct *tsk = sa->aad->tsk ? sa->aad->tsk : current;
  
         if (aa_g_audit_header) {
                 audit_log_format(ab, "apparmor=");
@@ -132,11 +131,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
  
         if (sa->aad->profile) {
                 struct aa_profile *profile = sa->aad->profile;
-               pid_t pid;
-               rcu_read_lock();
-               pid = rcu_dereference(tsk->real_parent)->pid;
-               rcu_read_unlock();
-               audit_log_format(ab, " parent=%d", pid);
                 if (profile->ns != root_ns) {
                         audit_log_format(ab, " namespace=");
                         audit_log_untrustedstring(ab, profile->ns->base.hname);
@@ -149,12 +143,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
                 audit_log_format(ab, " name=");
                 audit_log_untrustedstring(ab, sa->aad->name);
         }
-
-       if (sa->aad->tsk) {
-               audit_log_format(ab, " pid=%d comm=", tsk->pid);
-               audit_log_untrustedstring(ab, tsk->comm);
-       }
-
  }
  
  /**
@@ -212,7 +200,7 @@ int aa_audit(int type, struct aa_profile *profile, gfp_t gfp,
  
         if (sa->aad->type == AUDIT_APPARMOR_KILL)
                 (void)send_sig_info(SIGKILL, NULL,
-                                   sa->aad->tsk ?  sa->aad->tsk : current);
+                                   sa->u.tsk ?  sa->u.tsk : current);
  
         if (sa->aad->type == AUDIT_APPARMOR_ALLOWED)
                 return complain_error(sa->aad->error);
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c

index 84d1f5f538778b58f0b60c48d4a55ede44ff4c4f..1101c6f64bb7cb36602957ef2bcbc1538ee8aa63 100644 (file)
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -53,8 +53,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
  
  /**
   * audit_caps - audit a capability
- * @profile: profile confining task (NOT NULL)
- * @task: task capability test was performed against (NOT NULL)
+ * @profile: profile being tested for confinement (NOT NULL)
   * @cap: capability tested
   * @error: error code returned by test
   *
@@ -63,8 +62,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
   *
   * Returns: 0 or sa->error on success,  error code on failure
   */
-static int audit_caps(struct aa_profile *profile, struct task_struct *task,
-                     int cap, int error)
+static int audit_caps(struct aa_profile *profile, int cap, int error)
  {
         struct audit_cache *ent;
         int type = AUDIT_APPARMOR_AUTO;
@@ -73,7 +71,6 @@ static int audit_caps(struct aa_profile *profile, struct task_struct *task,
         sa.type = LSM_AUDIT_DATA_CAP;
         sa.aad = &aad;
         sa.u.cap = cap;
-       sa.aad->tsk = task;
         sa.aad->op = OP_CAPABLE;
         sa.aad->error = error;
  
@@ -124,8 +121,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
  
  /**
   * aa_capable - test permission to use capability
- * @task: task doing capability test against (NOT NULL)
- * @profile: profile confining @task (NOT NULL)
+ * @profile: profile being tested against (NOT NULL)
   * @cap: capability to be tested
   * @audit: whether an audit record should be generated
   *
@@ -133,8 +129,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
   *
   * Returns: 0 on success, or else an error code.
   */
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
-              int audit)
+int aa_capable(struct aa_profile *profile, int cap, int audit)
  {
         int error = profile_capable(profile, cap);
  
@@ -144,5 +139,5 @@ int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
                 return error;
         }
  
-       return audit_caps(profile, task, cap, error);
+       return audit_caps(profile, cap, error);
  }
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c

index 26c607c971f5656da192698602dbdb85afcfa8f1..452567d3a08e7ccfc5a7e3ae20ae95307554f85c 100644 (file)
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -50,23 +50,21 @@ void aa_free_domain_entries(struct aa_domain *domain)
  
  /**
   * may_change_ptraced_domain - check if can change profile on ptraced task
- * @task: task we want to change profile of   (NOT NULL)
   * @to_profile: profile to change to  (NOT NULL)
   *
- * Check if the task is ptraced and if so if the tracing task is allowed
+ * Check if current is ptraced and if so if the tracing task is allowed
   * to trace the new domain
   *
   * Returns: %0 or error if change not allowed
   */
-static int may_change_ptraced_domain(struct task_struct *task,
-                                    struct aa_profile *to_profile)
+static int may_change_ptraced_domain(struct aa_profile *to_profile)
  {
         struct task_struct *tracer;
         struct aa_profile *tracerp = NULL;
         int error = 0;
  
         rcu_read_lock();
-       tracer = ptrace_parent(task);
+       tracer = ptrace_parent(current);
         if (tracer)
                 /* released below */
                 tracerp = aa_get_task_profile(tracer);
@@ -75,7 +73,7 @@ static int may_change_ptraced_domain(struct task_struct *task,
         if (!tracer || unconfined(tracerp))
                 goto out;
  
-       error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
+       error = aa_may_ptrace(tracerp, to_profile, PTRACE_MODE_ATTACH);
  
  out:
         rcu_read_unlock();
@@ -477,7 +475,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
         }
  
         if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
-               error = may_change_ptraced_domain(current, new_profile);
+               error = may_change_ptraced_domain(new_profile);
                 if (error) {
                         aa_put_profile(new_profile);
                         goto audit;
@@ -690,7 +688,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
                         }
                 }
  
-               error = may_change_ptraced_domain(current, hat);
+               error = may_change_ptraced_domain(hat);
                 if (error) {
                         info = "ptraced";
                         error = -EPERM;
@@ -829,7 +827,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
         }
  
         /* check if tracing task is allowed to trace target domain */
-       error = may_change_ptraced_domain(current, target);
+       error = may_change_ptraced_domain(target);
         if (error) {
                 info = "ptrace prevents transition";
                 goto audit;
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h

index 30e8d7687259aaef15defab3883e8e1e52d91c1f..ba3dfd17f23f2671b20512a63c06ba75928ed0fc 100644 (file)
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -109,7 +109,6 @@ struct apparmor_audit_data {
         void *profile;
         const char *name;
         const char *info;
-       struct task_struct *tsk;
         union {
                 void *target;
                 struct {
diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h

index 2e7c9d6a2f3bb3f7b7ab6f4aab3b63a46a56a10a..fc3fa381d8506c5dc249e93809b15292c87ffb84 100644 (file)
--- a/security/apparmor/include/capability.h
+++ b/security/apparmor/include/capability.h
@@ -4,7 +4,7 @@
   * This file contains AppArmor capability mediation definitions.
   *
   * Copyright (C) 1998-2008 Novell/SUSE
- * Copyright 2009-2010 Canonical Ltd.
+ * Copyright 2009-2013 Canonical Ltd.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License as
@@ -38,8 +38,7 @@ struct aa_caps {
  
  extern struct aa_fs_entry aa_fs_entry_caps[];
  
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
-              int audit);
+int aa_capable(struct aa_profile *profile, int cap, int audit);
  
  static inline void aa_free_cap_rules(struct aa_caps *caps)
  {
diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h

index aeda0fbc8b2fe2273278edf84903dfe0a1b13925..288ca76e2fb116a6ccb1c366220ef1d918f7b67c 100644 (file)
--- a/security/apparmor/include/ipc.h
+++ b/security/apparmor/include/ipc.h
@@ -19,8 +19,8 @@
  
  struct aa_profile;
  
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
-                 struct aa_profile *tracee, unsigned int mode);
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+                 unsigned int mode);
  
  int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
               unsigned int mode);
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c

index c51d2266587e8b62d09394c0d8320a4b53001569..777ac1c47253ef4f88aa1bc97c0539a9e0b96e83 100644 (file)
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -54,15 +54,14 @@ static int aa_audit_ptrace(struct aa_profile *profile,
  
  /**
   * aa_may_ptrace - test if tracer task can trace the tracee
- * @tracer_task: task who will do the tracing  (NOT NULL)
   * @tracer: profile of the task doing the tracing  (NOT NULL)
   * @tracee: task to be traced
   * @mode: whether PTRACE_MODE_READ || PTRACE_MODE_ATTACH
   *
   * Returns: %0 else error code if permission denied or error
   */
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
-                 struct aa_profile *tracee, unsigned int mode)
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+                 unsigned int mode)
  {
         /* TODO: currently only based on capability, not extended ptrace
          *       rules,
@@ -72,7 +71,7 @@ int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
         if (unconfined(tracer) || tracer == tracee)
                 return 0;
         /* log this capability request */
-       return aa_capable(tracer_task, tracer, CAP_SYS_PTRACE, 1);
+       return aa_capable(tracer, CAP_SYS_PTRACE, 1);
  }
  
  /**
@@ -101,7 +100,7 @@ int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
         if (!unconfined(tracer_p)) {
                 struct aa_profile *tracee_p = aa_get_task_profile(tracee);
  
-               error = aa_may_ptrace(tracer, tracer_p, tracee_p, mode);
+               error = aa_may_ptrace(tracer_p, tracee_p, mode);
                 error = aa_audit_ptrace(tracer_p, tracee_p, error);
  
                 aa_put_profile(tracee_p);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c

index fb99e18123b41b4f049fd98078e88aafccb7729b..4257b7e2796bf16e41db9ddca0e1f7c8e0d08a02 100644 (file)
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -145,7 +145,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
         if (!error) {
                 profile = aa_cred_profile(cred);
                 if (!unconfined(profile))
-                       error = aa_capable(current, profile, cap, audit);
+                       error = aa_capable(profile, cap, audit);
         }
         return error;
  }
diff --git a/security/capability.c b/security/capability.c

index dbeb9bc27b24a14b7f546a44843bba2757db77cb..8b4f24ae43381de05af67271edd9a8ddd57c651f 100644 (file)
--- a/security/capability.c
+++ b/security/capability.c
@@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx)
         return 0;
  }
  
-static int cap_xfrm_state_alloc_security(struct xfrm_state *x,
-                                        struct xfrm_user_sec_ctx *sec_ctx,
-                                        u32 secid)
+static int cap_xfrm_state_alloc(struct xfrm_state *x,
+                               struct xfrm_user_sec_ctx *sec_ctx)
+{
+       return 0;
+}
+
+static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                       struct xfrm_sec_ctx *polsec,
+                                       u32 secid)
  {
         return 0;
  }
@@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops)
         set_to_cap_if_null(ops, xfrm_policy_clone_security);
         set_to_cap_if_null(ops, xfrm_policy_free_security);
         set_to_cap_if_null(ops, xfrm_policy_delete_security);
-       set_to_cap_if_null(ops, xfrm_state_alloc_security);
+       set_to_cap_if_null(ops, xfrm_state_alloc);
+       set_to_cap_if_null(ops, xfrm_state_alloc_acquire);
         set_to_cap_if_null(ops, xfrm_state_free_security);
         set_to_cap_if_null(ops, xfrm_state_delete_security);
         set_to_cap_if_null(ops, xfrm_policy_lookup);
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c

index 0b759e17a1311abc3a2fb1f6fbf7e554b8f71ce5..77ca965ab684e67e2809dc16f024728d8df41c70 100644 (file)
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -13,7 +13,9 @@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/err.h>
+#include <linux/sched.h>
  #include <linux/rbtree.h>
+#include <linux/cred.h>
  #include <linux/key-type.h>
  #include <linux/digsig.h>
  
@@ -21,21 +23,29 @@
  
  static struct key *keyring[INTEGRITY_KEYRING_MAX];
  
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
+       ".evm",
+       ".module",
+       ".ima",
+};
+#else
  static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
         "_evm",
         "_module",
         "_ima",
  };
+#endif
  
  int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                                       const char *digest, int digestlen)
+                           const char *digest, int digestlen)
  {
         if (id >= INTEGRITY_KEYRING_MAX)
                 return -EINVAL;
  
         if (!keyring[id]) {
                 keyring[id] =
-                       request_key(&key_type_keyring, keyring_name[id], NULL);
+                   request_key(&key_type_keyring, keyring_name[id], NULL);
                 if (IS_ERR(keyring[id])) {
                         int err = PTR_ERR(keyring[id]);
                         pr_err("no %s keyring: %d\n", keyring_name[id], err);
@@ -44,9 +54,10 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                 }
         }
  
-       switch (sig[0]) {
+       switch (sig[1]) {
         case 1:
-               return digsig_verify(keyring[id], sig, siglen,
+               /* v1 API expect signature without xattr type */
+               return digsig_verify(keyring[id], sig + 1, siglen - 1,
                                      digest, digestlen);
         case 2:
                 return asymmetric_verify(keyring[id], sig, siglen,
@@ -55,3 +66,21 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
  
         return -EOPNOTSUPP;
  }
+
+int integrity_init_keyring(const unsigned int id)
+{
+       const struct cred *cred = current_cred();
+       const struct user_struct *user = cred->user;
+
+       keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
+                                   KGIDT_INIT(0), cred,
+                                   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                    KEY_USR_VIEW | KEY_USR_READ),
+                                   KEY_ALLOC_NOT_IN_QUOTA, user->uid_keyring);
+       if (!IS_ERR(keyring[id]))
+               set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags);
+       else
+               pr_info("Can't allocate %s keyring (%ld)\n",
+                       keyring_name[id], PTR_ERR(keyring[id]));
+       return 0;
+}
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c

index b4754667659da1d6d8896a07f689c4b3f53fc2c8..9eae4809006be6f364ed9e5fe31ccb897381b856 100644 (file)
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -19,17 +19,6 @@
  
  #include "integrity.h"
  
-/*
- * signature format v2 - for using with asymmetric keys
- */
-struct signature_v2_hdr {
-       uint8_t version;        /* signature format version */
-       uint8_t hash_algo;      /* Digest algorithm [enum pkey_hash_algo] */
-       uint32_t keyid;         /* IMA key identifier - not X509/PGP specific*/
-       uint16_t sig_size;      /* signature size */
-       uint8_t sig[0];         /* signature payload */
-} __packed;
-
  /*
   * Request an asymmetric key.
   */
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c

index af9b6852f4e1bf571b55a2010fd6ab0488119cda..336b3ddfe63f5304374f860419cd3f5251bde5b8 100644 (file)
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -123,7 +123,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                 goto out;
         }
  
-       xattr_len = rc - 1;
+       xattr_len = rc;
  
         /* check value type */
         switch (xattr_data->type) {
@@ -143,7 +143,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                 if (rc)
                         break;
                 rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM,
-                                       xattr_data->digest, xattr_len,
+                                       (const char *)xattr_data, xattr_len,
                                         calc.digest, sizeof(calc.digest));
                 if (!rc) {
                         /* we probably want to replace rsa with hmac here */
diff --git a/security/integrity/evm/evm_posix_acl.c b/security/integrity/evm/evm_posix_acl.c

index b1753e98bf9aff919ab2b832de59e0858ae37abb..46408b9e62e876e4f711a5231ab0584aa5c7c1fd 100644 (file)
--- a/security/integrity/evm/evm_posix_acl.c
+++ b/security/integrity/evm/evm_posix_acl.c
@@ -11,8 +11,9 @@
  
  #include <linux/module.h>
  #include <linux/xattr.h>
+#include <linux/evm.h>
  
-int posix_xattr_acl(char *xattr)
+int posix_xattr_acl(const char *xattr)
  {
         int xattr_len = strlen(xattr);
  
diff --git a/security/integrity/iint.c b/security/integrity/iint.c

index 74522dbd10a6e093fe293786f83a10ca8d7361bc..c49d3f14cbec96b49e2b8bedf15b4be570c5a7ef 100644 (file)
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -70,6 +70,8 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode)
  
  static void iint_free(struct integrity_iint_cache *iint)
  {
+       kfree(iint->ima_hash);
+       iint->ima_hash = NULL;
         iint->version = 0;
         iint->flags = 0UL;
         iint->ima_file_status = INTEGRITY_UNKNOWN;
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig

index 39196abaff0d69d7d600ecd53847ba62b8cebed1..dad8d4ca2437fd608b73218993b23c3275c65b8d 100644 (file)
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -9,6 +9,7 @@ config IMA
         select CRYPTO_HMAC
         select CRYPTO_MD5
         select CRYPTO_SHA1
+       select CRYPTO_HASH_INFO
         select TCG_TPM if HAS_IOMEM && !UML
         select TCG_TIS if TCG_TPM && X86
         select TCG_IBMVTPM if TCG_TPM && PPC64
@@ -45,6 +46,69 @@ config IMA_LSM_RULES
         help
           Disabling this option will disregard LSM based policy rules.
  
+choice
+       prompt "Default template"
+       default IMA_NG_TEMPLATE
+       depends on IMA
+       help
+         Select the default IMA measurement template.
+
+         The original 'ima' measurement list template contains a
+         hash, defined as 20 bytes, and a null terminated pathname,
+         limited to 255 characters.  The 'ima-ng' measurement list
+         template permits both larger hash digests and longer
+         pathnames.
+
+       config IMA_TEMPLATE
+               bool "ima"
+       config IMA_NG_TEMPLATE
+               bool "ima-ng (default)"
+       config IMA_SIG_TEMPLATE
+               bool "ima-sig"
+endchoice
+
+config IMA_DEFAULT_TEMPLATE
+       string
+       depends on IMA
+       default "ima" if IMA_TEMPLATE
+       default "ima-ng" if IMA_NG_TEMPLATE
+       default "ima-sig" if IMA_SIG_TEMPLATE
+
+choice
+       prompt "Default integrity hash algorithm"
+       default IMA_DEFAULT_HASH_SHA1
+       depends on IMA
+       help
+          Select the default hash algorithm used for the measurement
+          list, integrity appraisal and audit log.  The compiled default
+          hash algorithm can be overwritten using the kernel command
+          line 'ima_hash=' option.
+
+       config IMA_DEFAULT_HASH_SHA1
+               bool "SHA1 (default)"
+               depends on CRYPTO_SHA1
+
+       config IMA_DEFAULT_HASH_SHA256
+               bool "SHA256"
+               depends on CRYPTO_SHA256 && !IMA_TEMPLATE
+
+       config IMA_DEFAULT_HASH_SHA512
+               bool "SHA512"
+               depends on CRYPTO_SHA512 && !IMA_TEMPLATE
+
+       config IMA_DEFAULT_HASH_WP512
+               bool "WP512"
+               depends on CRYPTO_WP512 && !IMA_TEMPLATE
+endchoice
+
+config IMA_DEFAULT_HASH
+       string
+       depends on IMA
+       default "sha1" if IMA_DEFAULT_HASH_SHA1
+       default "sha256" if IMA_DEFAULT_HASH_SHA256
+       default "sha512" if IMA_DEFAULT_HASH_SHA512
+       default "wp512" if IMA_DEFAULT_HASH_WP512
+
  config IMA_APPRAISE
         bool "Appraise integrity measurements"
         depends on IMA
@@ -59,3 +123,11 @@ config IMA_APPRAISE
           For more information on integrity appraisal refer to:
           <http://linux-ima.sourceforge.net>
           If unsure, say N.
+
+config IMA_TRUSTED_KEYRING
+       bool "Require all keys on the _ima keyring be signed"
+       depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
+       default y
+       help
+          This option requires that all keys added to the _ima
+          keyring be signed by a key on the system trusted keyring.
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile

index 56dfee7cbf61c6605adf103dbf91393b33fe9256..d79263d2fdbfd0098666f541db32552c784688ce 100644 (file)
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -6,5 +6,5 @@
  obj-$(CONFIG_IMA) += ima.o
  
  ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
-        ima_policy.o
+        ima_policy.o ima_template.o ima_template_lib.o
  ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h

index b3dd616560f72054e13ddeaedaa39017585c2e5c..bf03c6a16cc83ace2f47a3ea7571ef79a63ed1d9 100644 (file)
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -36,23 +36,48 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
  #define IMA_HASH_BITS 9
  #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS)
  
+#define IMA_TEMPLATE_FIELD_ID_MAX_LEN  16
+#define IMA_TEMPLATE_NUM_FIELDS_MAX    15
+
+#define IMA_TEMPLATE_IMA_NAME "ima"
+#define IMA_TEMPLATE_IMA_FMT "d|n"
+
  /* set during initialization */
  extern int ima_initialized;
  extern int ima_used_chip;
-extern char *ima_hash;
+extern int ima_hash_algo;
  extern int ima_appraise;
  
-/* IMA inode template definition */
-struct ima_template_data {
-       u8 digest[IMA_DIGEST_SIZE];     /* sha1/md5 measurement hash */
-       char file_name[IMA_EVENT_NAME_LEN_MAX + 1];     /* name + \0 */
+/* IMA template field data definition */
+struct ima_field_data {
+       u8 *data;
+       u32 len;
+};
+
+/* IMA template field definition */
+struct ima_template_field {
+       const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN];
+       int (*field_init) (struct integrity_iint_cache *iint, struct file *file,
+                          const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len, struct ima_field_data *field_data);
+       void (*field_show) (struct seq_file *m, enum ima_show_type show,
+                           struct ima_field_data *field_data);
+};
+
+/* IMA template descriptor definition */
+struct ima_template_desc {
+       char *name;
+       char *fmt;
+       int num_fields;
+       struct ima_template_field **fields;
  };
  
  struct ima_template_entry {
-       u8 digest[IMA_DIGEST_SIZE];     /* sha1 or md5 measurement hash */
-       const char *template_name;
-       int template_len;
-       struct ima_template_data template;
+       u8 digest[TPM_DIGEST_SIZE];     /* sha1 or md5 measurement hash */
+       struct ima_template_desc *template_desc; /* template descriptor */
+       u32 template_data_len;
+       struct ima_field_data template_data[0]; /* template related data */
  };
  
  struct ima_queue_entry {
@@ -69,13 +94,21 @@ int ima_fs_init(void);
  void ima_fs_cleanup(void);
  int ima_inode_alloc(struct inode *inode);
  int ima_add_template_entry(struct ima_template_entry *entry, int violation,
-                          const char *op, struct inode *inode);
-int ima_calc_file_hash(struct file *file, char *digest);
-int ima_calc_buffer_hash(const void *data, int len, char *digest);
-int ima_calc_boot_aggregate(char *digest);
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+                          const char *op, struct inode *inode,
+                          const unsigned char *filename);
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash);
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+                             struct ima_digest_data *hash);
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash);
+void ima_add_violation(struct file *file, const unsigned char *filename,
                        const char *op, const char *cause);
  int ima_init_crypto(void);
+void ima_putc(struct seq_file *m, void *data, int datalen);
+void ima_print_digest(struct seq_file *m, u8 *digest, int size);
+struct ima_template_desc *ima_template_desc_current(void);
+int ima_init_template(void);
+
+int ima_init_template(void);
  
  /*
   * used to protect h_table and sha_table
@@ -98,14 +131,21 @@ static inline unsigned long ima_hash_key(u8 *digest)
  int ima_get_action(struct inode *inode, int mask, int function);
  int ima_must_measure(struct inode *inode, int mask, int function);
  int ima_collect_measurement(struct integrity_iint_cache *iint,
-                           struct file *file);
+                           struct file *file,
+                           struct evm_ima_xattr_data **xattr_value,
+                           int *xattr_len);
  void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
-                          const unsigned char *filename);
+                          const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len);
  void ima_audit_measurement(struct integrity_iint_cache *iint,
                            const unsigned char *filename);
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_template_entry **entry);
  int ima_store_template(struct ima_template_entry *entry, int violation,
-                      struct inode *inode);
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show);
+                      struct inode *inode, const unsigned char *filename);
  const char *ima_d_path(struct path *path, char **pathbuf);
  
  /* rbtree tree calls to lookup, insert, delete
@@ -131,17 +171,25 @@ void ima_delete_rules(void);
  
  #ifdef CONFIG_IMA_APPRAISE
  int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
-                            struct file *file, const unsigned char *filename);
+                            struct file *file, const unsigned char *filename,
+                            struct evm_ima_xattr_data *xattr_value,
+                            int xattr_len);
  int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
  void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
  enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
                                            int func);
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_digest_data *hash);
+int ima_read_xattr(struct dentry *dentry,
+                  struct evm_ima_xattr_data **xattr_value);
  
  #else
  static inline int ima_appraise_measurement(int func,
                                            struct integrity_iint_cache *iint,
                                            struct file *file,
-                                          const unsigned char *filename)
+                                          const unsigned char *filename,
+                                          struct evm_ima_xattr_data *xattr_value,
+                                          int xattr_len)
  {
         return INTEGRITY_UNKNOWN;
  }
@@ -162,6 +210,19 @@ static inline enum integrity_status ima_get_cache_status(struct integrity_iint_c
  {
         return INTEGRITY_UNKNOWN;
  }
+
+static inline void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
+                                    int xattr_len,
+                                    struct ima_digest_data *hash)
+{
+}
+
+static inline int ima_read_xattr(struct dentry *dentry,
+                                struct evm_ima_xattr_data **xattr_value)
+{
+       return 0;
+}
+
  #endif
  
  /* LSM based policy rules require audit */
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c

index 1c03e8f1e0e125cc948854e033d689a2aab22303..0e7540863fc299687877ae4591961b13b60fcaa9 100644 (file)
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -18,9 +18,46 @@
  #include <linux/fs.h>
  #include <linux/xattr.h>
  #include <linux/evm.h>
+#include <crypto/hash_info.h>
  #include "ima.h"
  
-static const char *IMA_TEMPLATE_NAME = "ima";
+/*
+ * ima_alloc_init_template - create and initialize a new template entry
+ */
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_template_entry **entry)
+{
+       struct ima_template_desc *template_desc = ima_template_desc_current();
+       int i, result = 0;
+
+       *entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
+                        sizeof(struct ima_field_data), GFP_NOFS);
+       if (!*entry)
+               return -ENOMEM;
+
+       for (i = 0; i < template_desc->num_fields; i++) {
+               struct ima_template_field *field = template_desc->fields[i];
+               u32 len;
+
+               result = field->field_init(iint, file, filename,
+                                          xattr_value, xattr_len,
+                                          &((*entry)->template_data[i]));
+               if (result != 0)
+                       goto out;
+
+               len = (*entry)->template_data[i].len;
+               (*entry)->template_data_len += sizeof(len);
+               (*entry)->template_data_len += len;
+       }
+       (*entry)->template_desc = template_desc;
+       return 0;
+out:
+       kfree(*entry);
+       *entry = NULL;
+       return result;
+}
  
  /*
   * ima_store_template - store ima template measurements
@@ -39,28 +76,34 @@ static const char *IMA_TEMPLATE_NAME = "ima";
   * Returns 0 on success, error code otherwise
   */
  int ima_store_template(struct ima_template_entry *entry,
-                      int violation, struct inode *inode)
+                      int violation, struct inode *inode,
+                      const unsigned char *filename)
  {
         const char *op = "add_template_measure";
         const char *audit_cause = "hashing_error";
+       char *template_name = entry->template_desc->name;
         int result;
-
-       memset(entry->digest, 0, sizeof(entry->digest));
-       entry->template_name = IMA_TEMPLATE_NAME;
-       entry->template_len = sizeof(entry->template);
+       struct {
+               struct ima_digest_data hdr;
+               char digest[TPM_DIGEST_SIZE];
+       } hash;
  
         if (!violation) {
-               result = ima_calc_buffer_hash(&entry->template,
-                                               entry->template_len,
-                                               entry->digest);
+               int num_fields = entry->template_desc->num_fields;
+
+               /* this function uses default algo */
+               hash.hdr.algo = HASH_ALGO_SHA1;
+               result = ima_calc_field_array_hash(&entry->template_data[0],
+                                                  num_fields, &hash.hdr);
                 if (result < 0) {
                         integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-                                           entry->template_name, op,
+                                           template_name, op,
                                             audit_cause, result, 0);
                         return result;
                 }
+               memcpy(entry->digest, hash.hdr.digest, hash.hdr.length);
         }
-       result = ima_add_template_entry(entry, violation, op, inode);
+       result = ima_add_template_entry(entry, violation, op, inode, filename);
         return result;
  }
  
@@ -71,24 +114,24 @@ int ima_store_template(struct ima_template_entry *entry,
   * By extending the PCR with 0xFF's instead of with zeroes, the PCR
   * value is invalidated.
   */
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+void ima_add_violation(struct file *file, const unsigned char *filename,
                        const char *op, const char *cause)
  {
         struct ima_template_entry *entry;
+       struct inode *inode = file->f_dentry->d_inode;
         int violation = 1;
         int result;
  
         /* can overflow, only indicator */
         atomic_long_inc(&ima_htable.violations);
  
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry) {
+       result = ima_alloc_init_template(NULL, file, filename,
+                                        NULL, 0, &entry);
+       if (result < 0) {
                 result = -ENOMEM;
                 goto err_out;
         }
-       memset(&entry->template, 0, sizeof(entry->template));
-       strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX);
-       result = ima_store_template(entry, violation, inode);
+       result = ima_store_template(entry, violation, inode, filename);
         if (result < 0)
                 kfree(entry);
  err_out:
@@ -138,20 +181,42 @@ int ima_must_measure(struct inode *inode, int mask, int function)
   * Return 0 on success, error code otherwise
   */
  int ima_collect_measurement(struct integrity_iint_cache *iint,
-                           struct file *file)
+                           struct file *file,
+                           struct evm_ima_xattr_data **xattr_value,
+                           int *xattr_len)
  {
         struct inode *inode = file_inode(file);
         const char *filename = file->f_dentry->d_name.name;
         int result = 0;
+       struct {
+               struct ima_digest_data hdr;
+               char digest[IMA_MAX_DIGEST_SIZE];
+       } hash;
+
+       if (xattr_value)
+               *xattr_len = ima_read_xattr(file->f_dentry, xattr_value);
  
         if (!(iint->flags & IMA_COLLECTED)) {
                 u64 i_version = file_inode(file)->i_version;
  
-               iint->ima_xattr.type = IMA_XATTR_DIGEST;
-               result = ima_calc_file_hash(file, iint->ima_xattr.digest);
+               /* use default hash algorithm */
+               hash.hdr.algo = ima_hash_algo;
+
+               if (xattr_value)
+                       ima_get_hash_algo(*xattr_value, *xattr_len, &hash.hdr);
+
+               result = ima_calc_file_hash(file, &hash.hdr);
                 if (!result) {
-                       iint->version = i_version;
-                       iint->flags |= IMA_COLLECTED;
+                       int length = sizeof(hash.hdr) + hash.hdr.length;
+                       void *tmpbuf = krealloc(iint->ima_hash, length,
+                                               GFP_NOFS);
+                       if (tmpbuf) {
+                               iint->ima_hash = tmpbuf;
+                               memcpy(iint->ima_hash, &hash, length);
+                               iint->version = i_version;
+                               iint->flags |= IMA_COLLECTED;
+                       } else
+                               result = -ENOMEM;
                 }
         }
         if (result)
@@ -177,7 +242,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
   * Must be called with iint->mutex held.
   */
  void ima_store_measurement(struct integrity_iint_cache *iint,
-                          struct file *file, const unsigned char *filename)
+                          struct file *file, const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len)
  {
         const char *op = "add_template_measure";
         const char *audit_cause = "ENOMEM";
@@ -189,19 +256,15 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
         if (iint->flags & IMA_MEASURED)
                 return;
  
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry) {
+       result = ima_alloc_init_template(iint, file, filename,
+                                        xattr_value, xattr_len, &entry);
+       if (result < 0) {
                 integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
                                     op, audit_cause, result, 0);
                 return;
         }
-       memset(&entry->template, 0, sizeof(entry->template));
-       memcpy(entry->template.digest, iint->ima_xattr.digest, IMA_DIGEST_SIZE);
-       strcpy(entry->template.file_name,
-              (strlen(filename) > IMA_EVENT_NAME_LEN_MAX) ?
-              file->f_dentry->d_name.name : filename);
  
-       result = ima_store_template(entry, violation, inode);
+       result = ima_store_template(entry, violation, inode, filename);
         if (!result || result == -EEXIST)
                 iint->flags |= IMA_MEASURED;
         if (result < 0)
@@ -212,14 +275,16 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
                            const unsigned char *filename)
  {
         struct audit_buffer *ab;
-       char hash[(IMA_DIGEST_SIZE * 2) + 1];
+       char hash[(iint->ima_hash->length * 2) + 1];
+       const char *algo_name = hash_algo_name[iint->ima_hash->algo];
+       char algo_hash[sizeof(hash) + strlen(algo_name) + 2];
         int i;
  
         if (iint->flags & IMA_AUDITED)
                 return;
  
-       for (i = 0; i < IMA_DIGEST_SIZE; i++)
-               hex_byte_pack(hash + (i * 2), iint->ima_xattr.digest[i]);
+       for (i = 0; i < iint->ima_hash->length; i++)
+               hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
         hash[i * 2] = '\0';
  
         ab = audit_log_start(current->audit_context, GFP_KERNEL,
@@ -230,7 +295,8 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
         audit_log_format(ab, "file=");
         audit_log_untrustedstring(ab, filename);
         audit_log_format(ab, " hash=");
-       audit_log_untrustedstring(ab, hash);
+       snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash);
+       audit_log_untrustedstring(ab, algo_hash);
  
         audit_log_task_info(ab, current);
         audit_log_end(ab);
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c

index 2d4becab8918053a6d3520dc32527a33d92a3717..46353ee517f6f321a738b067584d7e62dd8872fe 100644 (file)
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -15,6 +15,7 @@
  #include <linux/magic.h>
  #include <linux/ima.h>
  #include <linux/evm.h>
+#include <crypto/hash_info.h>
  
  #include "ima.h"
  
@@ -43,19 +44,31 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
  }
  
  static int ima_fix_xattr(struct dentry *dentry,
-                         struct integrity_iint_cache *iint)
+                        struct integrity_iint_cache *iint)
  {
-       iint->ima_xattr.type = IMA_XATTR_DIGEST;
-       return __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
-                                    (u8 *)&iint->ima_xattr,
-                                     sizeof(iint->ima_xattr), 0);
+       int rc, offset;
+       u8 algo = iint->ima_hash->algo;
+
+       if (algo <= HASH_ALGO_SHA1) {
+               offset = 1;
+               iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST;
+       } else {
+               offset = 0;
+               iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG;
+               iint->ima_hash->xattr.ng.algo = algo;
+       }
+       rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
+                                  &iint->ima_hash->xattr.data[offset],
+                                  (sizeof(iint->ima_hash->xattr) - offset) +
+                                  iint->ima_hash->length, 0);
+       return rc;
  }
  
  /* Return specific func appraised cached result */
  enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
                                            int func)
  {
-       switch(func) {
+       switch (func) {
         case MMAP_CHECK:
                 return iint->ima_mmap_status;
         case BPRM_CHECK:
@@ -71,7 +84,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
  static void ima_set_cache_status(struct integrity_iint_cache *iint,
                                  int func, enum integrity_status status)
  {
-       switch(func) {
+       switch (func) {
         case MMAP_CHECK:
                 iint->ima_mmap_status = status;
                 break;
@@ -90,7 +103,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint,
  
  static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
  {
-       switch(func) {
+       switch (func) {
         case MMAP_CHECK:
                 iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED);
                 break;
@@ -107,6 +120,50 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
         }
  }
  
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_digest_data *hash)
+{
+       struct signature_v2_hdr *sig;
+
+       if (!xattr_value || xattr_len < 2)
+               return;
+
+       switch (xattr_value->type) {
+       case EVM_IMA_XATTR_DIGSIG:
+               sig = (typeof(sig))xattr_value;
+               if (sig->version != 2 || xattr_len <= sizeof(*sig))
+                       return;
+               hash->algo = sig->hash_algo;
+               break;
+       case IMA_XATTR_DIGEST_NG:
+               hash->algo = xattr_value->digest[0];
+               break;
+       case IMA_XATTR_DIGEST:
+               /* this is for backward compatibility */
+               if (xattr_len == 21) {
+                       unsigned int zero = 0;
+                       if (!memcmp(&xattr_value->digest[16], &zero, 4))
+                               hash->algo = HASH_ALGO_MD5;
+                       else
+                               hash->algo = HASH_ALGO_SHA1;
+               } else if (xattr_len == 17)
+                       hash->algo = HASH_ALGO_MD5;
+               break;
+       }
+}
+
+int ima_read_xattr(struct dentry *dentry,
+                  struct evm_ima_xattr_data **xattr_value)
+{
+       struct inode *inode = dentry->d_inode;
+
+       if (!inode->i_op->getxattr)
+               return 0;
+
+       return vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value,
+                                 0, GFP_NOFS);
+}
+
  /*
   * ima_appraise_measurement - appraise file measurement
   *
@@ -116,23 +173,22 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
   * Return 0 on success, error code otherwise
   */
  int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
-                            struct file *file, const unsigned char *filename)
+                            struct file *file, const unsigned char *filename,
+                            struct evm_ima_xattr_data *xattr_value,
+                            int xattr_len)
  {
         struct dentry *dentry = file->f_dentry;
         struct inode *inode = dentry->d_inode;
-       struct evm_ima_xattr_data *xattr_value = NULL;
         enum integrity_status status = INTEGRITY_UNKNOWN;
         const char *op = "appraise_data";
         char *cause = "unknown";
-       int rc;
+       int rc = xattr_len, hash_start = 0;
  
         if (!ima_appraise)
                 return 0;
         if (!inode->i_op->getxattr)
                 return INTEGRITY_UNKNOWN;
  
-       rc = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)&xattr_value,
-                               0, GFP_NOFS);
         if (rc <= 0) {
                 if (rc && rc != -ENODATA)
                         goto out;
@@ -153,14 +209,25 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
                 goto out;
         }
         switch (xattr_value->type) {
+       case IMA_XATTR_DIGEST_NG:
+               /* first byte contains algorithm id */
+               hash_start = 1;
         case IMA_XATTR_DIGEST:
                 if (iint->flags & IMA_DIGSIG_REQUIRED) {
                         cause = "IMA signature required";
                         status = INTEGRITY_FAIL;
                         break;
                 }
-               rc = memcmp(xattr_value->digest, iint->ima_xattr.digest,
-                           IMA_DIGEST_SIZE);
+               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
+                               iint->ima_hash->length)
+                       /* xattr length may be longer. md5 hash in previous
+                          version occupied 20 bytes in xattr, instead of 16
+                        */
+                       rc = memcmp(&xattr_value->digest[hash_start],
+                                   iint->ima_hash->digest,
+                                   iint->ima_hash->length);
+               else
+                       rc = -EINVAL;
                 if (rc) {
                         cause = "invalid-hash";
                         status = INTEGRITY_FAIL;
@@ -171,9 +238,9 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
         case EVM_IMA_XATTR_DIGSIG:
                 iint->flags |= IMA_DIGSIG;
                 rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
-                                            xattr_value->digest, rc - 1,
-                                            iint->ima_xattr.digest,
-                                            IMA_DIGEST_SIZE);
+                                            (const char *)xattr_value, rc,
+                                            iint->ima_hash->digest,
+                                            iint->ima_hash->length);
                 if (rc == -EOPNOTSUPP) {
                         status = INTEGRITY_UNKNOWN;
                 } else if (rc) {
@@ -203,7 +270,6 @@ out:
                 ima_cache_flags(iint, func);
         }
         ima_set_cache_status(iint, func, status);
-       kfree(xattr_value);
         return status;
  }
  
@@ -219,7 +285,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
         if (iint->flags & IMA_DIGSIG)
                 return;
  
-       rc = ima_collect_measurement(iint, file);
+       rc = ima_collect_measurement(iint, file, NULL, NULL);
         if (rc < 0)
                 return;
  
@@ -315,3 +381,14 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
         }
         return result;
  }
+
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static int __init init_ima_keyring(void)
+{
+       int ret;
+
+       ret = integrity_init_keyring(INTEGRITY_KEYRING_IMA);
+       return 0;
+}
+late_initcall(init_ima_keyring);
+#endif
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c

index a02e0791cf15c7add98bd922ebcc08cd3db0f725..676e0292dfecf6744b720b8a7e103415b924cc37 100644 (file)
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -20,6 +20,7 @@
  #include <linux/err.h>
  #include <linux/slab.h>
  #include <crypto/hash.h>
+#include <crypto/hash_info.h>
  #include "ima.h"
  
  static struct crypto_shash *ima_shash_tfm;
@@ -28,31 +29,58 @@ int ima_init_crypto(void)
  {
         long rc;
  
-       ima_shash_tfm = crypto_alloc_shash(ima_hash, 0, 0);
+       ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0);
         if (IS_ERR(ima_shash_tfm)) {
                 rc = PTR_ERR(ima_shash_tfm);
-               pr_err("Can not allocate %s (reason: %ld)\n", ima_hash, rc);
+               pr_err("Can not allocate %s (reason: %ld)\n",
+                      hash_algo_name[ima_hash_algo], rc);
                 return rc;
         }
         return 0;
  }
  
+static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo)
+{
+       struct crypto_shash *tfm = ima_shash_tfm;
+       int rc;
+
+       if (algo != ima_hash_algo && algo < HASH_ALGO__LAST) {
+               tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0);
+               if (IS_ERR(tfm)) {
+                       rc = PTR_ERR(tfm);
+                       pr_err("Can not allocate %s (reason: %d)\n",
+                              hash_algo_name[algo], rc);
+               }
+       }
+       return tfm;
+}
+
+static void ima_free_tfm(struct crypto_shash *tfm)
+{
+       if (tfm != ima_shash_tfm)
+               crypto_free_shash(tfm);
+}
+
  /*
   * Calculate the MD5/SHA1 file digest
   */
-int ima_calc_file_hash(struct file *file, char *digest)
+static int ima_calc_file_hash_tfm(struct file *file,
+                                 struct ima_digest_data *hash,
+                                 struct crypto_shash *tfm)
  {
         loff_t i_size, offset = 0;
         char *rbuf;
         int rc, read = 0;
         struct {
                 struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
         } desc;
  
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
         desc.shash.flags = 0;
  
+       hash->length = crypto_shash_digestsize(tfm);
+
         rc = crypto_shash_init(&desc.shash);
         if (rc != 0)
                 return rc;
@@ -85,27 +113,83 @@ int ima_calc_file_hash(struct file *file, char *digest)
         }
         kfree(rbuf);
         if (!rc)
-               rc = crypto_shash_final(&desc.shash, digest);
+               rc = crypto_shash_final(&desc.shash, hash->digest);
         if (read)
                 file->f_mode &= ~FMODE_READ;
  out:
         return rc;
  }
  
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       rc = ima_calc_file_hash_tfm(file, hash, tfm);
+
+       ima_free_tfm(tfm);
+
+       return rc;
+}
+
  /*
- * Calculate the hash of a given buffer
+ * Calculate the hash of template data
   */
-int ima_calc_buffer_hash(const void *data, int len, char *digest)
+static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
+                                        int num_fields,
+                                        struct ima_digest_data *hash,
+                                        struct crypto_shash *tfm)
  {
         struct {
                 struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
         } desc;
+       int rc, i;
  
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
         desc.shash.flags = 0;
  
-       return crypto_shash_digest(&desc.shash, data, len, digest);
+       hash->length = crypto_shash_digestsize(tfm);
+
+       rc = crypto_shash_init(&desc.shash);
+       if (rc != 0)
+               return rc;
+
+       for (i = 0; i < num_fields; i++) {
+               rc = crypto_shash_update(&desc.shash,
+                                        (const u8 *) &field_data[i].len,
+                                        sizeof(field_data[i].len));
+               rc = crypto_shash_update(&desc.shash, field_data[i].data,
+                                        field_data[i].len);
+               if (rc)
+                       break;
+       }
+
+       if (!rc)
+               rc = crypto_shash_final(&desc.shash, hash->digest);
+
+       return rc;
+}
+
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+                             struct ima_digest_data *hash)
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       rc = ima_calc_field_array_hash_tfm(field_data, num_fields, hash, tfm);
+
+       ima_free_tfm(tfm);
+
+       return rc;
  }
  
  static void __init ima_pcrread(int idx, u8 *pcr)
@@ -120,16 +204,17 @@ static void __init ima_pcrread(int idx, u8 *pcr)
  /*
   * Calculate the boot aggregate hash
   */
-int __init ima_calc_boot_aggregate(char *digest)
+static int __init ima_calc_boot_aggregate_tfm(char *digest,
+                                             struct crypto_shash *tfm)
  {
-       u8 pcr_i[IMA_DIGEST_SIZE];
+       u8 pcr_i[TPM_DIGEST_SIZE];
         int rc, i;
         struct {
                 struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
         } desc;
  
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
         desc.shash.flags = 0;
  
         rc = crypto_shash_init(&desc.shash);
@@ -140,9 +225,26 @@ int __init ima_calc_boot_aggregate(char *digest)
         for (i = TPM_PCR0; i < TPM_PCR8; i++) {
                 ima_pcrread(i, pcr_i);
                 /* now accumulate with current aggregate */
-               rc = crypto_shash_update(&desc.shash, pcr_i, IMA_DIGEST_SIZE);
+               rc = crypto_shash_update(&desc.shash, pcr_i, TPM_DIGEST_SIZE);
         }
         if (!rc)
                 crypto_shash_final(&desc.shash, digest);
         return rc;
  }
+
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash)
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       hash->length = crypto_shash_digestsize(tfm);
+       rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm);
+
+       ima_free_tfm(tfm);
+
+       return rc;
+}
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c

index 38477c9c3415cd9af47ec402e9694056a9ca3c22..d47a7c86a21d0d94f6c41933fd18311e553dafce 100644 (file)
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -88,8 +88,7 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
          * against concurrent list-extension
          */
         rcu_read_lock();
-       qe = list_entry_rcu(qe->later.next,
-                           struct ima_queue_entry, later);
+       qe = list_entry_rcu(qe->later.next, struct ima_queue_entry, later);
         rcu_read_unlock();
         (*pos)++;
  
@@ -100,7 +99,7 @@ static void ima_measurements_stop(struct seq_file *m, void *v)
  {
  }
  
-static void ima_putc(struct seq_file *m, void *data, int datalen)
+void ima_putc(struct seq_file *m, void *data, int datalen)
  {
         while (datalen--)
                 seq_putc(m, *(char *)data++);
@@ -111,6 +110,7 @@ static void ima_putc(struct seq_file *m, void *data, int datalen)
   *       char[20]=template digest
   *       32bit-le=template name size
   *       char[n]=template name
+ *       [eventdata length]
   *       eventdata[n]=template specific data
   */
  static int ima_measurements_show(struct seq_file *m, void *v)
@@ -120,6 +120,7 @@ static int ima_measurements_show(struct seq_file *m, void *v)
         struct ima_template_entry *e;
         int namelen;
         u32 pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+       int i;
  
         /* get entry */
         e = qe->entry;
@@ -134,18 +135,25 @@ static int ima_measurements_show(struct seq_file *m, void *v)
         ima_putc(m, &pcr, sizeof pcr);
  
         /* 2nd: template digest */
-       ima_putc(m, e->digest, IMA_DIGEST_SIZE);
+       ima_putc(m, e->digest, TPM_DIGEST_SIZE);
  
         /* 3rd: template name size */
-       namelen = strlen(e->template_name);
+       namelen = strlen(e->template_desc->name);
         ima_putc(m, &namelen, sizeof namelen);
  
         /* 4th:  template name */
-       ima_putc(m, (void *)e->template_name, namelen);
+       ima_putc(m, e->template_desc->name, namelen);
+
+       /* 5th:  template length (except for 'ima' template) */
+       if (strcmp(e->template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+               ima_putc(m, &e->template_data_len,
+                        sizeof(e->template_data_len));
  
-       /* 5th:  template specific data */
-       ima_template_show(m, (struct ima_template_data *)&e->template,
-                         IMA_SHOW_BINARY);
+       /* 6th:  template specific data */
+       for (i = 0; i < e->template_desc->num_fields; i++) {
+               e->template_desc->fields[i]->field_show(m, IMA_SHOW_BINARY,
+                                                       &e->template_data[i]);
+       }
         return 0;
  }
  
@@ -168,41 +176,21 @@ static const struct file_operations ima_measurements_ops = {
         .release = seq_release,
  };
  
-static void ima_print_digest(struct seq_file *m, u8 *digest)
+void ima_print_digest(struct seq_file *m, u8 *digest, int size)
  {
         int i;
  
-       for (i = 0; i < IMA_DIGEST_SIZE; i++)
+       for (i = 0; i < size; i++)
                 seq_printf(m, "%02x", *(digest + i));
  }
  
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show)
-{
-       struct ima_template_data *entry = e;
-       int namelen;
-
-       switch (show) {
-       case IMA_SHOW_ASCII:
-               ima_print_digest(m, entry->digest);
-               seq_printf(m, " %s\n", entry->file_name);
-               break;
-       case IMA_SHOW_BINARY:
-               ima_putc(m, entry->digest, IMA_DIGEST_SIZE);
-
-               namelen = strlen(entry->file_name);
-               ima_putc(m, &namelen, sizeof namelen);
-               ima_putc(m, entry->file_name, namelen);
-       default:
-               break;
-       }
-}
-
  /* print in ascii */
  static int ima_ascii_measurements_show(struct seq_file *m, void *v)
  {
         /* the list never shrinks, so we don't need a lock here */
         struct ima_queue_entry *qe = v;
         struct ima_template_entry *e;
+       int i;
  
         /* get entry */
         e = qe->entry;
@@ -213,14 +201,21 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v)
         seq_printf(m, "%2d ", CONFIG_IMA_MEASURE_PCR_IDX);
  
         /* 2nd: SHA1 template hash */
-       ima_print_digest(m, e->digest);
+       ima_print_digest(m, e->digest, TPM_DIGEST_SIZE);
  
         /* 3th:  template name */
-       seq_printf(m, " %s ", e->template_name);
+       seq_printf(m, " %s", e->template_desc->name);
  
         /* 4th:  template specific data */
-       ima_template_show(m, (struct ima_template_data *)&e->template,
-                         IMA_SHOW_ASCII);
+       for (i = 0; i < e->template_desc->num_fields; i++) {
+               seq_puts(m, " ");
+               if (e->template_data[i].len == 0)
+                       continue;
+
+               e->template_desc->fields[i]->field_show(m, IMA_SHOW_ASCII,
+                                                       &e->template_data[i]);
+       }
+       seq_puts(m, "\n");
         return 0;
  }
  
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c

index 162ea723db3df5f07a2dd23f4bcc66e1af03a898..15f34bd40abed1530216be53c59658b6245d280b 100644 (file)
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -18,6 +18,7 @@
  #include <linux/scatterlist.h>
  #include <linux/slab.h>
  #include <linux/err.h>
+#include <crypto/hash_info.h>
  #include "ima.h"
  
  /* name for boot aggregate entry */
@@ -42,28 +43,38 @@ int ima_used_chip;
  static void __init ima_add_boot_aggregate(void)
  {
         struct ima_template_entry *entry;
+       struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
         const char *op = "add_boot_aggregate";
         const char *audit_cause = "ENOMEM";
         int result = -ENOMEM;
-       int violation = 1;
+       int violation = 0;
+       struct {
+               struct ima_digest_data hdr;
+               char digest[TPM_DIGEST_SIZE];
+       } hash;
  
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               goto err_out;
+       memset(iint, 0, sizeof(*iint));
+       memset(&hash, 0, sizeof(hash));
+       iint->ima_hash = &hash.hdr;
+       iint->ima_hash->algo = HASH_ALGO_SHA1;
+       iint->ima_hash->length = SHA1_DIGEST_SIZE;
  
-       memset(&entry->template, 0, sizeof(entry->template));
-       strncpy(entry->template.file_name, boot_aggregate_name,
-               IMA_EVENT_NAME_LEN_MAX);
         if (ima_used_chip) {
-               violation = 0;
-               result = ima_calc_boot_aggregate(entry->template.digest);
+               result = ima_calc_boot_aggregate(&hash.hdr);
                 if (result < 0) {
                         audit_cause = "hashing_error";
                         kfree(entry);
                         goto err_out;
                 }
         }
-       result = ima_store_template(entry, violation, NULL);
+
+       result = ima_alloc_init_template(iint, NULL, boot_aggregate_name,
+                                        NULL, 0, &entry);
+       if (result < 0)
+               return;
+
+       result = ima_store_template(entry, violation, NULL,
+                                   boot_aggregate_name);
         if (result < 0)
                 kfree(entry);
         return;
@@ -74,7 +85,7 @@ err_out:
  
  int __init ima_init(void)
  {
-       u8 pcr_i[IMA_DIGEST_SIZE];
+       u8 pcr_i[TPM_DIGEST_SIZE];
         int rc;
  
         ima_used_chip = 0;
@@ -88,6 +99,10 @@ int __init ima_init(void)
         rc = ima_init_crypto();
         if (rc)
                 return rc;
+       rc = ima_init_template();
+       if (rc != 0)
+               return rc;
+
         ima_add_boot_aggregate();       /* boot aggregate must be first entry */
         ima_init_policy();
  
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c

index e9508d5bbfcff7ee087d069697c6088da7da0210..149ee1119f87ba37c7673efdd7fd4d0e50cae809 100644 (file)
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -24,6 +24,7 @@
  #include <linux/slab.h>
  #include <linux/xattr.h>
  #include <linux/ima.h>
+#include <crypto/hash_info.h>
  
  #include "ima.h"
  
@@ -35,11 +36,33 @@ int ima_appraise = IMA_APPRAISE_ENFORCE;
  int ima_appraise;
  #endif
  
-char *ima_hash = "sha1";
+int ima_hash_algo = HASH_ALGO_SHA1;
+static int hash_setup_done;
+
  static int __init hash_setup(char *str)
  {
-       if (strncmp(str, "md5", 3) == 0)
-               ima_hash = "md5";
+       struct ima_template_desc *template_desc = ima_template_desc_current();
+       int i;
+
+       if (hash_setup_done)
+               return 1;
+
+       if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+               if (strncmp(str, "sha1", 4) == 0)
+                       ima_hash_algo = HASH_ALGO_SHA1;
+               else if (strncmp(str, "md5", 3) == 0)
+                       ima_hash_algo = HASH_ALGO_MD5;
+               goto out;
+       }
+
+       for (i = 0; i < HASH_ALGO__LAST; i++) {
+               if (strcmp(str, hash_algo_name[i]) == 0) {
+                       ima_hash_algo = i;
+                       break;
+               }
+       }
+out:
+       hash_setup_done = 1;
         return 1;
  }
  __setup("ima_hash=", hash_setup);
@@ -92,10 +115,9 @@ out:
                 pathname = dentry->d_name.name;
  
         if (send_tomtou)
-               ima_add_violation(inode, pathname,
-                                 "invalid_pcr", "ToMToU");
+               ima_add_violation(file, pathname, "invalid_pcr", "ToMToU");
         if (send_writers)
-               ima_add_violation(inode, pathname,
+               ima_add_violation(file, pathname,
                                   "invalid_pcr", "open_writers");
         kfree(pathbuf);
  }
@@ -144,9 +166,12 @@ static int process_measurement(struct file *file, const char *filename,
  {
         struct inode *inode = file_inode(file);
         struct integrity_iint_cache *iint;
+       struct ima_template_desc *template_desc = ima_template_desc_current();
         char *pathbuf = NULL;
         const char *pathname = NULL;
         int rc = -ENOMEM, action, must_appraise, _func;
+       struct evm_ima_xattr_data *xattr_value = NULL, **xattr_ptr = NULL;
+       int xattr_len = 0;
  
         if (!ima_initialized || !S_ISREG(inode->i_mode))
                 return 0;
@@ -185,7 +210,13 @@ static int process_measurement(struct file *file, const char *filename,
                 goto out_digsig;
         }
  
-       rc = ima_collect_measurement(iint, file);
+       if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+               if (action & IMA_APPRAISE_SUBMASK)
+                       xattr_ptr = &xattr_value;
+       } else
+               xattr_ptr = &xattr_value;
+
+       rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len);
         if (rc != 0)
                 goto out_digsig;
  
@@ -194,9 +225,11 @@ static int process_measurement(struct file *file, const char *filename,
                 pathname = (const char *)file->f_dentry->d_name.name;
  
         if (action & IMA_MEASURE)
-               ima_store_measurement(iint, file, pathname);
+               ima_store_measurement(iint, file, pathname,
+                                     xattr_value, xattr_len);
         if (action & IMA_APPRAISE_SUBMASK)
-               rc = ima_appraise_measurement(_func, iint, file, pathname);
+               rc = ima_appraise_measurement(_func, iint, file, pathname,
+                                             xattr_value, xattr_len);
         if (action & IMA_AUDIT)
                 ima_audit_measurement(iint, pathname);
         kfree(pathbuf);
@@ -205,6 +238,7 @@ out_digsig:
                 rc = -EACCES;
  out:
         mutex_unlock(&inode->i_mutex);
+       kfree(xattr_value);
         if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
                 return -EACCES;
         return 0;
@@ -244,9 +278,9 @@ int ima_file_mmap(struct file *file, unsigned long prot)
  int ima_bprm_check(struct linux_binprm *bprm)
  {
         return process_measurement(bprm->file,
-                                (strcmp(bprm->filename, bprm->interp) == 0) ?
-                                bprm->filename : bprm->interp,
-                                MAY_EXEC, BPRM_CHECK);
+                                  (strcmp(bprm->filename, bprm->interp) == 0) ?
+                                  bprm->filename : bprm->interp,
+                                  MAY_EXEC, BPRM_CHECK);
  }
  
  /**
@@ -263,8 +297,8 @@ int ima_file_check(struct file *file, int mask)
  {
         ima_rdwr_violation_check(file);
         return process_measurement(file, NULL,
-                                mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
-                                FILE_CHECK);
+                                  mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+                                  FILE_CHECK);
  }
  EXPORT_SYMBOL_GPL(ima_file_check);
  
@@ -294,6 +328,7 @@ static int __init init_ima(void)
  {
         int error;
  
+       hash_setup(CONFIG_IMA_DEFAULT_HASH);
         error = ima_init();
         if (!error)
                 ima_initialized = 1;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c

index 399433ad614e0d26cc05588014991632a80865b0..a9c3d3cd1990d506a431614ffcf1d63ee53434a3 100644 (file)
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -73,7 +73,6 @@ static struct ima_rule_entry default_rules[] = {
         {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
         {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
         {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
-       {.action = DONT_MEASURE,.fsmagic = RAMFS_MAGIC,.flags = IMA_FSMAGIC},
         {.action = DONT_MEASURE,.fsmagic = DEVPTS_SUPER_MAGIC,.flags = IMA_FSMAGIC},
         {.action = DONT_MEASURE,.fsmagic = BINFMTFS_MAGIC,.flags = IMA_FSMAGIC},
         {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c

index ff63fe00c19554921b172425d5261495a98a6bcf..d85e99761f4fc66afa251bb2bd657062d84d9b77 100644 (file)
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -50,7 +50,7 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value)
         key = ima_hash_key(digest_value);
         rcu_read_lock();
         hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) {
-               rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE);
+               rc = memcmp(qe->entry->digest, digest_value, TPM_DIGEST_SIZE);
                 if (rc == 0) {
                         ret = qe;
                         break;
@@ -104,9 +104,10 @@ static int ima_pcr_extend(const u8 *hash)
   * and extend the pcr.
   */
  int ima_add_template_entry(struct ima_template_entry *entry, int violation,
-                          const char *op, struct inode *inode)
+                          const char *op, struct inode *inode,
+                          const unsigned char *filename)
  {
-       u8 digest[IMA_DIGEST_SIZE];
+       u8 digest[TPM_DIGEST_SIZE];
         const char *audit_cause = "hash_added";
         char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX];
         int audit_info = 1;
@@ -141,8 +142,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
         }
  out:
         mutex_unlock(&ima_extend_list_mutex);
-       integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-                           entry->template.file_name,
+       integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
                             op, audit_cause, result, audit_info);
         return result;
  }
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c

new file mode 100644 (file)

index 0000000..4e5da99
--- /dev/null
+++ b/security/integrity/ima/ima_template.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template.c
+ *      Helpers to manage template descriptors.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima.h"
+#include "ima_template_lib.h"
+
+static struct ima_template_desc defined_templates[] = {
+       {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
+       {.name = "ima-ng",.fmt = "d-ng|n-ng"},
+       {.name = "ima-sig",.fmt = "d-ng|n-ng|sig"},
+};
+
+static struct ima_template_field supported_fields[] = {
+       {.field_id = "d",.field_init = ima_eventdigest_init,
+        .field_show = ima_show_template_digest},
+       {.field_id = "n",.field_init = ima_eventname_init,
+        .field_show = ima_show_template_string},
+       {.field_id = "d-ng",.field_init = ima_eventdigest_ng_init,
+        .field_show = ima_show_template_digest_ng},
+       {.field_id = "n-ng",.field_init = ima_eventname_ng_init,
+        .field_show = ima_show_template_string},
+       {.field_id = "sig",.field_init = ima_eventsig_init,
+        .field_show = ima_show_template_sig},
+};
+
+static struct ima_template_desc *ima_template;
+static struct ima_template_desc *lookup_template_desc(const char *name);
+
+static int __init ima_template_setup(char *str)
+{
+       struct ima_template_desc *template_desc;
+       int template_len = strlen(str);
+
+       /*
+        * Verify that a template with the supplied name exists.
+        * If not, use CONFIG_IMA_DEFAULT_TEMPLATE.
+        */
+       template_desc = lookup_template_desc(str);
+       if (!template_desc)
+               return 1;
+
+       /*
+        * Verify whether the current hash algorithm is supported
+        * by the 'ima' template.
+        */
+       if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 &&
+           ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) {
+               pr_err("IMA: template does not support hash alg\n");
+               return 1;
+       }
+
+       ima_template = template_desc;
+       return 1;
+}
+__setup("ima_template=", ima_template_setup);
+
+static struct ima_template_desc *lookup_template_desc(const char *name)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(defined_templates); i++) {
+               if (strcmp(defined_templates[i].name, name) == 0)
+                       return defined_templates + i;
+       }
+
+       return NULL;
+}
+
+static struct ima_template_field *lookup_template_field(const char *field_id)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(supported_fields); i++)
+               if (strncmp(supported_fields[i].field_id, field_id,
+                           IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0)
+                       return &supported_fields[i];
+       return NULL;
+}
+
+static int template_fmt_size(char *template_fmt)
+{
+       char c;
+       int template_fmt_len = strlen(template_fmt);
+       int i = 0, j = 0;
+
+       while (i < template_fmt_len) {
+               c = template_fmt[i];
+               if (c == '|')
+                       j++;
+               i++;
+       }
+
+       return j + 1;
+}
+
+static int template_desc_init_fields(char *template_fmt,
+                                    struct ima_template_field ***fields,
+                                    int *num_fields)
+{
+       char *c, *template_fmt_ptr = template_fmt;
+       int template_num_fields = template_fmt_size(template_fmt);
+       int i, result = 0;
+
+       if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX)
+               return -EINVAL;
+
+       *fields = kzalloc(template_num_fields * sizeof(*fields), GFP_KERNEL);
+       if (*fields == NULL) {
+               result = -ENOMEM;
+               goto out;
+       }
+       for (i = 0; (c = strsep(&template_fmt_ptr, "|")) != NULL &&
+            i < template_num_fields; i++) {
+               struct ima_template_field *f = lookup_template_field(c);
+
+               if (!f) {
+                       result = -ENOENT;
+                       goto out;
+               }
+               (*fields)[i] = f;
+       }
+       *num_fields = i;
+       return 0;
+out:
+       kfree(*fields);
+       *fields = NULL;
+       return result;
+}
+
+static int init_defined_templates(void)
+{
+       int i = 0;
+       int result = 0;
+
+       /* Init defined templates. */
+       for (i = 0; i < ARRAY_SIZE(defined_templates); i++) {
+               struct ima_template_desc *template = &defined_templates[i];
+
+               result = template_desc_init_fields(template->fmt,
+                                                  &(template->fields),
+                                                  &(template->num_fields));
+               if (result < 0)
+                       return result;
+       }
+       return result;
+}
+
+struct ima_template_desc *ima_template_desc_current(void)
+{
+       if (!ima_template)
+               ima_template =
+                   lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE);
+       return ima_template;
+}
+
+int ima_init_template(void)
+{
+       int result;
+
+       result = init_defined_templates();
+       if (result < 0)
+               return result;
+
+       return 0;
+}
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c

new file mode 100644 (file)

index 0000000..6d66ad6
--- /dev/null
+++ b/security/integrity/ima/ima_template_lib.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.c
+ *      Library of supported template fields.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima_template_lib.h"
+
+static bool ima_template_hash_algo_allowed(u8 algo)
+{
+       if (algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5)
+               return true;
+
+       return false;
+}
+
+enum data_formats {
+       DATA_FMT_DIGEST = 0,
+       DATA_FMT_DIGEST_WITH_ALGO,
+       DATA_FMT_EVENT_NAME,
+       DATA_FMT_STRING,
+       DATA_FMT_HEX
+};
+
+static int ima_write_template_field_data(const void *data, const u32 datalen,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       u8 *buf, *buf_ptr;
+       u32 buflen;
+
+       switch (datafmt) {
+       case DATA_FMT_EVENT_NAME:
+               buflen = IMA_EVENT_NAME_LEN_MAX + 1;
+               break;
+       case DATA_FMT_STRING:
+               buflen = datalen + 1;
+               break;
+       default:
+               buflen = datalen;
+       }
+
+       buf = kzalloc(buflen, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       memcpy(buf, data, datalen);
+
+       /*
+        * Replace all space characters with underscore for event names and
+        * strings. This avoid that, during the parsing of a measurements list,
+        * filenames with spaces or that end with the suffix ' (deleted)' are
+        * split into multiple template fields (the space is the delimitator
+        * character for measurements lists in ASCII format).
+        */
+       if (datafmt == DATA_FMT_EVENT_NAME || datafmt == DATA_FMT_STRING) {
+               for (buf_ptr = buf; buf_ptr - buf < datalen; buf_ptr++)
+                       if (*buf_ptr == ' ')
+                               *buf_ptr = '_';
+       }
+
+       field_data->data = buf;
+       field_data->len = buflen;
+       return 0;
+}
+
+static void ima_show_template_data_ascii(struct seq_file *m,
+                                        enum ima_show_type show,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       u8 *buf_ptr = field_data->data, buflen = field_data->len;
+
+       switch (datafmt) {
+       case DATA_FMT_DIGEST_WITH_ALGO:
+               buf_ptr = strnchr(field_data->data, buflen, ':');
+               if (buf_ptr != field_data->data)
+                       seq_printf(m, "%s", field_data->data);
+
+               /* skip ':' and '\0' */
+               buf_ptr += 2;
+               buflen -= buf_ptr - field_data->data;
+       case DATA_FMT_DIGEST:
+       case DATA_FMT_HEX:
+               if (!buflen)
+                       break;
+               ima_print_digest(m, buf_ptr, buflen);
+               break;
+       case DATA_FMT_STRING:
+               seq_printf(m, "%s", buf_ptr);
+               break;
+       default:
+               break;
+       }
+}
+
+static void ima_show_template_data_binary(struct seq_file *m,
+                                         enum ima_show_type show,
+                                         enum data_formats datafmt,
+                                         struct ima_field_data *field_data)
+{
+       ima_putc(m, &field_data->len, sizeof(u32));
+       if (!field_data->len)
+               return;
+       ima_putc(m, field_data->data, field_data->len);
+}
+
+static void ima_show_template_field_data(struct seq_file *m,
+                                        enum ima_show_type show,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       switch (show) {
+       case IMA_SHOW_ASCII:
+               ima_show_template_data_ascii(m, show, datafmt, field_data);
+               break;
+       case IMA_SHOW_BINARY:
+               ima_show_template_data_binary(m, show, datafmt, field_data);
+               break;
+       default:
+               break;
+       }
+}
+
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data);
+}
+
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+                                struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO,
+                                    field_data);
+}
+
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data);
+}
+
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+                          struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
+}
+
+static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo,
+                                      struct ima_field_data *field_data,
+                                      bool size_limit)
+{
+       /*
+        * digest formats:
+        *  - DATA_FMT_DIGEST: digest
+        *  - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
+        *    where <hash algo> is provided if the hash algoritm is not
+        *    SHA1 or MD5
+        */
+       u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
+       enum data_formats fmt = DATA_FMT_DIGEST;
+       u32 offset = 0;
+
+       if (!size_limit) {
+               fmt = DATA_FMT_DIGEST_WITH_ALGO;
+               if (hash_algo < HASH_ALGO__LAST)
+                       offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1,
+                                          "%s", hash_algo_name[hash_algo]);
+               buffer[offset] = ':';
+               offset += 2;
+       }
+
+       if (digest)
+               memcpy(buffer + offset, digest, digestsize);
+       else
+               /*
+                * If digest is NULL, the event being recorded is a violation.
+                * Make room for the digest by increasing the offset of
+                * IMA_DIGEST_SIZE.
+                */
+               offset += IMA_DIGEST_SIZE;
+
+       return ima_write_template_field_data(buffer, offset + digestsize,
+                                            fmt, field_data);
+}
+
+/*
+ * This function writes the digest of an event (with size limit).
+ */
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+                        const unsigned char *filename,
+                        struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                        struct ima_field_data *field_data)
+{
+       struct {
+               struct ima_digest_data hdr;
+               char digest[IMA_MAX_DIGEST_SIZE];
+       } hash;
+       u8 *cur_digest = NULL;
+       u32 cur_digestsize = 0;
+       struct inode *inode;
+       int result;
+
+       memset(&hash, 0, sizeof(hash));
+
+       if (!iint)              /* recording a violation. */
+               goto out;
+
+       if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) {
+               cur_digest = iint->ima_hash->digest;
+               cur_digestsize = iint->ima_hash->length;
+               goto out;
+       }
+
+       if (!file)              /* missing info to re-calculate the digest */
+               return -EINVAL;
+
+       inode = file_inode(file);
+       hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ?
+           ima_hash_algo : HASH_ALGO_SHA1;
+       result = ima_calc_file_hash(file, &hash.hdr);
+       if (result) {
+               integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode,
+                                   filename, "collect_data",
+                                   "failed", result, 0);
+               return result;
+       }
+       cur_digest = hash.hdr.digest;
+       cur_digestsize = hash.hdr.length;
+out:
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize, -1,
+                                          field_data, true);
+}
+
+/*
+ * This function writes the digest of an event (without size limit).
+ */
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_field_data *field_data)
+{
+       u8 *cur_digest = NULL, hash_algo = HASH_ALGO__LAST;
+       u32 cur_digestsize = 0;
+
+       /* If iint is NULL, we are recording a violation. */
+       if (!iint)
+               goto out;
+
+       cur_digest = iint->ima_hash->digest;
+       cur_digestsize = iint->ima_hash->length;
+
+       hash_algo = iint->ima_hash->algo;
+out:
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize,
+                                          hash_algo, field_data, false);
+}
+
+static int ima_eventname_init_common(struct integrity_iint_cache *iint,
+                                    struct file *file,
+                                    const unsigned char *filename,
+                                    struct ima_field_data *field_data,
+                                    bool size_limit)
+{
+       const char *cur_filename = NULL;
+       u32 cur_filename_len = 0;
+       enum data_formats fmt = size_limit ?
+           DATA_FMT_EVENT_NAME : DATA_FMT_STRING;
+
+       BUG_ON(filename == NULL && file == NULL);
+
+       if (filename) {
+               cur_filename = filename;
+               cur_filename_len = strlen(filename);
+
+               if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX)
+                       goto out;
+       }
+
+       if (file) {
+               cur_filename = file->f_dentry->d_name.name;
+               cur_filename_len = strlen(cur_filename);
+       } else
+               /*
+                * Truncate filename if the latter is too long and
+                * the file descriptor is not available.
+                */
+               cur_filename_len = IMA_EVENT_NAME_LEN_MAX;
+out:
+       return ima_write_template_field_data(cur_filename, cur_filename_len,
+                                            fmt, field_data);
+}
+
+/*
+ * This function writes the name of an event (with size limit).
+ */
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+                      const unsigned char *filename,
+                      struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_field_data *field_data)
+{
+       return ima_eventname_init_common(iint, file, filename,
+                                        field_data, true);
+}
+
+/*
+ * This function writes the name of an event (without size limit).
+ */
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+                         const unsigned char *filename,
+                         struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                         struct ima_field_data *field_data)
+{
+       return ima_eventname_init_common(iint, file, filename,
+                                        field_data, false);
+}
+
+/*
+ *  ima_eventsig_init - include the file signature as part of the template data
+ */
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+                     const unsigned char *filename,
+                     struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                     struct ima_field_data *field_data)
+{
+       enum data_formats fmt = DATA_FMT_HEX;
+       int rc = 0;
+
+       if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
+               goto out;
+
+       rc = ima_write_template_field_data(xattr_value, xattr_len, fmt,
+                                          field_data);
+out:
+       return rc;
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h

new file mode 100644 (file)

index 0000000..63f6b52
--- /dev/null
+++ b/security/integrity/ima/ima_template_lib.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.h
+ *      Header for the library of supported template fields.
+ */
+#ifndef __LINUX_IMA_TEMPLATE_LIB_H
+#define __LINUX_IMA_TEMPLATE_LIB_H
+
+#include <linux/seq_file.h>
+#include "ima.h"
+
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data);
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+                                struct ima_field_data *field_data);
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data);
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+                          struct ima_field_data *field_data);
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+                        const unsigned char *filename,
+                        struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                        struct ima_field_data *field_data);
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+                      const unsigned char *filename,
+                      struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_field_data *field_data);
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_field_data *field_data);
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+                         const unsigned char *filename,
+                         struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                         struct ima_field_data *field_data);
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+                     const unsigned char *filename,
+                     struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                     struct ima_field_data *field_data);
+#endif /* __LINUX_IMA_TEMPLATE_LIB_H */
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h

index c42fb7a70dee78dfdf1ebbc5f1d3a43fb6c6c233..b9e7c133734a2dc5796fe98f5c15f8c81ebc5d26 100644 (file)
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -54,25 +54,57 @@ enum evm_ima_xattr_type {
         IMA_XATTR_DIGEST = 0x01,
         EVM_XATTR_HMAC,
         EVM_IMA_XATTR_DIGSIG,
+       IMA_XATTR_DIGEST_NG,
  };
  
  struct evm_ima_xattr_data {
         u8 type;
         u8 digest[SHA1_DIGEST_SIZE];
-}  __attribute__((packed));
+} __packed;
+
+#define IMA_MAX_DIGEST_SIZE    64
+
+struct ima_digest_data {
+       u8 algo;
+       u8 length;
+       union {
+               struct {
+                       u8 unused;
+                       u8 type;
+               } sha1;
+               struct {
+                       u8 type;
+                       u8 algo;
+               } ng;
+               u8 data[2];
+       } xattr;
+       u8 digest[0];
+} __packed;
+
+/*
+ * signature format v2 - for using with asymmetric keys
+ */
+struct signature_v2_hdr {
+       uint8_t type;           /* xattr type */
+       uint8_t version;        /* signature format version */
+       uint8_t hash_algo;      /* Digest algorithm [enum pkey_hash_algo] */
+       uint32_t keyid;         /* IMA key identifier - not X509/PGP specific */
+       uint16_t sig_size;      /* signature size */
+       uint8_t sig[0];         /* signature payload */
+} __packed;
  
  /* integrity data associated with an inode */
  struct integrity_iint_cache {
-       struct rb_node rb_node; /* rooted in integrity_iint_tree */
+       struct rb_node rb_node; /* rooted in integrity_iint_tree */
         struct inode *inode;    /* back pointer to inode in question */
         u64 version;            /* track inode changes */
         unsigned long flags;
-       struct evm_ima_xattr_data ima_xattr;
         enum integrity_status ima_file_status:4;
         enum integrity_status ima_mmap_status:4;
         enum integrity_status ima_bprm_status:4;
         enum integrity_status ima_module_status:4;
         enum integrity_status evm_status:4;
+       struct ima_digest_data *ima_hash;
  };
  
  /* rbtree tree calls to lookup, insert, delete
@@ -89,7 +121,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
  #ifdef CONFIG_INTEGRITY_SIGNATURE
  
  int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                                       const char *digest, int digestlen);
+                           const char *digest, int digestlen);
  
  #else
  
@@ -105,12 +137,19 @@ static inline int integrity_digsig_verify(const unsigned int id,
  #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
  int asymmetric_verify(struct key *keyring, const char *sig,
                       int siglen, const char *data, int datalen);
+
+int integrity_init_keyring(const unsigned int id);
  #else
  static inline int asymmetric_verify(struct key *keyring, const char *sig,
                                     int siglen, const char *data, int datalen)
  {
         return -EOPNOTSUPP;
  }
+
+static int integrity_init_keyring(const unsigned int id)
+{
+       return 0;
+}
  #endif
  
  #ifdef CONFIG_INTEGRITY_AUDIT
diff --git a/security/keys/Kconfig b/security/keys/Kconfig

index a90d6d300dbd3b0b5849cae74af644e576e6dc45..a4f3f8c48d6e3f0aa8c5d7d21f900f425d517c71 100644 (file)
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -4,6 +4,7 @@
  
  config KEYS
         bool "Enable access key retention support"
+       select ASSOCIATIVE_ARRAY
         help
           This option provides support for retaining authentication tokens and
           access keys in the kernel.
@@ -19,6 +20,34 @@ config KEYS
  
           If you are unsure as to whether this is required, answer N.
  
+config PERSISTENT_KEYRINGS
+       bool "Enable register of persistent per-UID keyrings"
+       depends on KEYS
+       help
+         This option provides a register of persistent per-UID keyrings,
+         primarily aimed at Kerberos key storage.  The keyrings are persistent
+         in the sense that they stay around after all processes of that UID
+         have exited, not that they survive the machine being rebooted.
+
+         A particular keyring may be accessed by either the user whose keyring
+         it is or by a process with administrative privileges.  The active
+         LSMs gets to rule on which admin-level processes get to access the
+         cache.
+
+         Keyrings are created and added into the register upon demand and get
+         removed if they expire (a default timeout is set upon creation).
+
+config BIG_KEYS
+       bool "Large payload keys"
+       depends on KEYS
+       depends on TMPFS
+       help
+         This option provides support for holding large keys within the kernel
+         (for example Kerberos ticket caches).  The data may be stored out to
+         swapspace by tmpfs.
+
+         If you are unsure as to whether this is required, answer N.
+
  config TRUSTED_KEYS
         tristate "TRUSTED KEYS"
         depends on KEYS && TCG_TPM
diff --git a/security/keys/Makefile b/security/keys/Makefile

index 504aaa008388c1595716e40f0e62e5fcd44fba71..dfb3a7bededf548ac1eed24b094de858e7a07df6 100644 (file)
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -18,9 +18,11 @@ obj-y := \
  obj-$(CONFIG_KEYS_COMPAT) += compat.o
  obj-$(CONFIG_PROC_FS) += proc.o
  obj-$(CONFIG_SYSCTL) += sysctl.o
+obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
  
  #
  # Key types
  #
+obj-$(CONFIG_BIG_KEYS) += big_key.o
  obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
  obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/big_key.c b/security/keys/big_key.c

new file mode 100644 (file)

index 0000000..7f44c32
--- /dev/null
+++ b/security/keys/big_key.c
@@ -0,0 +1,207 @@
+/* Large capacity key type
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/file.h>
+#include <linux/shmem_fs.h>
+#include <linux/err.h>
+#include <keys/user-type.h>
+#include <keys/big_key-type.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * If the data is under this limit, there's no point creating a shm file to
+ * hold it as the permanently resident metadata for the shmem fs will be at
+ * least as large as the data.
+ */
+#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
+
+/*
+ * big_key defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_big_key = {
+       .name                   = "big_key",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = big_key_instantiate,
+       .match                  = user_match,
+       .revoke                 = big_key_revoke,
+       .destroy                = big_key_destroy,
+       .describe               = big_key_describe,
+       .read                   = big_key_read,
+};
+
+/*
+ * Instantiate a big key
+ */
+int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+{
+       struct path *path = (struct path *)&key->payload.data2;
+       struct file *file;
+       ssize_t written;
+       size_t datalen = prep->datalen;
+       int ret;
+
+       ret = -EINVAL;
+       if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
+               goto error;
+
+       /* Set an arbitrary quota */
+       ret = key_payload_reserve(key, 16);
+       if (ret < 0)
+               goto error;
+
+       key->type_data.x[1] = datalen;
+
+       if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               /* Create a shmem file to store the data in.  This will permit the data
+                * to be swapped out if needed.
+                *
+                * TODO: Encrypt the stored data with a temporary key.
+                */
+               file = shmem_file_setup("", datalen, 0);
+               if (IS_ERR(file)) {
+                       ret = PTR_ERR(file);
+                       goto err_quota;
+               }
+
+               written = kernel_write(file, prep->data, prep->datalen, 0);
+               if (written != datalen) {
+                       ret = written;
+                       if (written >= 0)
+                               ret = -ENOMEM;
+                       goto err_fput;
+               }
+
+               /* Pin the mount and dentry to the key so that we can open it again
+                * later
+                */
+               *path = file->f_path;
+               path_get(path);
+               fput(file);
+       } else {
+               /* Just store the data in a buffer */
+               void *data = kmalloc(datalen, GFP_KERNEL);
+               if (!data) {
+                       ret = -ENOMEM;
+                       goto err_quota;
+               }
+
+               key->payload.data = memcpy(data, prep->data, prep->datalen);
+       }
+       return 0;
+
+err_fput:
+       fput(file);
+err_quota:
+       key_payload_reserve(key, 0);
+error:
+       return ret;
+}
+
+/*
+ * dispose of the links from a revoked keyring
+ * - called with the key sem write-locked
+ */
+void big_key_revoke(struct key *key)
+{
+       struct path *path = (struct path *)&key->payload.data2;
+
+       /* clear the quota */
+       key_payload_reserve(key, 0);
+       if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
+               vfs_truncate(path, 0);
+}
+
+/*
+ * dispose of the data dangling from the corpse of a big_key key
+ */
+void big_key_destroy(struct key *key)
+{
+       if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) {
+               struct path *path = (struct path *)&key->payload.data2;
+               path_put(path);
+               path->mnt = NULL;
+               path->dentry = NULL;
+       } else {
+               kfree(key->payload.data);
+               key->payload.data = NULL;
+       }
+}
+
+/*
+ * describe the big_key key
+ */
+void big_key_describe(const struct key *key, struct seq_file *m)
+{
+       unsigned long datalen = key->type_data.x[1];
+
+       seq_puts(m, key->description);
+
+       if (key_is_instantiated(key))
+               seq_printf(m, ": %lu [%s]",
+                          datalen,
+                          datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
+}
+
+/*
+ * read the key data
+ * - the key's semaphore is read-locked
+ */
+long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
+{
+       unsigned long datalen = key->type_data.x[1];
+       long ret;
+
+       if (!buffer || buflen < datalen)
+               return datalen;
+
+       if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               struct path *path = (struct path *)&key->payload.data2;
+               struct file *file;
+               loff_t pos;
+
+               file = dentry_open(path, O_RDONLY, current_cred());
+               if (IS_ERR(file))
+                       return PTR_ERR(file);
+
+               pos = 0;
+               ret = vfs_read(file, buffer, datalen, &pos);
+               fput(file);
+               if (ret >= 0 && ret != datalen)
+                       ret = -EIO;
+       } else {
+               ret = datalen;
+               if (copy_to_user(buffer, key->payload.data, datalen) != 0)
+                       ret = -EFAULT;
+       }
+
+       return ret;
+}
+
+/*
+ * Module stuff
+ */
+static int __init big_key_init(void)
+{
+       return register_key_type(&key_type_big_key);
+}
+
+static void __exit big_key_cleanup(void)
+{
+       unregister_key_type(&key_type_big_key);
+}
+
+module_init(big_key_init);
+module_exit(big_key_cleanup);
diff --git a/security/keys/compat.c b/security/keys/compat.c

index d65fa7fa29ba1a53b1ef4fb6d76c7aeafb7da65a..bbd32c729dbb4e019d1461116b84c25107e35ab8 100644 (file)
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
         case KEYCTL_INVALIDATE:
                 return keyctl_invalidate_key(arg2);
  
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent(arg2, arg3);
+
         default:
                 return -EOPNOTSUPP;
         }
diff --git a/security/keys/gc.c b/security/keys/gc.c

index d67c97bb10256d5dc5a9b74b3b8aaa37022f96b1..d3222b6d7d5979460ff63940066433e414b22ec4 100644 (file)
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -130,50 +130,6 @@ void key_gc_keytype(struct key_type *ktype)
         kleave("");
  }
  
-/*
- * Garbage collect pointers from a keyring.
- *
- * Not called with any locks held.  The keyring's key struct will not be
- * deallocated under us as only our caller may deallocate it.
- */
-static void key_gc_keyring(struct key *keyring, time_t limit)
-{
-       struct keyring_list *klist;
-       int loop;
-
-       kenter("%x", key_serial(keyring));
-
-       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                             (1 << KEY_FLAG_REVOKED)))
-               goto dont_gc;
-
-       /* scan the keyring looking for dead keys */
-       rcu_read_lock();
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (!klist)
-               goto unlock_dont_gc;
-
-       loop = klist->nkeys;
-       smp_rmb();
-       for (loop--; loop >= 0; loop--) {
-               struct key *key = rcu_dereference(klist->keys[loop]);
-               if (key_is_dead(key, limit))
-                       goto do_gc;
-       }
-
-unlock_dont_gc:
-       rcu_read_unlock();
-dont_gc:
-       kleave(" [no gc]");
-       return;
-
-do_gc:
-       rcu_read_unlock();
-
-       keyring_gc(keyring, limit);
-       kleave(" [gc]");
-}
-
  /*
   * Garbage collect a list of unreferenced, detached keys
   */
@@ -392,8 +348,7 @@ found_unreferenced_key:
          */
  found_keyring:
         spin_unlock(&key_serial_lock);
-       kdebug("scan keyring %d", key->serial);
-       key_gc_keyring(key, limit);
+       keyring_gc(key, limit);
         goto maybe_resched;
  
         /* We found a dead key that is still referenced.  Reset its type and
diff --git a/security/keys/internal.h b/security/keys/internal.h

index d4f1468b9b50f46cd7d739544902a77a3ff40384..80b2aac4f50ceda614d03c815f7638aa88a0c933 100644 (file)
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type);
  extern void key_type_put(struct key_type *ktype);
  
  extern int __key_link_begin(struct key *keyring,
-                           const struct key_type *type,
-                           const char *description,
-                           unsigned long *_prealloc);
+                           const struct keyring_index_key *index_key,
+                           struct assoc_array_edit **_edit);
  extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *keyring, struct key *key,
-                      unsigned long *_prealloc);
+extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
  extern void __key_link_end(struct key *keyring,
-                          struct key_type *type,
-                          unsigned long prealloc);
+                          const struct keyring_index_key *index_key,
+                          struct assoc_array_edit *edit);
  
-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                                     const struct key_type *type,
-                                     const char *description,
-                                     key_perm_t perm);
+extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                                   const struct keyring_index_key *index_key);
  
  extern struct key *keyring_search_instkey(struct key *keyring,
                                           key_serial_t target_id);
  
+extern int iterate_over_keyring(const struct key *keyring,
+                               int (*func)(const struct key *key, void *data),
+                               void *data);
+
  typedef int (*key_match_func_t)(const struct key *, const void *);
  
+struct keyring_search_context {
+       struct keyring_index_key index_key;
+       const struct cred       *cred;
+       key_match_func_t        match;
+       const void              *match_data;
+       unsigned                flags;
+#define KEYRING_SEARCH_LOOKUP_TYPE     0x0001  /* [as type->def_lookup_type] */
+#define KEYRING_SEARCH_NO_STATE_CHECK  0x0002  /* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK  0x0004  /* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME  0x0008  /* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM   0x0010  /* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020  /* Give an error on excessive depth */
+
+       int (*iterator)(const void *object, void *iterator_data);
+
+       /* Internal stuff */
+       int                     skipped_ret;
+       bool                    possessed;
+       key_ref_t               result;
+       struct timespec         now;
+};
+
  extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                                   const struct cred *cred,
-                                   struct key_type *type,
-                                   const void *description,
-                                   key_match_func_t match,
-                                   bool no_state_check);
-
-extern key_ref_t search_my_process_keyrings(struct key_type *type,
-                                           const void *description,
-                                           key_match_func_t match,
-                                           bool no_state_check,
-                                           const struct cred *cred);
-extern key_ref_t search_process_keyrings(struct key_type *type,
-                                        const void *description,
-                                        key_match_func_t match,
-                                        const struct cred *cred);
+                                   struct keyring_search_context *ctx);
+
+extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
  
  extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
  
@@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
  /*
   * Determine whether a key is dead.
   */
-static inline bool key_is_dead(struct key *key, time_t limit)
+static inline bool key_is_dead(const struct key *key, time_t limit)
  {
         return
                 key->flags & ((1 << KEY_FLAG_DEAD) |
@@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
  extern long keyctl_instantiate_key_common(key_serial_t,
                                           const struct iovec *,
                                           unsigned, size_t, key_serial_t);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+extern long keyctl_get_persistent(uid_t, key_serial_t);
+extern unsigned persistent_keyring_expiry;
+#else
+static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
+{
+       return -EOPNOTSUPP;
+}
+#endif
  
  /*
   * Debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c

index 8fb7c7bd465769cb5dca49e6d6f1ad011c75de63..55d110f0acedc96d17bcc5f5903aaa743ed6cb32 100644 (file)
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                 }
         }
  
-       desclen = strlen(desc) + 1;
-       quotalen = desclen + type->def_datalen;
+       desclen = strlen(desc);
+       quotalen = desclen + 1 + type->def_datalen;
  
         /* get hold of the key tracking for this user */
         user = key_user_lookup(uid);
@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                 goto no_memory_2;
  
         if (desc) {
-               key->description = kmemdup(desc, desclen, GFP_KERNEL);
+               key->index_key.desc_len = desclen;
+               key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
                 if (!key->description)
                         goto no_memory_3;
         }
@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
         atomic_set(&key->usage, 1);
         init_rwsem(&key->sem);
         lockdep_set_class(&key->sem, &type->lock_class);
-       key->type = type;
+       key->index_key.type = type;
         key->user = user;
         key->quotalen = quotalen;
         key->datalen = type->def_datalen;
@@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
  
         if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
                 key->flags |= 1 << KEY_FLAG_IN_QUOTA;
+       if (flags & KEY_ALLOC_TRUSTED)
+               key->flags |= 1 << KEY_FLAG_TRUSTED;
  
         memset(&key->type_data, 0, sizeof(key->type_data));
  
@@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key,
                                       struct key_preparsed_payload *prep,
                                       struct key *keyring,
                                       struct key *authkey,
-                                     unsigned long *_prealloc)
+                                     struct assoc_array_edit **_edit)
  {
         int ret, awaken;
  
@@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key,
  
                         /* and link it into the destination keyring */
                         if (keyring)
-                               __key_link(keyring, key, _prealloc);
+                               __key_link(key, _edit);
  
                         /* disable the authorisation key */
                         if (authkey)
@@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key,
                              struct key *authkey)
  {
         struct key_preparsed_payload prep;
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
         int ret;
  
         memset(&prep, 0, sizeof(prep));
@@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key,
         }
  
         if (keyring) {
-               ret = __key_link_begin(keyring, key->type, key->description,
-                                      &prealloc);
+               ret = __key_link_begin(keyring, &key->index_key, &edit);
                 if (ret < 0)
                         goto error_free_preparse;
         }
  
-       ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
-                                        &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
  
         if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
  
  error_free_preparse:
         if (key->type->preparse)
@@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key,
                         struct key *keyring,
                         struct key *authkey)
  {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
         struct timespec now;
         int ret, awaken, link_ret = 0;
  
@@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key,
         ret = -EBUSY;
  
         if (keyring)
-               link_ret = __key_link_begin(keyring, key->type,
-                                           key->description, &prealloc);
+               link_ret = __key_link_begin(keyring, &key->index_key, &edit);
  
         mutex_lock(&key_construction_mutex);
  
@@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key,
         if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
                 /* mark the key as being negatively instantiated */
                 atomic_inc(&key->user->nikeys);
+               key->type_data.reject_error = -error;
+               smp_wmb();
                 set_bit(KEY_FLAG_NEGATIVE, &key->flags);
                 set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
-               key->type_data.reject_error = -error;
                 now = current_kernel_time();
                 key->expiry = now.tv_sec + timeout;
                 key_schedule_gc(key->expiry + key_gc_delay);
@@ -571,7 +572,7 @@ int key_reject_and_link(struct key *key,
  
                 /* and link it into the destination keyring */
                 if (keyring && link_ret == 0)
-                       __key_link(keyring, key, &prealloc);
+                       __key_link(key, &edit);
  
                 /* disable the authorisation key */
                 if (authkey)
@@ -581,7 +582,7 @@ int key_reject_and_link(struct key *key,
         mutex_unlock(&key_construction_mutex);
  
         if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
  
         /* wake up anyone waiting for a key to be constructed */
         if (awaken)
@@ -645,7 +646,7 @@ found:
         /* this races with key_put(), but that doesn't matter since key_put()
          * doesn't actually change the key
          */
-       atomic_inc(&key->usage);
+       __key_get(key);
  
  error:
         spin_unlock(&key_serial_lock);
@@ -780,25 +781,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                                key_perm_t perm,
                                unsigned long flags)
  {
-       unsigned long prealloc;
+       struct keyring_index_key index_key = {
+               .description    = description,
+       };
         struct key_preparsed_payload prep;
+       struct assoc_array_edit *edit;
         const struct cred *cred = current_cred();
-       struct key_type *ktype;
         struct key *keyring, *key = NULL;
         key_ref_t key_ref;
         int ret;
  
         /* look up the key type to see if it's one of the registered kernel
          * types */
-       ktype = key_type_lookup(type);
-       if (IS_ERR(ktype)) {
+       index_key.type = key_type_lookup(type);
+       if (IS_ERR(index_key.type)) {
                 key_ref = ERR_PTR(-ENODEV);
                 goto error;
         }
  
         key_ref = ERR_PTR(-EINVAL);
-       if (!ktype->match || !ktype->instantiate ||
-           (!description && !ktype->preparse))
+       if (!index_key.type->match || !index_key.type->instantiate ||
+           (!index_key.description && !index_key.type->preparse))
                 goto error_put_type;
  
         keyring = key_ref_to_ptr(keyring_ref);
@@ -812,21 +815,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
         memset(&prep, 0, sizeof(prep));
         prep.data = payload;
         prep.datalen = plen;
-       prep.quotalen = ktype->def_datalen;
-       if (ktype->preparse) {
-               ret = ktype->preparse(&prep);
+       prep.quotalen = index_key.type->def_datalen;
+       prep.trusted = flags & KEY_ALLOC_TRUSTED;
+       if (index_key.type->preparse) {
+               ret = index_key.type->preparse(&prep);
                 if (ret < 0) {
                         key_ref = ERR_PTR(ret);
                         goto error_put_type;
                 }
-               if (!description)
-                       description = prep.description;
+               if (!index_key.description)
+                       index_key.description = prep.description;
                 key_ref = ERR_PTR(-EINVAL);
-               if (!description)
+               if (!index_key.description)
                         goto error_free_prep;
         }
+       index_key.desc_len = strlen(index_key.description);
+
+       key_ref = ERR_PTR(-EPERM);
+       if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
+               goto error_free_prep;
+       flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
  
-       ret = __key_link_begin(keyring, ktype, description, &prealloc);
+       ret = __key_link_begin(keyring, &index_key, &edit);
         if (ret < 0) {
                 key_ref = ERR_PTR(ret);
                 goto error_free_prep;
@@ -844,10 +854,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
          * key of the same type and description in the destination keyring and
          * update that instead if possible
          */
-       if (ktype->update) {
-               key_ref = __keyring_search_one(keyring_ref, ktype, description,
-                                              0);
-               if (!IS_ERR(key_ref))
+       if (index_key.type->update) {
+               key_ref = find_key_to_update(keyring_ref, &index_key);
+               if (key_ref)
                         goto found_matching_key;
         }
  
@@ -856,23 +865,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                 perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
                 perm |= KEY_USR_VIEW;
  
-               if (ktype->read)
+               if (index_key.type->read)
                         perm |= KEY_POS_READ;
  
-               if (ktype == &key_type_keyring || ktype->update)
+               if (index_key.type == &key_type_keyring ||
+                   index_key.type->update)
                         perm |= KEY_POS_WRITE;
         }
  
         /* allocate a new key */
-       key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
-                       perm, flags);
+       key = key_alloc(index_key.type, index_key.description,
+                       cred->fsuid, cred->fsgid, cred, perm, flags);
         if (IS_ERR(key)) {
                 key_ref = ERR_CAST(key);
                 goto error_link_end;
         }
  
         /* instantiate it and link it into the target keyring */
-       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
         if (ret < 0) {
                 key_put(key);
                 key_ref = ERR_PTR(ret);
@@ -882,12 +892,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
         key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
  
  error_link_end:
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
  error_free_prep:
-       if (ktype->preparse)
-               ktype->free_preparse(&prep);
+       if (index_key.type->preparse)
+               index_key.type->free_preparse(&prep);
  error_put_type:
-       key_type_put(ktype);
+       key_type_put(index_key.type);
  error:
         return key_ref;
  
@@ -895,7 +905,7 @@ error:
         /* we found a matching key, so we're going to try to update it
          * - we can drop the locks first as we have the key pinned
          */
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
  
         key_ref = __key_update(key_ref, &prep);
         goto error_free_prep;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c

index 33cfd27b4de29650ae6ad0e1eb45646714a00f27..cee72ce642221e816968cb81069407fe01edb138 100644 (file)
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
         case KEYCTL_INVALIDATE:
                 return keyctl_invalidate_key((key_serial_t) arg2);
  
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
+
         default:
                 return -EOPNOTSUPP;
         }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c

index 6ece7f2e5707f45c2736ca4a05504c2dd391ea00..69f0cb7bab7e873f8d8997d71db7c42430f72ce6 100644 (file)
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1,6 +1,6 @@
  /* Keyring handling
   *
- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
   * Written by David Howells (dhowells@redhat.com)
   *
   * This program is free software; you can redistribute it and/or
@@ -17,25 +17,11 @@
  #include <linux/seq_file.h>
  #include <linux/err.h>
  #include <keys/keyring-type.h>
+#include <keys/user-type.h>
+#include <linux/assoc_array_priv.h>
  #include <linux/uaccess.h>
  #include "internal.h"
  
-#define rcu_dereference_locked_keyring(keyring)                                \
-       (rcu_dereference_protected(                                     \
-               (keyring)->payload.subscriptions,                       \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define rcu_deref_link_locked(klist, index, keyring)                   \
-       (rcu_dereference_protected(                                     \
-               (klist)->keys[index],                                   \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define MAX_KEYRING_LINKS                                              \
-       min_t(size_t, USHRT_MAX - 1,                                    \
-             ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
-
-#define KEY_LINK_FIXQUOTA 1UL
-
  /*
   * When plumbing the depths of the key tree, this sets a hard limit
   * set on how deep we're willing to go.
@@ -47,6 +33,28 @@
   */
  #define KEYRING_NAME_HASH_SIZE (1 << 5)
  
+/*
+ * We mark pointers we pass to the associative array with bit 1 set if
+ * they're keyrings and clear otherwise.
+ */
+#define KEYRING_PTR_SUBTYPE    0x2UL
+
+static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & KEYRING_PTR_SUBTYPE;
+}
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
+{
+       void *object = assoc_array_ptr_to_leaf(x);
+       return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE);
+}
+static inline void *keyring_key_to_ptr(struct key *key)
+{
+       if (key->type == &key_type_keyring)
+               return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE);
+       return key;
+}
+
  static struct list_head        keyring_name_hash[KEYRING_NAME_HASH_SIZE];
  static DEFINE_RWLOCK(keyring_name_lock);
  
@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
   */
  static int keyring_instantiate(struct key *keyring,
                                struct key_preparsed_payload *prep);
-static int keyring_match(const struct key *keyring, const void *criterion);
  static void keyring_revoke(struct key *keyring);
  static void keyring_destroy(struct key *keyring);
  static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
  
  struct key_type key_type_keyring = {
         .name           = "keyring",
-       .def_datalen    = sizeof(struct keyring_list),
+       .def_datalen    = 0,
         .instantiate    = keyring_instantiate,
-       .match          = keyring_match,
+       .match          = user_match,
         .revoke         = keyring_revoke,
         .destroy        = keyring_destroy,
         .describe       = keyring_describe,
@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
  
         ret = -EINVAL;
         if (prep->datalen == 0) {
+               assoc_array_init(&keyring->keys);
                 /* make the keyring available by name if it has one */
                 keyring_publish_name(keyring);
                 ret = 0;
@@ -136,14 +144,225 @@ static int keyring_instantiate(struct key *keyring,
  }
  
  /*
- * Match keyrings on their name
+ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit.  Ideally we'd
+ * fold the carry back too, but that requires inline asm.
+ */
+static u64 mult_64x32_and_fold(u64 x, u32 y)
+{
+       u64 hi = (u64)(u32)(x >> 32) * y;
+       u64 lo = (u64)(u32)(x) * y;
+       return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
+}
+
+/*
+ * Hash a key type and description.
+ */
+static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
+{
+       const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
+       const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
+       const char *description = index_key->description;
+       unsigned long hash, type;
+       u32 piece;
+       u64 acc;
+       int n, desc_len = index_key->desc_len;
+
+       type = (unsigned long)index_key->type;
+
+       acc = mult_64x32_and_fold(type, desc_len + 13);
+       acc = mult_64x32_and_fold(acc, 9207);
+       for (;;) {
+               n = desc_len;
+               if (n <= 0)
+                       break;
+               if (n > 4)
+                       n = 4;
+               piece = 0;
+               memcpy(&piece, description, n);
+               description += n;
+               desc_len -= n;
+               acc = mult_64x32_and_fold(acc, piece);
+               acc = mult_64x32_and_fold(acc, 9207);
+       }
+
+       /* Fold the hash down to 32 bits if need be. */
+       hash = acc;
+       if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
+               hash ^= acc >> 32;
+
+       /* Squidge all the keyrings into a separate part of the tree to
+        * ordinary keys by making sure the lowest level segment in the hash is
+        * zero for keyrings and non-zero otherwise.
+        */
+       if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
+               return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+       if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
+               return (hash + (hash << level_shift)) & ~level_mask;
+       return hash;
+}
+
+/*
+ * Build the next index key chunk.
+ *
+ * On 32-bit systems the index key is laid out as:
+ *
+ *     0       4       5       9...
+ *     hash    desclen typeptr desc[]
+ *
+ * On 64-bit systems:
+ *
+ *     0       8       9       17...
+ *     hash    desclen typeptr desc[]
+ *
+ * We return it one word-sized chunk at a time.
   */
-static int keyring_match(const struct key *keyring, const void *description)
+static unsigned long keyring_get_key_chunk(const void *data, int level)
+{
+       const struct keyring_index_key *index_key = data;
+       unsigned long chunk = 0;
+       long offset = 0;
+       int desc_len = index_key->desc_len, n = sizeof(chunk);
+
+       level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
+       switch (level) {
+       case 0:
+               return hash_key_type_and_desc(index_key);
+       case 1:
+               return ((unsigned long)index_key->type << 8) | desc_len;
+       case 2:
+               if (desc_len == 0)
+                       return (u8)((unsigned long)index_key->type >>
+                                   (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+               n--;
+               offset = 1;
+       default:
+               offset += sizeof(chunk) - 1;
+               offset += (level - 3) * sizeof(chunk);
+               if (offset >= desc_len)
+                       return 0;
+               desc_len -= offset;
+               if (desc_len > n)
+                       desc_len = n;
+               offset += desc_len;
+               do {
+                       chunk <<= 8;
+                       chunk |= ((u8*)index_key->description)[--offset];
+               } while (--desc_len > 0);
+
+               if (level == 2) {
+                       chunk <<= 8;
+                       chunk |= (u8)((unsigned long)index_key->type >>
+                                     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+               }
+               return chunk;
+       }
+}
+
+static unsigned long keyring_get_object_key_chunk(const void *object, int level)
+{
+       const struct key *key = keyring_ptr_to_key(object);
+       return keyring_get_key_chunk(&key->index_key, level);
+}
+
+static bool keyring_compare_object(const void *object, const void *data)
  {
-       return keyring->description &&
-               strcmp(keyring->description, description) == 0;
+       const struct keyring_index_key *index_key = data;
+       const struct key *key = keyring_ptr_to_key(object);
+
+       return key->index_key.type == index_key->type &&
+               key->index_key.desc_len == index_key->desc_len &&
+               memcmp(key->index_key.description, index_key->description,
+                      index_key->desc_len) == 0;
  }
  
+/*
+ * Compare the index keys of a pair of objects and determine the bit position
+ * at which they differ - if they differ.
+ */
+static int keyring_diff_objects(const void *_a, const void *_b)
+{
+       const struct key *key_a = keyring_ptr_to_key(_a);
+       const struct key *key_b = keyring_ptr_to_key(_b);
+       const struct keyring_index_key *a = &key_a->index_key;
+       const struct keyring_index_key *b = &key_b->index_key;
+       unsigned long seg_a, seg_b;
+       int level, i;
+
+       level = 0;
+       seg_a = hash_key_type_and_desc(a);
+       seg_b = hash_key_type_and_desc(b);
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The number of bits contributed by the hash is controlled by a
+        * constant in the assoc_array headers.  Everything else thereafter we
+        * can deal with as being machine word-size dependent.
+        */
+       level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
+       seg_a = a->desc_len;
+       seg_b = b->desc_len;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The next bit may not work on big endian */
+       level++;
+       seg_a = (unsigned long)a->type;
+       seg_b = (unsigned long)b->type;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       level += sizeof(unsigned long);
+       if (a->desc_len == 0)
+               goto same;
+
+       i = 0;
+       if (((unsigned long)a->description | (unsigned long)b->description) &
+           (sizeof(unsigned long) - 1)) {
+               do {
+                       seg_a = *(unsigned long *)(a->description + i);
+                       seg_b = *(unsigned long *)(b->description + i);
+                       if ((seg_a ^ seg_b) != 0)
+                               goto differ_plus_i;
+                       i += sizeof(unsigned long);
+               } while (i < (a->desc_len & (sizeof(unsigned long) - 1)));
+       }
+
+       for (; i < a->desc_len; i++) {
+               seg_a = *(unsigned char *)(a->description + i);
+               seg_b = *(unsigned char *)(b->description + i);
+               if ((seg_a ^ seg_b) != 0)
+                       goto differ_plus_i;
+       }
+
+same:
+       return -1;
+
+differ_plus_i:
+       level += i;
+differ:
+       i = level * 8 + __ffs(seg_a ^ seg_b);
+       return i;
+}
+
+/*
+ * Free an object after stripping the keyring flag off of the pointer.
+ */
+static void keyring_free_object(void *object)
+{
+       key_put(keyring_ptr_to_key(object));
+}
+
+/*
+ * Operations for keyring management by the index-tree routines.
+ */
+static const struct assoc_array_ops keyring_assoc_array_ops = {
+       .get_key_chunk          = keyring_get_key_chunk,
+       .get_object_key_chunk   = keyring_get_object_key_chunk,
+       .compare_object         = keyring_compare_object,
+       .diff_objects           = keyring_diff_objects,
+       .free_object            = keyring_free_object,
+};
+
  /*
   * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
   * and dispose of its data.
@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
   */
  static void keyring_destroy(struct key *keyring)
  {
-       struct keyring_list *klist;
-       int loop;
-
         if (keyring->description) {
                 write_lock(&keyring_name_lock);
  
@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
                 write_unlock(&keyring_name_lock);
         }
  
-       klist = rcu_access_pointer(keyring->payload.subscriptions);
-       if (klist) {
-               for (loop = klist->nkeys - 1; loop >= 0; loop--)
-                       key_put(rcu_access_pointer(klist->keys[loop]));
-               kfree(klist);
-       }
+       assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
  }
  
  /*
@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
   */
  static void keyring_describe(const struct key *keyring, struct seq_file *m)
  {
-       struct keyring_list *klist;
-
         if (keyring->description)
                 seq_puts(m, keyring->description);
         else
                 seq_puts(m, "[anon]");
  
         if (key_is_instantiated(keyring)) {
-               rcu_read_lock();
-               klist = rcu_dereference(keyring->payload.subscriptions);
-               if (klist)
-                       seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
+               if (keyring->keys.nr_leaves_on_tree != 0)
+                       seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
                 else
                         seq_puts(m, ": empty");
-               rcu_read_unlock();
         }
  }
  
+struct keyring_read_iterator_context {
+       size_t                  qty;
+       size_t                  count;
+       key_serial_t __user     *buffer;
+};
+
+static int keyring_read_iterator(const void *object, void *data)
+{
+       struct keyring_read_iterator_context *ctx = data;
+       const struct key *key = keyring_ptr_to_key(object);
+       int ret;
+
+       kenter("{%s,%d},,{%zu/%zu}",
+              key->type->name, key->serial, ctx->count, ctx->qty);
+
+       if (ctx->count >= ctx->qty)
+               return 1;
+
+       ret = put_user(key->serial, ctx->buffer);
+       if (ret < 0)
+               return ret;
+       ctx->buffer++;
+       ctx->count += sizeof(key->serial);
+       return 0;
+}
+
  /*
   * Read a list of key IDs from the keyring's contents in binary form
   *
- * The keyring's semaphore is read-locked by the caller.
+ * The keyring's semaphore is read-locked by the caller.  This prevents someone
+ * from modifying it under us - which could cause us to read key IDs multiple
+ * times.
   */
  static long keyring_read(const struct key *keyring,
                          char __user *buffer, size_t buflen)
  {
-       struct keyring_list *klist;
-       struct key *key;
-       size_t qty, tmp;
-       int loop, ret;
+       struct keyring_read_iterator_context ctx;
+       unsigned long nr_keys;
+       int ret;
  
-       ret = 0;
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* calculate how much data we could return */
-               qty = klist->nkeys * sizeof(key_serial_t);
-
-               if (buffer && buflen > 0) {
-                       if (buflen > qty)
-                               buflen = qty;
-
-                       /* copy the IDs of the subscribed keys into the
-                        * buffer */
-                       ret = -EFAULT;
-
-                       for (loop = 0; loop < klist->nkeys; loop++) {
-                               key = rcu_deref_link_locked(klist, loop,
-                                                           keyring);
-
-                               tmp = sizeof(key_serial_t);
-                               if (tmp > buflen)
-                                       tmp = buflen;
-
-                               if (copy_to_user(buffer,
-                                                &key->serial,
-                                                tmp) != 0)
-                                       goto error;
-
-                               buflen -= tmp;
-                               if (buflen == 0)
-                                       break;
-                               buffer += tmp;
-                       }
-               }
+       kenter("{%d},,%zu", key_serial(keyring), buflen);
+
+       if (buflen & (sizeof(key_serial_t) - 1))
+               return -EINVAL;
+
+       nr_keys = keyring->keys.nr_leaves_on_tree;
+       if (nr_keys == 0)
+               return 0;
  
-               ret = qty;
+       /* Calculate how much data we could return */
+       ctx.qty = nr_keys * sizeof(key_serial_t);
+
+       if (!buffer || !buflen)
+               return ctx.qty;
+
+       if (buflen > ctx.qty)
+               ctx.qty = buflen;
+
+       /* Copy the IDs of the subscribed keys into the buffer */
+       ctx.buffer = (key_serial_t __user *)buffer;
+       ctx.count = 0;
+       ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
+       if (ret < 0) {
+               kleave(" = %d [iterate]", ret);
+               return ret;
         }
  
-error:
-       return ret;
+       kleave(" = %zu [ok]", ctx.count);
+       return ctx.count;
  }
  
  /*
@@ -277,227 +500,361 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
  }
  EXPORT_SYMBOL(keyring_alloc);
  
-/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
- * @keyring_ref: A pointer to the keyring with possession indicator.
- * @cred: The credentials to use for permissions checks.
- * @type: The type of key to search for.
- * @description: Parameter for @match.
- * @match: Function to rule on whether or not a key is the one required.
- * @no_state_check: Don't check if a matching key is bad
- *
- * Search the supplied keyring tree for a key that matches the criteria given.
- * The root keyring and any linked keyrings must grant Search permission to the
- * caller to be searchable and keys can only be found if they too grant Search
- * to the caller. The possession flag on the root keyring pointer controls use
- * of the possessor bits in permissions checking of the entire tree.  In
- * addition, the LSM gets to forbid keyring searches and key matches.
- *
- * The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
- *
- * Keys are matched to the type provided and are then filtered by the match
- * function, which is given the description to use in any way it sees fit.  The
- * match function may use any attributes of a key that it wishes to to
- * determine the match.  Normally the match function from the key type would be
- * used.
- *
- * RCU is used to prevent the keyring key lists from disappearing without the
- * need to take lots of locks.
- *
- * Returns a pointer to the found key and increments the key usage count if
- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
- * specified keyring wasn't a keyring.
- *
- * In the case of a successful return, the possession attribute from
- * @keyring_ref is propagated to the returned key reference.
+/*
+ * Iteration function to consider each key found.
   */
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                            const struct cred *cred,
-                            struct key_type *type,
-                            const void *description,
-                            key_match_func_t match,
-                            bool no_state_check)
+static int keyring_search_iterator(const void *object, void *iterator_data)
  {
-       struct {
-               /* Need a separate keylist pointer for RCU purposes */
-               struct key *keyring;
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
-
-       struct keyring_list *keylist;
-       struct timespec now;
-       unsigned long possessed, kflags;
-       struct key *keyring, *key;
-       key_ref_t key_ref;
-       long err;
-       int sp, nkeys, kix;
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+       unsigned long kflags = key->flags;
  
-       keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
-       key_check(keyring);
+       kenter("{%d}", key->serial);
  
-       /* top keyring must have search permission to begin the search */
-       err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
-       if (err < 0) {
-               key_ref = ERR_PTR(err);
-               goto error;
+       /* ignore keys not of this type */
+       if (key->type != ctx->index_key.type) {
+               kleave(" = 0 [!type]");
+               return 0;
         }
  
-       key_ref = ERR_PTR(-ENOTDIR);
-       if (keyring->type != &key_type_keyring)
-               goto error;
+       /* skip invalidated, revoked and expired keys */
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED))) {
+                       ctx->result = ERR_PTR(-EKEYREVOKED);
+                       kleave(" = %d [invrev]", ctx->skipped_ret);
+                       goto skipped;
+               }
  
-       rcu_read_lock();
+               if (key->expiry && ctx->now.tv_sec >= key->expiry) {
+                       ctx->result = ERR_PTR(-EKEYEXPIRED);
+                       kleave(" = %d [expire]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
  
-       now = current_kernel_time();
-       err = -EAGAIN;
-       sp = 0;
-
-       /* firstly we should check to see if this top-level keyring is what we
-        * are looking for */
-       key_ref = ERR_PTR(-EAGAIN);
-       kflags = keyring->flags;
-       if (keyring->type == type && match(keyring, description)) {
-               key = keyring;
-               if (no_state_check)
-                       goto found;
+       /* keys that don't match */
+       if (!ctx->match(key, ctx->match_data)) {
+               kleave(" = 0 [!match]");
+               return 0;
+       }
  
-               /* check it isn't negative and hasn't expired or been
-                * revoked */
-               if (kflags & (1 << KEY_FLAG_REVOKED))
-                       goto error_2;
-               if (key->expiry && now.tv_sec >= key->expiry)
-                       goto error_2;
-               key_ref = ERR_PTR(key->type_data.reject_error);
-               if (kflags & (1 << KEY_FLAG_NEGATIVE))
-                       goto error_2;
-               goto found;
+       /* key must have search permissions */
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+           key_task_permission(make_key_ref(key, ctx->possessed),
+                               ctx->cred, KEY_SEARCH) < 0) {
+               ctx->result = ERR_PTR(-EACCES);
+               kleave(" = %d [!perm]", ctx->skipped_ret);
+               goto skipped;
         }
  
-       /* otherwise, the top keyring must not be revoked, expired, or
-        * negatively instantiated if we are to search it */
-       key_ref = ERR_PTR(-EAGAIN);
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED) |
-                     (1 << KEY_FLAG_NEGATIVE)) ||
-           (keyring->expiry && now.tv_sec >= keyring->expiry))
-               goto error_2;
-
-       /* start processing a new keyring */
-descend:
-       kflags = keyring->flags;
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED)))
-               goto not_this_keyring;
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               /* we set a different error code if we pass a negative key */
+               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+                       smp_rmb();
+                       ctx->result = ERR_PTR(key->type_data.reject_error);
+                       kleave(" = %d [neg]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
  
-       keylist = rcu_dereference(keyring->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
+       /* Found */
+       ctx->result = make_key_ref(key, ctx->possessed);
+       kleave(" = 1 [found]");
+       return 1;
  
-       /* iterate through the keys in this keyring first */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (kix = 0; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               kflags = key->flags;
+skipped:
+       return ctx->skipped_ret;
+}
  
-               /* ignore keys not of this type */
-               if (key->type != type)
-                       continue;
+/*
+ * Search inside a keyring for a key.  We can search by walking to it
+ * directly based on its index-key or we can iterate over the entire
+ * tree looking for it, based on the match function.
+ */
+static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
+{
+       if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
+           KEYRING_SEARCH_LOOKUP_DIRECT) {
+               const void *object;
+
+               object = assoc_array_find(&keyring->keys,
+                                         &keyring_assoc_array_ops,
+                                         &ctx->index_key);
+               return object ? ctx->iterator(object, ctx) : 0;
+       }
+       return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
+}
  
-               /* skip invalidated, revoked and expired keys */
-               if (!no_state_check) {
-                       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                                     (1 << KEY_FLAG_REVOKED)))
-                               continue;
+/*
+ * Search a tree of keyrings that point to other keyrings up to the maximum
+ * depth.
+ */
+static bool search_nested_keyrings(struct key *keyring,
+                                  struct keyring_search_context *ctx)
+{
+       struct {
+               struct key *keyring;
+               struct assoc_array_node *node;
+               int slot;
+       } stack[KEYRING_SEARCH_MAX_DEPTH];
  
-                       if (key->expiry && now.tv_sec >= key->expiry)
-                               continue;
-               }
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       struct key *key;
+       int sp = 0, slot;
  
-               /* keys that don't match */
-               if (!match(key, description))
-                       continue;
+       kenter("{%d},{%s,%s}",
+              keyring->serial,
+              ctx->index_key.type->name,
+              ctx->index_key.description);
  
-               /* key must have search permissions */
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
-                       continue;
+       if (ctx->index_key.description)
+               ctx->index_key.desc_len = strlen(ctx->index_key.description);
  
-               if (no_state_check)
+       /* Check to see if this top-level keyring is what we are looking for
+        * and whether it is valid or not.
+        */
+       if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+           keyring_compare_object(keyring, &ctx->index_key)) {
+               ctx->skipped_ret = 2;
+               ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
+               switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
+               case 1:
                         goto found;
-
-               /* we set a different error code if we pass a negative key */
-               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
-                       err = key->type_data.reject_error;
-                       continue;
+               case 2:
+                       return false;
+               default:
+                       break;
                 }
+       }
+
+       ctx->skipped_ret = 0;
+       if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+               ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
  
+       /* Start processing a new keyring */
+descend_to_keyring:
+       kdebug("descend to %d", keyring->serial);
+       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED)))
+               goto not_this_keyring;
+
+       /* Search through the keys in this keyring before its searching its
+        * subtrees.
+        */
+       if (search_keyring(keyring, ctx))
                 goto found;
-       }
  
-       /* search through the keyrings nested in this one */
-       kix = 0;
-ascend:
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               if (key->type != &key_type_keyring)
-                       continue;
+       /* Then manually iterate through the keyrings nested in this one.
+        *
+        * Start from the root node of the index tree.  Because of the way the
+        * hash function has been set up, keyrings cluster on the leftmost
+        * branch of the root node (root slot 0) or in the root node itself.
+        * Non-keyrings avoid the leftmost branch of the root entirely (root
+        * slots 1-15).
+        */
+       ptr = ACCESS_ONCE(keyring->keys.root);
+       if (!ptr)
+               goto not_this_keyring;
  
-               /* recursively search nested keyrings
-                * - only search keyrings for which we have search permission
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               /* If the root is a shortcut, either the keyring only contains
+                * keyring pointers (everything clusters behind root slot 0) or
+                * doesn't contain any keyring pointers.
                  */
-               if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
+                       goto not_this_keyring;
+
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               node = assoc_array_ptr_to_node(ptr);
+               goto begin_node;
+       }
+
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+
+       ptr = node->slots[0];
+       if (!assoc_array_ptr_is_meta(ptr))
+               goto begin_node;
+
+descend_to_node:
+       /* Descend to a more distal node in this keyring's content tree and go
+        * through that.
+        */
+       kdebug("descend");
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               BUG_ON(!assoc_array_ptr_is_node(ptr));
+               node = assoc_array_ptr_to_node(ptr);
+       }
+
+begin_node:
+       kdebug("begin_node");
+       smp_read_barrier_depends();
+       slot = 0;
+ascend_to_node:
+       /* Go through the slots in a node */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+
+               if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
+                       goto descend_to_node;
+
+               if (!keyring_ptr_is_keyring(ptr))
                         continue;
  
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
+               key = keyring_ptr_to_key(ptr);
+
+               if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
+                       if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
+                               ctx->result = ERR_PTR(-ELOOP);
+                               return false;
+                       }
+                       goto not_this_keyring;
+               }
+
+               /* Search a nested keyring */
+               if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+                   key_task_permission(make_key_ref(key, ctx->possessed),
+                                       ctx->cred, KEY_SEARCH) < 0)
                         continue;
  
                 /* stack the current position */
                 stack[sp].keyring = keyring;
-               stack[sp].keylist = keylist;
-               stack[sp].kix = kix;
+               stack[sp].node = node;
+               stack[sp].slot = slot;
                 sp++;
  
                 /* begin again with the new keyring */
                 keyring = key;
-               goto descend;
+               goto descend_to_keyring;
         }
  
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
+       /* We've dealt with all the slots in the current node, so now we need
+        * to ascend to the parent and continue processing there.
+        */
+       ptr = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+
+       if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot;
+       }
+       if (!ptr)
+               goto not_this_keyring;
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+       slot++;
+
+       /* If we've ascended to the root (zero backpointer), we must have just
+        * finished processing the leftmost branch rather than the root slots -
+        * so there can't be any more keyrings for us to find.
+        */
+       if (node->back_pointer) {
+               kdebug("ascend %d", slot);
+               goto ascend_to_node;
+       }
+
+       /* The keyring we're looking at was disqualified or didn't contain a
+        * matching key.
+        */
  not_this_keyring:
-       if (sp > 0) {
-               /* resume the processing of a keyring higher up in the tree */
-               sp--;
-               keyring = stack[sp].keyring;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
+       kdebug("not_this_keyring %d", sp);
+       if (sp <= 0) {
+               kleave(" = false");
+               return false;
         }
  
-       key_ref = ERR_PTR(err);
-       goto error_2;
+       /* Resume the processing of a keyring higher up in the tree */
+       sp--;
+       keyring = stack[sp].keyring;
+       node = stack[sp].node;
+       slot = stack[sp].slot + 1;
+       kdebug("ascend to %d [%d]", keyring->serial, slot);
+       goto ascend_to_node;
  
-       /* we found a viable match */
+       /* We found a viable match */
  found:
-       atomic_inc(&key->usage);
-       key->last_used_at = now.tv_sec;
-       keyring->last_used_at = now.tv_sec;
-       while (sp > 0)
-               stack[--sp].keyring->last_used_at = now.tv_sec;
+       key = key_ref_to_ptr(ctx->result);
         key_check(key);
-       key_ref = make_key_ref(key, possessed);
-error_2:
+       if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
+               key->last_used_at = ctx->now.tv_sec;
+               keyring->last_used_at = ctx->now.tv_sec;
+               while (sp > 0)
+                       stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+       }
+       kleave(" = true");
+       return true;
+}
+
+/**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+ * caller to be searchable and keys can only be found if they too grant Search
+ * to the caller. The possession flag on the root keyring pointer controls use
+ * of the possessor bits in permissions checking of the entire tree.  In
+ * addition, the LSM gets to forbid keyring searches and key matches.
+ *
+ * The search is performed as a breadth-then-depth search up to the prescribed
+ * limit (KEYRING_SEARCH_MAX_DEPTH).
+ *
+ * Keys are matched to the type provided and are then filtered by the match
+ * function, which is given the description to use in any way it sees fit.  The
+ * match function may use any attributes of a key that it wishes to to
+ * determine the match.  Normally the match function from the key type would be
+ * used.
+ *
+ * RCU can be used to prevent the keyring key lists from disappearing without
+ * the need to take lots of locks.
+ *
+ * Returns a pointer to the found key and increments the key usage count if
+ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+ * specified keyring wasn't a keyring.
+ *
+ * In the case of a successful return, the possession attribute from
+ * @keyring_ref is propagated to the returned key reference.
+ */
+key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+                            struct keyring_search_context *ctx)
+{
+       struct key *keyring;
+       long err;
+
+       ctx->iterator = keyring_search_iterator;
+       ctx->possessed = is_key_possessed(keyring_ref);
+       ctx->result = ERR_PTR(-EAGAIN);
+
+       keyring = key_ref_to_ptr(keyring_ref);
+       key_check(keyring);
+
+       if (keyring->type != &key_type_keyring)
+               return ERR_PTR(-ENOTDIR);
+
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
+               err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+               if (err < 0)
+                       return ERR_PTR(err);
+       }
+
+       rcu_read_lock();
+       ctx->now = current_kernel_time();
+       if (search_nested_keyrings(keyring, ctx))
+               __key_get(key_ref_to_ptr(ctx->result));
         rcu_read_unlock();
-error:
-       return key_ref;
+       return ctx->result;
  }
  
  /**
@@ -507,77 +864,73 @@ error:
   * @description: The name of the keyring we want to find.
   *
   * As keyring_search_aux() above, but using the current task's credentials and
- * type's default matching function.
+ * type's default matching function and preferred search method.
   */
  key_ref_t keyring_search(key_ref_t keyring,
                          struct key_type *type,
                          const char *description)
  {
-       if (!type->match)
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = (type->def_lookup_type |
+                                          KEYRING_SEARCH_DO_STATE_CHECK),
+       };
+
+       if (!ctx.match)
                 return ERR_PTR(-ENOKEY);
  
-       return keyring_search_aux(keyring, current->cred,
-                                 type, description, type->match, false);
+       return keyring_search_aux(keyring, &ctx);
  }
  EXPORT_SYMBOL(keyring_search);
  
  /*
- * Search the given keyring only (no recursion).
+ * Search the given keyring for a key that might be updated.
   *
   * The caller must guarantee that the keyring is a keyring and that the
- * permission is granted to search the keyring as no check is made here.
- *
- * RCU is used to make it unnecessary to lock the keyring key list here.
+ * permission is granted to modify the keyring as no check is made here.  The
+ * caller must also hold a lock on the keyring semaphore.
   *
   * Returns a pointer to the found key with usage count incremented if
- * successful and returns -ENOKEY if not found.  Revoked keys and keys not
- * providing the requested permission are skipped over.
+ * successful and returns NULL if not found.  Revoked and invalidated keys are
+ * skipped over.
   *
   * If successful, the possession indicator is propagated from the keyring ref
   * to the returned key reference.
   */
-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                              const struct key_type *ktype,
-                              const char *description,
-                              key_perm_t perm)
+key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                            const struct keyring_index_key *index_key)
  {
-       struct keyring_list *klist;
-       unsigned long possessed;
         struct key *keyring, *key;
-       int nkeys, loop;
+       const void *object;
  
         keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
  
-       rcu_read_lock();
+       kenter("{%d},{%s,%s}",
+              keyring->serial, index_key->type->name, index_key->description);
  
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (klist) {
-               nkeys = klist->nkeys;
-               smp_rmb();
-               for (loop = 0; loop < nkeys ; loop++) {
-                       key = rcu_dereference(klist->keys[loop]);
-                       if (key->type == ktype &&
-                           (!key->type->match ||
-                            key->type->match(key, description)) &&
-                           key_permission(make_key_ref(key, possessed),
-                                          perm) == 0 &&
-                           !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                                           (1 << KEY_FLAG_REVOKED)))
-                           )
-                               goto found;
-               }
-       }
+       object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
+                                 index_key);
  
-       rcu_read_unlock();
-       return ERR_PTR(-ENOKEY);
+       if (object)
+               goto found;
+
+       kleave(" = NULL");
+       return NULL;
  
  found:
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-       rcu_read_unlock();
-       return make_key_ref(key, possessed);
+       key = keyring_ptr_to_key(object);
+       if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                         (1 << KEY_FLAG_REVOKED))) {
+               kleave(" = NULL [x]");
+               return NULL;
+       }
+       __key_get(key);
+       kleave(" = {%d}", key->serial);
+       return make_key_ref(key, is_key_possessed(keyring_ref));
  }
  
  /*
@@ -640,6 +993,19 @@ out:
         return keyring;
  }
  
+static int keyring_detect_cycle_iterator(const void *object,
+                                        void *iterator_data)
+{
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+
+       kenter("{%d}", key->serial);
+
+       BUG_ON(key != ctx->match_data);
+       ctx->result = ERR_PTR(-EDEADLK);
+       return 1;
+}
+
  /*
   * See if a cycle will will be created by inserting acyclic tree B in acyclic
   * tree A at the topmost level (ie: as a direct child of A).
@@ -649,116 +1015,39 @@ out:
   */
  static int keyring_detect_cycle(struct key *A, struct key *B)
  {
-       struct {
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
-
-       struct keyring_list *keylist;
-       struct key *subtree, *key;
-       int sp, nkeys, kix, ret;
+       struct keyring_search_context ctx = {
+               .index_key      = A->index_key,
+               .match_data     = A,
+               .iterator       = keyring_detect_cycle_iterator,
+               .flags          = (KEYRING_SEARCH_LOOKUP_DIRECT |
+                                  KEYRING_SEARCH_NO_STATE_CHECK |
+                                  KEYRING_SEARCH_NO_UPDATE_TIME |
+                                  KEYRING_SEARCH_NO_CHECK_PERM |
+                                  KEYRING_SEARCH_DETECT_TOO_DEEP),
+       };
  
         rcu_read_lock();
-
-       ret = -EDEADLK;
-       if (A == B)
-               goto cycle_detected;
-
-       subtree = B;
-       sp = 0;
-
-       /* start processing a new keyring */
-descend:
-       if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
-               goto not_this_keyring;
-
-       keylist = rcu_dereference(subtree->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
-       kix = 0;
-
-ascend:
-       /* iterate through the remaining keys in this keyring */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-
-               if (key == A)
-                       goto cycle_detected;
-
-               /* recursively check nested keyrings */
-               if (key->type == &key_type_keyring) {
-                       if (sp >= KEYRING_SEARCH_MAX_DEPTH)
-                               goto too_deep;
-
-                       /* stack the current position */
-                       stack[sp].keylist = keylist;
-                       stack[sp].kix = kix;
-                       sp++;
-
-                       /* begin again with the new keyring */
-                       subtree = key;
-                       goto descend;
-               }
-       }
-
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
-not_this_keyring:
-       if (sp > 0) {
-               /* resume the checking of a keyring higher up in the tree */
-               sp--;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
-       }
-
-       ret = 0; /* no cycles detected */
-
-error:
+       search_nested_keyrings(B, &ctx);
         rcu_read_unlock();
-       return ret;
-
-too_deep:
-       ret = -ELOOP;
-       goto error;
-
-cycle_detected:
-       ret = -EDEADLK;
-       goto error;
-}
-
-/*
- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist =
-               container_of(rcu, struct keyring_list, rcu);
-
-       if (klist->delkey != USHRT_MAX)
-               key_put(rcu_access_pointer(klist->keys[klist->delkey]));
-       kfree(klist);
+       return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
  }
  
  /*
   * Preallocate memory so that a key can be linked into to a keyring.
   */
-int __key_link_begin(struct key *keyring, const struct key_type *type,
-                    const char *description, unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring,
+                    const struct keyring_index_key *index_key,
+                    struct assoc_array_edit **_edit)
         __acquires(&keyring->sem)
         __acquires(&keyring_serialise_link_sem)
  {
-       struct keyring_list *klist, *nklist;
-       unsigned long prealloc;
-       unsigned max;
-       time_t lowest_lru;
-       size_t size;
-       int loop, lru, ret;
+       struct assoc_array_edit *edit;
+       int ret;
+
+       kenter("%d,%s,%s,",
+              keyring->serial, index_key->type->name, index_key->description);
  
-       kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
+       BUG_ON(index_key->desc_len == 0);
  
         if (keyring->type != &key_type_keyring)
                 return -ENOTDIR;
@@ -771,100 +1060,39 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
  
         /* serialise link/link calls to prevent parallel calls causing a cycle
          * when linking two keyring in opposite orders */
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                 down_write(&keyring_serialise_link_sem);
  
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       /* see if there's a matching key we can displace */
-       lru = -1;
-       if (klist && klist->nkeys > 0) {
-               lowest_lru = TIME_T_MAX;
-               for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-                       struct key *key = rcu_deref_link_locked(klist, loop,
-                                                               keyring);
-                       if (key->type == type &&
-                           strcmp(key->description, description) == 0) {
-                               /* Found a match - we'll replace the link with
-                                * one to the new key.  We record the slot
-                                * position.
-                                */
-                               klist->delkey = loop;
-                               prealloc = 0;
-                               goto done;
-                       }
-                       if (key->last_used_at < lowest_lru) {
-                               lowest_lru = key->last_used_at;
-                               lru = loop;
-                       }
-               }
-       }
-
-       /* If the keyring is full then do an LRU discard */
-       if (klist &&
-           klist->nkeys == klist->maxkeys &&
-           klist->maxkeys >= MAX_KEYRING_LINKS) {
-               kdebug("LRU discard %d\n", lru);
-               klist->delkey = lru;
-               prealloc = 0;
-               goto done;
-       }
-
-       /* check that we aren't going to overrun the user's quota */
-       ret = key_payload_reserve(keyring,
-                                 keyring->datalen + KEYQUOTA_LINK_BYTES);
-       if (ret < 0)
+       /* Create an edit script that will insert/replace the key in the
+        * keyring tree.
+        */
+       edit = assoc_array_insert(&keyring->keys,
+                                 &keyring_assoc_array_ops,
+                                 index_key,
+                                 NULL);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
                 goto error_sem;
+       }
  
-       if (klist && klist->nkeys < klist->maxkeys) {
-               /* there's sufficient slack space to append directly */
-               klist->delkey = klist->nkeys;
-               prealloc = KEY_LINK_FIXQUOTA;
-       } else {
-               /* grow the key list */
-               max = 4;
-               if (klist) {
-                       max += klist->maxkeys;
-                       if (max > MAX_KEYRING_LINKS)
-                               max = MAX_KEYRING_LINKS;
-                       BUG_ON(max <= klist->maxkeys);
-               }
-
-               size = sizeof(*klist) + sizeof(struct key *) * max;
-
-               ret = -ENOMEM;
-               nklist = kmalloc(size, GFP_KERNEL);
-               if (!nklist)
-                       goto error_quota;
-
-               nklist->maxkeys = max;
-               if (klist) {
-                       memcpy(nklist->keys, klist->keys,
-                              sizeof(struct key *) * klist->nkeys);
-                       nklist->delkey = klist->nkeys;
-                       nklist->nkeys = klist->nkeys + 1;
-                       klist->delkey = USHRT_MAX;
-               } else {
-                       nklist->nkeys = 1;
-                       nklist->delkey = 0;
-               }
-
-               /* add the key into the new space */
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
-               prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
+       /* If we're not replacing a link in-place then we're going to need some
+        * extra quota.
+        */
+       if (!edit->dead_leaf) {
+               ret = key_payload_reserve(keyring,
+                                         keyring->datalen + KEYQUOTA_LINK_BYTES);
+               if (ret < 0)
+                       goto error_cancel;
         }
  
-done:
-       *_prealloc = prealloc;
+       *_edit = edit;
         kleave(" = 0");
         return 0;
  
-error_quota:
-       /* undo the quota changes */
-       key_payload_reserve(keyring,
-                           keyring->datalen - KEYQUOTA_LINK_BYTES);
+error_cancel:
+       assoc_array_cancel_edit(edit);
  error_sem:
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                 up_write(&keyring_serialise_link_sem);
  error_krsem:
         up_write(&keyring->sem);
@@ -895,60 +1123,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
   * holds at most one link to any given key of a particular type+description
   * combination.
   */
-void __key_link(struct key *keyring, struct key *key,
-               unsigned long *_prealloc)
+void __key_link(struct key *key, struct assoc_array_edit **_edit)
  {
-       struct keyring_list *klist, *nklist;
-       struct key *discard;
-
-       nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
-       *_prealloc = 0;
-
-       kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-
-       /* there's a matching key we can displace or an empty slot in a newly
-        * allocated list we can fill */
-       if (nklist) {
-               kdebug("reissue %hu/%hu/%hu",
-                      nklist->delkey, nklist->nkeys, nklist->maxkeys);
-
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
-
-               rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-               /* dispose of the old keyring list and, if there was one, the
-                * displaced key */
-               if (klist) {
-                       kdebug("dispose %hu/%hu/%hu",
-                              klist->delkey, klist->nkeys, klist->maxkeys);
-                       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
-               }
-       } else if (klist->delkey < klist->nkeys) {
-               kdebug("replace %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               discard = rcu_dereference_protected(
-                       klist->keys[klist->delkey],
-                       rwsem_is_locked(&keyring->sem));
-               rcu_assign_pointer(klist->keys[klist->delkey], key);
-               /* The garbage collector will take care of RCU
-                * synchronisation */
-               key_put(discard);
-       } else {
-               /* there's sufficient slack space to append directly */
-               kdebug("append %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               RCU_INIT_POINTER(klist->keys[klist->delkey], key);
-               smp_wmb();
-               klist->nkeys++;
-       }
+       __key_get(key);
+       assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
+       assoc_array_apply_edit(*_edit);
+       *_edit = NULL;
  }
  
  /*
@@ -956,24 +1136,22 @@ void __key_link(struct key *keyring, struct key *key,
   *
   * Must be called with __key_link_begin() having being called.
   */
-void __key_link_end(struct key *keyring, struct key_type *type,
-                   unsigned long prealloc)
+void __key_link_end(struct key *keyring,
+                   const struct keyring_index_key *index_key,
+                   struct assoc_array_edit *edit)
         __releases(&keyring->sem)
         __releases(&keyring_serialise_link_sem)
  {
-       BUG_ON(type == NULL);
-       BUG_ON(type->name == NULL);
-       kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
+       BUG_ON(index_key->type == NULL);
+       kenter("%d,%s,", keyring->serial, index_key->type->name);
  
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                 up_write(&keyring_serialise_link_sem);
  
-       if (prealloc) {
-               if (prealloc & KEY_LINK_FIXQUOTA)
-                       key_payload_reserve(keyring,
-                                           keyring->datalen -
-                                           KEYQUOTA_LINK_BYTES);
-               kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
+       if (edit && !edit->dead_leaf) {
+               key_payload_reserve(keyring,
+                                   keyring->datalen - KEYQUOTA_LINK_BYTES);
+               assoc_array_cancel_edit(edit);
         }
         up_write(&keyring->sem);
  }
@@ -1000,20 +1178,28 @@ void __key_link_end(struct key *keyring, struct key_type *type,
   */
  int key_link(struct key *keyring, struct key *key)
  {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
         int ret;
  
+       kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+
         key_check(keyring);
         key_check(key);
  
-       ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
+       if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
+           !test_bit(KEY_FLAG_TRUSTED, &key->flags))
+               return -EPERM;
+
+       ret = __key_link_begin(keyring, &key->index_key, &edit);
         if (ret == 0) {
+               kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
                 ret = __key_link_check_live_key(keyring, key);
                 if (ret == 0)
-                       __key_link(keyring, key, &prealloc);
-               __key_link_end(keyring, key->type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(keyring, &key->index_key, edit);
         }
  
+       kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
         return ret;
  }
  EXPORT_SYMBOL(key_link);
@@ -1037,90 +1223,37 @@ EXPORT_SYMBOL(key_link);
   */
  int key_unlink(struct key *keyring, struct key *key)
  {
-       struct keyring_list *klist, *nklist;
-       int loop, ret;
+       struct assoc_array_edit *edit;
+       int ret;
  
         key_check(keyring);
         key_check(key);
  
-       ret = -ENOTDIR;
         if (keyring->type != &key_type_keyring)
-               goto error;
+               return -ENOTDIR;
  
         down_write(&keyring->sem);
  
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* search the keyring for the key */
-               for (loop = 0; loop < klist->nkeys; loop++)
-                       if (rcu_access_pointer(klist->keys[loop]) == key)
-                               goto key_is_present;
+       edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+                                 &key->index_key);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+               goto error;
         }
-
-       up_write(&keyring->sem);
         ret = -ENOENT;
-       goto error;
-
-key_is_present:
-       /* we need to copy the key list for RCU purposes */
-       nklist = kmalloc(sizeof(*klist) +
-                        sizeof(struct key *) * klist->maxkeys,
-                        GFP_KERNEL);
-       if (!nklist)
-               goto nomem;
-       nklist->maxkeys = klist->maxkeys;
-       nklist->nkeys = klist->nkeys - 1;
-
-       if (loop > 0)
-               memcpy(&nklist->keys[0],
-                      &klist->keys[0],
-                      loop * sizeof(struct key *));
-
-       if (loop < nklist->nkeys)
-               memcpy(&nklist->keys[loop],
-                      &klist->keys[loop + 1],
-                      (nklist->nkeys - loop) * sizeof(struct key *));
-
-       /* adjust the user's quota */
-       key_payload_reserve(keyring,
-                           keyring->datalen - KEYQUOTA_LINK_BYTES);
-
-       rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-       up_write(&keyring->sem);
-
-       /* schedule for later cleanup */
-       klist->delkey = loop;
-       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+       if (edit == NULL)
+               goto error;
  
+       assoc_array_apply_edit(edit);
+       key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
         ret = 0;
  
  error:
-       return ret;
-nomem:
-       ret = -ENOMEM;
         up_write(&keyring->sem);
-       goto error;
+       return ret;
  }
  EXPORT_SYMBOL(key_unlink);
  
-/*
- * Dispose of a keyring list after the RCU grace period, releasing the keys it
- * links to.
- */
-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist;
-       int loop;
-
-       klist = container_of(rcu, struct keyring_list, rcu);
-
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               key_put(rcu_access_pointer(klist->keys[loop]));
-
-       kfree(klist);
-}
-
  /**
   * keyring_clear - Clear a keyring
   * @keyring: The keyring to clear.
@@ -1131,33 +1264,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
   */
  int keyring_clear(struct key *keyring)
  {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
         int ret;
  
-       ret = -ENOTDIR;
-       if (keyring->type == &key_type_keyring) {
-               /* detach the pointer block with the locks held */
-               down_write(&keyring->sem);
-
-               klist = rcu_dereference_locked_keyring(keyring);
-               if (klist) {
-                       /* adjust the quota */
-                       key_payload_reserve(keyring,
-                                           sizeof(struct keyring_list));
-
-                       rcu_assign_pointer(keyring->payload.subscriptions,
-                                          NULL);
-               }
-
-               up_write(&keyring->sem);
+       if (keyring->type != &key_type_keyring)
+               return -ENOTDIR;
  
-               /* free the keys after the locks have been dropped */
-               if (klist)
-                       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+       down_write(&keyring->sem);
  
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+       } else {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
                 ret = 0;
         }
  
+       up_write(&keyring->sem);
         return ret;
  }
  EXPORT_SYMBOL(keyring_clear);
@@ -1169,111 +1294,68 @@ EXPORT_SYMBOL(keyring_clear);
   */
  static void keyring_revoke(struct key *keyring)
  {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
+
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (!IS_ERR(edit)) {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
+       }
+}
+
+static bool keyring_gc_select_iterator(void *object, void *iterator_data)
+{
+       struct key *key = keyring_ptr_to_key(object);
+       time_t *limit = iterator_data;
  
-       klist = rcu_dereference_locked_keyring(keyring);
+       if (key_is_dead(key, *limit))
+               return false;
+       key_get(key);
+       return true;
+}
  
-       /* adjust the quota */
-       key_payload_reserve(keyring, 0);
+static int keyring_gc_check_iterator(const void *object, void *iterator_data)
+{
+       const struct key *key = keyring_ptr_to_key(object);
+       time_t *limit = iterator_data;
  
-       if (klist) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       }
+       key_check(key);
+       return key_is_dead(key, *limit);
  }
  
  /*
- * Collect garbage from the contents of a keyring, replacing the old list with
- * a new one with the pointers all shuffled down.
+ * Garbage collect pointers from a keyring.
   *
- * Dead keys are classed as oned that are flagged as being dead or are revoked,
- * expired or negative keys that were revoked or expired before the specified
- * limit.
+ * Not called with any locks held.  The keyring's key struct will not be
+ * deallocated under us as only our caller may deallocate it.
   */
  void keyring_gc(struct key *keyring, time_t limit)
  {
-       struct keyring_list *klist, *new;
-       struct key *key;
-       int loop, keep, max;
-
-       kenter("{%x,%s}", key_serial(keyring), keyring->description);
-
-       down_write(&keyring->sem);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (!klist)
-               goto no_klist;
-
-       /* work out how many subscriptions we're keeping */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
-                                limit))
-                       keep++;
-
-       if (keep == klist->nkeys)
-               goto just_return;
-
-       /* allocate a new keyring payload */
-       max = roundup(keep, 4);
-       new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
-                     GFP_KERNEL);
-       if (!new)
-               goto nomem;
-       new->maxkeys = max;
-       new->nkeys = 0;
-       new->delkey = 0;
-
-       /* install the live keys
-        * - must take care as expired keys may be updated back to life
-        */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-               key = rcu_deref_link_locked(klist, loop, keyring);
-               if (!key_is_dead(key, limit)) {
-                       if (keep >= max)
-                               goto discard_new;
-                       RCU_INIT_POINTER(new->keys[keep++], key_get(key));
-               }
-       }
-       new->nkeys = keep;
-
-       /* adjust the quota */
-       key_payload_reserve(keyring,
-                           sizeof(struct keyring_list) +
-                           KEYQUOTA_LINK_BYTES * keep);
+       int result;
  
-       if (keep == 0) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               kfree(new);
-       } else {
-               rcu_assign_pointer(keyring->payload.subscriptions, new);
-       }
+       kenter("%x{%s}", keyring->serial, keyring->description ?: "");
  
-       up_write(&keyring->sem);
+       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED)))
+               goto dont_gc;
  
-       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       kleave(" [yes]");
-       return;
-
-discard_new:
-       new->nkeys = keep;
-       keyring_clear_rcu_disposal(&new->rcu);
-       up_write(&keyring->sem);
-       kleave(" [discard]");
-       return;
-
-just_return:
-       up_write(&keyring->sem);
-       kleave(" [no dead]");
-       return;
+       /* scan the keyring looking for dead keys */
+       rcu_read_lock();
+       result = assoc_array_iterate(&keyring->keys,
+                                    keyring_gc_check_iterator, &limit);
+       rcu_read_unlock();
+       if (result == true)
+               goto do_gc;
  
-no_klist:
-       up_write(&keyring->sem);
-       kleave(" [no_klist]");
+dont_gc:
+       kleave(" [no gc]");
         return;
  
-nomem:
+do_gc:
+       down_write(&keyring->sem);
+       assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
+                      keyring_gc_select_iterator, &limit);
         up_write(&keyring->sem);
-       kleave(" [oom]");
+       kleave(" [gc]");
  }
diff --git a/security/keys/persistent.c b/security/keys/persistent.c

new file mode 100644 (file)

index 0000000..0ad3ee2
--- /dev/null
+++ b/security/keys/persistent.c
@@ -0,0 +1,167 @@
+/* General persistent per-UID keyrings register
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/user_namespace.h>
+#include "internal.h"
+
+unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+
+/*
+ * Create the persistent keyring register for the current user namespace.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static int key_create_persistent_register(struct user_namespace *ns)
+{
+       struct key *reg = keyring_alloc(".persistent_register",
+                                       KUIDT_INIT(0), KGIDT_INIT(0),
+                                       current_cred(),
+                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                        KEY_USR_VIEW | KEY_USR_READ),
+                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       ns->persistent_keyring_register = reg;
+       return 0;
+}
+
+/*
+ * Create the persistent keyring for the specified user.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
+                                      struct keyring_index_key *index_key)
+{
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+
+       if (!ns->persistent_keyring_register) {
+               long err = key_create_persistent_register(ns);
+               if (err < 0)
+                       return ERR_PTR(err);
+       } else {
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               persistent_ref = find_key_to_update(reg_ref, index_key);
+               if (persistent_ref)
+                       return persistent_ref;
+       }
+
+       persistent = keyring_alloc(index_key->description,
+                                  uid, INVALID_GID, current_cred(),
+                                  ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                   KEY_USR_VIEW | KEY_USR_READ),
+                                  KEY_ALLOC_NOT_IN_QUOTA,
+                                  ns->persistent_keyring_register);
+       if (IS_ERR(persistent))
+               return ERR_CAST(persistent);
+
+       return make_key_ref(persistent, true);
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
+                              key_ref_t dest_ref)
+{
+       struct keyring_index_key index_key;
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+       char buf[32];
+       long ret;
+
+       /* Look in the register if it exists */
+       index_key.type = &key_type_keyring;
+       index_key.description = buf;
+       index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+
+       if (ns->persistent_keyring_register) {
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               down_read(&ns->persistent_keyring_register_sem);
+               persistent_ref = find_key_to_update(reg_ref, &index_key);
+               up_read(&ns->persistent_keyring_register_sem);
+
+               if (persistent_ref)
+                       goto found;
+       }
+
+       /* It wasn't in the register, so we'll need to create it.  We might
+        * also need to create the register.
+        */
+       down_write(&ns->persistent_keyring_register_sem);
+       persistent_ref = key_create_persistent(ns, uid, &index_key);
+       up_write(&ns->persistent_keyring_register_sem);
+       if (!IS_ERR(persistent_ref))
+               goto found;
+
+       return PTR_ERR(persistent_ref);
+
+found:
+       ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+       if (ret == 0) {
+               persistent = key_ref_to_ptr(persistent_ref);
+               ret = key_link(key_ref_to_ptr(dest_ref), persistent);
+               if (ret == 0) {
+                       key_set_timeout(persistent, persistent_keyring_expiry);
+                       ret = persistent->serial;               
+               }
+       }
+
+       key_ref_put(persistent_ref);
+       return ret;
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
+{
+       struct user_namespace *ns = current_user_ns();
+       key_ref_t dest_ref;
+       kuid_t uid;
+       long ret;
+
+       /* -1 indicates the current user */
+       if (_uid == (uid_t)-1) {
+               uid = current_uid();
+       } else {
+               uid = make_kuid(ns, _uid);
+               if (!uid_valid(uid))
+                       return -EINVAL;
+
+               /* You can only see your own persistent cache if you're not
+                * sufficiently privileged.
+                */
+               if (!uid_eq(uid, current_uid()) &&
+                   !uid_eq(uid, current_euid()) &&
+                   !ns_capable(ns, CAP_SETUID))
+                       return -EPERM;
+       }
+
+       /* There must be a destination keyring */
+       dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+       if (IS_ERR(dest_ref))
+               return PTR_ERR(dest_ref);
+       if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
+               ret = -ENOTDIR;
+               goto out_put_dest;
+       }
+
+       ret = key_get_persistent(ns, uid, dest_ref);
+
+out_put_dest:
+       key_ref_put(dest_ref);
+       return ret;
+}
diff --git a/security/keys/proc.c b/security/keys/proc.c

index 217b6855e815cb851153fa08646d2bf145cee579..88e9a466940f642af60f61b407888155ba057be5 100644 (file)
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
  
  static int proc_keys_show(struct seq_file *m, void *v)
  {
-       const struct cred *cred = current_cred();
         struct rb_node *_p = v;
         struct key *key = rb_entry(_p, struct key, serial_node);
         struct timespec now;
@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
         char xbuf[12];
         int rc;
  
+       struct keyring_search_context ctx = {
+               .index_key.type         = key->type,
+               .index_key.description  = key->description,
+               .cred                   = current_cred(),
+               .match                  = lookup_user_key_possessed,
+               .match_data             = key,
+               .flags                  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
+
         key_ref = make_key_ref(key, 0);
  
         /* determine if the key is possessed by this process (a test we can
          * skip if the key does not indicate the possessor can view it
          */
         if (key->perm & KEY_POS_VIEW) {
-               skey_ref = search_my_process_keyrings(key->type, key,
-                                                     lookup_user_key_possessed,
-                                                     true, cred);
+               skey_ref = search_my_process_keyrings(&ctx);
                 if (!IS_ERR(skey_ref)) {
                         key_ref_put(skey_ref);
                         key_ref = make_key_ref(key, 1);
@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
          * - the caller holds a spinlock, and thus the RCU read lock, making our
          *   access to __current_cred() safe
          */
-       rc = key_task_permission(key_ref, cred, KEY_VIEW);
+       rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
         if (rc < 0)
                 return 0;
  
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c

index 42defae1e161632e93b13b8194af1a30a09f2492..0cf8a130a267ca58fbc5599787c93b9913cfc576 100644 (file)
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
                 if (IS_ERR(keyring))
                         return PTR_ERR(keyring);
         } else {
-               atomic_inc(&keyring->usage);
+               __key_get(keyring);
         }
  
         /* install the keyring */
@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
   * In the case of a successful return, the possession attribute is set on the
   * returned key reference.
   */
-key_ref_t search_my_process_keyrings(struct key_type *type,
-                                    const void *description,
-                                    key_match_func_t match,
-                                    bool no_state_check,
-                                    const struct cred *cred)
+key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
  {
         key_ref_t key_ref, ret, err;
  
@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
         err = ERR_PTR(-EAGAIN);
  
         /* search the thread keyring first */
-       if (cred->thread_keyring) {
+       if (ctx->cred->thread_keyring) {
                 key_ref = keyring_search_aux(
-                       make_key_ref(cred->thread_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->thread_keyring, 1), ctx);
                 if (!IS_ERR(key_ref))
                         goto found;
  
@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
         }
  
         /* search the process keyring second */
-       if (cred->process_keyring) {
+       if (ctx->cred->process_keyring) {
                 key_ref = keyring_search_aux(
-                       make_key_ref(cred->process_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->process_keyring, 1), ctx);
                 if (!IS_ERR(key_ref))
                         goto found;
  
@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
         }
  
         /* search the session keyring */
-       if (cred->session_keyring) {
+       if (ctx->cred->session_keyring) {
                 rcu_read_lock();
                 key_ref = keyring_search_aux(
-                       make_key_ref(rcu_dereference(cred->session_keyring), 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
+                       ctx);
                 rcu_read_unlock();
  
                 if (!IS_ERR(key_ref))
@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
                 }
         }
         /* or search the user-session keyring */
-       else if (cred->user->session_keyring) {
+       else if (ctx->cred->user->session_keyring) {
                 key_ref = keyring_search_aux(
-                       make_key_ref(cred->user->session_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->user->session_keyring, 1),
+                       ctx);
                 if (!IS_ERR(key_ref))
                         goto found;
  
@@ -437,18 +431,14 @@ found:
   *
   * Return same as search_my_process_keyrings().
   */
-key_ref_t search_process_keyrings(struct key_type *type,
-                                 const void *description,
-                                 key_match_func_t match,
-                                 const struct cred *cred)
+key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
  {
         struct request_key_auth *rka;
         key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
  
         might_sleep();
  
-       key_ref = search_my_process_keyrings(type, description, match,
-                                            false, cred);
+       key_ref = search_my_process_keyrings(ctx);
         if (!IS_ERR(key_ref))
                 goto found;
         err = key_ref;
@@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
          * search the keyrings of the process mentioned there
          * - we don't permit access to request_key auth keys via this method
          */
-       if (cred->request_key_auth &&
-           cred == current_cred() &&
-           type != &key_type_request_key_auth
+       if (ctx->cred->request_key_auth &&
+           ctx->cred == current_cred() &&
+           ctx->index_key.type != &key_type_request_key_auth
             ) {
+               const struct cred *cred = ctx->cred;
+
                 /* defend against the auth key being revoked */
                 down_read(&cred->request_key_auth->sem);
  
-               if (key_validate(cred->request_key_auth) == 0) {
-                       rka = cred->request_key_auth->payload.data;
+               if (key_validate(ctx->cred->request_key_auth) == 0) {
+                       rka = ctx->cred->request_key_auth->payload.data;
  
-                       key_ref = search_process_keyrings(type, description,
-                                                         match, rka->cred);
+                       ctx->cred = rka->cred;
+                       key_ref = search_process_keyrings(ctx);
+                       ctx->cred = cred;
  
                         up_read(&cred->request_key_auth->sem);
  
@@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
  key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
                           key_perm_t perm)
  {
+       struct keyring_search_context ctx = {
+               .match  = lookup_user_key_possessed,
+               .flags  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
         struct request_key_auth *rka;
-       const struct cred *cred;
         struct key *key;
         key_ref_t key_ref, skey_ref;
         int ret;
  
  try_again:
-       cred = get_current_cred();
+       ctx.cred = get_current_cred();
         key_ref = ERR_PTR(-ENOKEY);
  
         switch (id) {
         case KEY_SPEC_THREAD_KEYRING:
-               if (!cred->thread_keyring) {
+               if (!ctx.cred->thread_keyring) {
                         if (!(lflags & KEY_LOOKUP_CREATE))
                                 goto error;
  
@@ -546,13 +543,13 @@ try_again:
                         goto reget_creds;
                 }
  
-               key = cred->thread_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->thread_keyring;
+               __key_get(key);
                 key_ref = make_key_ref(key, 1);
                 break;
  
         case KEY_SPEC_PROCESS_KEYRING:
-               if (!cred->process_keyring) {
+               if (!ctx.cred->process_keyring) {
                         if (!(lflags & KEY_LOOKUP_CREATE))
                                 goto error;
  
@@ -564,13 +561,13 @@ try_again:
                         goto reget_creds;
                 }
  
-               key = cred->process_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->process_keyring;
+               __key_get(key);
                 key_ref = make_key_ref(key, 1);
                 break;
  
         case KEY_SPEC_SESSION_KEYRING:
-               if (!cred->session_keyring) {
+               if (!ctx.cred->session_keyring) {
                         /* always install a session keyring upon access if one
                          * doesn't exist yet */
                         ret = install_user_keyrings();
@@ -580,13 +577,13 @@ try_again:
                                 ret = join_session_keyring(NULL);
                         else
                                 ret = install_session_keyring(
-                                       cred->user->session_keyring);
+                                       ctx.cred->user->session_keyring);
  
                         if (ret < 0)
                                 goto error;
                         goto reget_creds;
-               } else if (cred->session_keyring ==
-                          cred->user->session_keyring &&
+               } else if (ctx.cred->session_keyring ==
+                          ctx.cred->user->session_keyring &&
                            lflags & KEY_LOOKUP_CREATE) {
                         ret = join_session_keyring(NULL);
                         if (ret < 0)
@@ -595,33 +592,33 @@ try_again:
                 }
  
                 rcu_read_lock();
-               key = rcu_dereference(cred->session_keyring);
-               atomic_inc(&key->usage);
+               key = rcu_dereference(ctx.cred->session_keyring);
+               __key_get(key);
                 rcu_read_unlock();
                 key_ref = make_key_ref(key, 1);
                 break;
  
         case KEY_SPEC_USER_KEYRING:
-               if (!cred->user->uid_keyring) {
+               if (!ctx.cred->user->uid_keyring) {
                         ret = install_user_keyrings();
                         if (ret < 0)
                                 goto error;
                 }
  
-               key = cred->user->uid_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->uid_keyring;
+               __key_get(key);
                 key_ref = make_key_ref(key, 1);
                 break;
  
         case KEY_SPEC_USER_SESSION_KEYRING:
-               if (!cred->user->session_keyring) {
+               if (!ctx.cred->user->session_keyring) {
                         ret = install_user_keyrings();
                         if (ret < 0)
                                 goto error;
                 }
  
-               key = cred->user->session_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->session_keyring;
+               __key_get(key);
                 key_ref = make_key_ref(key, 1);
                 break;
  
@@ -631,29 +628,29 @@ try_again:
                 goto error;
  
         case KEY_SPEC_REQKEY_AUTH_KEY:
-               key = cred->request_key_auth;
+               key = ctx.cred->request_key_auth;
                 if (!key)
                         goto error;
  
-               atomic_inc(&key->usage);
+               __key_get(key);
                 key_ref = make_key_ref(key, 1);
                 break;
  
         case KEY_SPEC_REQUESTOR_KEYRING:
-               if (!cred->request_key_auth)
+               if (!ctx.cred->request_key_auth)
                         goto error;
  
-               down_read(&cred->request_key_auth->sem);
+               down_read(&ctx.cred->request_key_auth->sem);
                 if (test_bit(KEY_FLAG_REVOKED,
-                            &cred->request_key_auth->flags)) {
+                            &ctx.cred->request_key_auth->flags)) {
                         key_ref = ERR_PTR(-EKEYREVOKED);
                         key = NULL;
                 } else {
-                       rka = cred->request_key_auth->payload.data;
+                       rka = ctx.cred->request_key_auth->payload.data;
                         key = rka->dest_keyring;
-                       atomic_inc(&key->usage);
+                       __key_get(key);
                 }
-               up_read(&cred->request_key_auth->sem);
+               up_read(&ctx.cred->request_key_auth->sem);
                 if (!key)
                         goto error;
                 key_ref = make_key_ref(key, 1);
@@ -673,9 +670,13 @@ try_again:
                 key_ref = make_key_ref(key, 0);
  
                 /* check to see if we possess the key */
-               skey_ref = search_process_keyrings(key->type, key,
-                                                  lookup_user_key_possessed,
-                                                  cred);
+               ctx.index_key.type              = key->type;
+               ctx.index_key.description       = key->description;
+               ctx.index_key.desc_len          = strlen(key->description);
+               ctx.match_data                  = key;
+               kdebug("check possessed");
+               skey_ref = search_process_keyrings(&ctx);
+               kdebug("possessed=%p", skey_ref);
  
                 if (!IS_ERR(skey_ref)) {
                         key_put(key);
@@ -715,14 +716,14 @@ try_again:
                 goto invalid_key;
  
         /* check the permissions */
-       ret = key_task_permission(key_ref, cred, perm);
+       ret = key_task_permission(key_ref, ctx.cred, perm);
         if (ret < 0)
                 goto invalid_key;
  
         key->last_used_at = current_kernel_time().tv_sec;
  
  error:
-       put_cred(cred);
+       put_cred(ctx.cred);
         return key_ref;
  
  invalid_key:
@@ -733,7 +734,7 @@ invalid_key:
         /* if we attempted to install a keyring, then it may have caused new
          * creds to be installed */
  reget_creds:
-       put_cred(cred);
+       put_cred(ctx.cred);
         goto try_again;
  }
  
@@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork)
  
         commit_creds(new);
  }
+
+/*
+ * Make sure that root's user and user-session keyrings exist.
+ */
+static int __init init_root_keyring(void)
+{
+       return install_user_keyrings();
+}
+
+late_initcall(init_root_keyring);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c

index c411f9bb156b205751ae06983e85f547119a245f..381411941cc1abbc48a699b1c2c38042f1a05711 100644 (file)
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
   * May return a key that's already under construction instead if there was a
   * race between two thread calling request_key().
   */
-static int construct_alloc_key(struct key_type *type,
-                              const char *description,
+static int construct_alloc_key(struct keyring_search_context *ctx,
                                struct key *dest_keyring,
                                unsigned long flags,
                                struct key_user *user,
                                struct key **_key)
  {
-       const struct cred *cred = current_cred();
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
         struct key *key;
         key_perm_t perm;
         key_ref_t key_ref;
         int ret;
  
-       kenter("%s,%s,,,", type->name, description);
+       kenter("%s,%s,,,",
+              ctx->index_key.type->name, ctx->index_key.description);
  
         *_key = NULL;
         mutex_lock(&user->cons_lock);
  
         perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
         perm |= KEY_USR_VIEW;
-       if (type->read)
+       if (ctx->index_key.type->read)
                 perm |= KEY_POS_READ;
-       if (type == &key_type_keyring || type->update)
+       if (ctx->index_key.type == &key_type_keyring ||
+           ctx->index_key.type->update)
                 perm |= KEY_POS_WRITE;
  
-       key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+       key = key_alloc(ctx->index_key.type, ctx->index_key.description,
+                       ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
                         perm, flags);
         if (IS_ERR(key))
                 goto alloc_failed;
@@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type,
         set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
  
         if (dest_keyring) {
-               ret = __key_link_begin(dest_keyring, type, description,
-                                      &prealloc);
+               ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
                 if (ret < 0)
                         goto link_prealloc_failed;
         }
@@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type,
          * waited for locks */
         mutex_lock(&key_construction_mutex);
  
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(ctx);
         if (!IS_ERR(key_ref))
                 goto key_already_present;
  
         if (dest_keyring)
-               __key_link(dest_keyring, key, &prealloc);
+               __key_link(key, &edit);
  
         mutex_unlock(&key_construction_mutex);
         if (dest_keyring)
-               __key_link_end(dest_keyring, type, prealloc);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
         mutex_unlock(&user->cons_lock);
         *_key = key;
         kleave(" = 0 [%d]", key_serial(key));
@@ -414,8 +414,8 @@ key_already_present:
         if (dest_keyring) {
                 ret = __key_link_check_live_key(dest_keyring, key);
                 if (ret == 0)
-                       __key_link(dest_keyring, key, &prealloc);
-               __key_link_end(dest_keyring, type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
                 if (ret < 0)
                         goto link_check_failed;
         }
@@ -444,8 +444,7 @@ alloc_failed:
  /*
   * Commence key construction.
   */
-static struct key *construct_key_and_link(struct key_type *type,
-                                         const char *description,
+static struct key *construct_key_and_link(struct keyring_search_context *ctx,
                                           const char *callout_info,
                                           size_t callout_len,
                                           void *aux,
@@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
  
         construct_get_dest_keyring(&dest_keyring);
  
-       ret = construct_alloc_key(type, description, dest_keyring, flags, user,
-                                 &key);
+       ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
         key_user_put(user);
  
         if (ret == 0) {
@@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
                                  struct key *dest_keyring,
                                  unsigned long flags)
  {
-       const struct cred *cred = current_cred();
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
         struct key *key;
         key_ref_t key_ref;
         int ret;
  
         kenter("%s,%s,%p,%zu,%p,%p,%lx",
-              type->name, description, callout_info, callout_len, aux,
-              dest_keyring, flags);
+              ctx.index_key.type->name, ctx.index_key.description,
+              callout_info, callout_len, aux, dest_keyring, flags);
  
         /* search all the process keyrings for a key */
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(&ctx);
  
         if (!IS_ERR(key_ref)) {
                 key = key_ref_to_ptr(key_ref);
@@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
                 if (!callout_info)
                         goto error;
  
-               key = construct_key_and_link(type, description, callout_info,
-                                            callout_len, aux, dest_keyring,
-                                            flags);
+               key = construct_key_and_link(&ctx, callout_info, callout_len,
+                                            aux, dest_keyring, flags);
         }
  
  error:
@@ -592,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr)
                           intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
         if (ret < 0)
                 return ret;
-       if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+       if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+               smp_rmb();
                 return key->type_data.reject_error;
+       }
         return key_validate(key);
  }
  EXPORT_SYMBOL(wait_for_key_construction);
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c

index 85730d5a5a59a05c852b3d22c586778b117589fa..7495a93b4b9024dad78d526d17feb7d07f93016f 100644 (file)
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -18,6 +18,7 @@
  #include <linux/slab.h>
  #include <asm/uaccess.h>
  #include "internal.h"
+#include <keys/user-type.h>
  
  static int request_key_auth_instantiate(struct key *,
                                         struct key_preparsed_payload *);
@@ -221,33 +222,27 @@ error_alloc:
         return ERR_PTR(ret);
  }
  
-/*
- * See if an authorisation key is associated with a particular key.
- */
-static int key_get_instantiation_authkey_match(const struct key *key,
-                                              const void *_id)
-{
-       struct request_key_auth *rka = key->payload.data;
-       key_serial_t id = (key_serial_t)(unsigned long) _id;
-
-       return rka->target_key->serial == id;
-}
-
  /*
   * Search the current process's keyrings for the authorisation key for
   * instantiation of a key.
   */
  struct key *key_get_instantiation_authkey(key_serial_t target_id)
  {
-       const struct cred *cred = current_cred();
+       char description[16];
+       struct keyring_search_context ctx = {
+               .index_key.type         = &key_type_request_key_auth,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = user_match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
         struct key *authkey;
         key_ref_t authkey_ref;
  
-       authkey_ref = search_process_keyrings(
-               &key_type_request_key_auth,
-               (void *) (unsigned long) target_id,
-               key_get_instantiation_authkey_match,
-               cred);
+       sprintf(description, "%x", target_id);
+
+       authkey_ref = search_process_keyrings(&ctx);
  
         if (IS_ERR(authkey_ref)) {
                 authkey = ERR_CAST(authkey_ref);
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c

index ee32d181764ab876fa2c6b0470c4a65f937cd031..8c0af08760c809b2923d04c5cc3b114c75e27b27 100644 (file)
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
                 .extra1 = (void *) &zero,
                 .extra2 = (void *) &max,
         },
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       {
+               .procname = "persistent_keyring_expiry",
+               .data = &persistent_keyring_expiry,
+               .maxlen = sizeof(unsigned),
+               .mode = 0644,
+               .proc_handler = proc_dointvec_minmax,
+               .extra1 = (void *) &zero,
+               .extra2 = (void *) &max,
+       },
+#endif
         { }
  };
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c

index 55dc88939185812f70145427b96c991ed6d636e2..faa2caeb593f8524a059e79d58e09bf430a1f992 100644 (file)
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
   * arbitrary blob of data as the payload
   */
  struct key_type key_type_user = {
-       .name           = "user",
-       .instantiate    = user_instantiate,
-       .update         = user_update,
-       .match          = user_match,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
+       .name                   = "user",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = user_instantiate,
+       .update                 = user_update,
+       .match                  = user_match,
+       .revoke                 = user_revoke,
+       .destroy                = user_destroy,
+       .describe               = user_describe,
+       .read                   = user_read,
  };
  
  EXPORT_SYMBOL_GPL(key_type_user);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
   */
  struct key_type key_type_logon = {
         .name                   = "logon",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
         .instantiate            = user_instantiate,
         .update                 = user_update,
         .match                  = user_match,
diff --git a/security/lsm_audit.c b/security/lsm_audit.c

index 234bc2ab450c61b42b1db2b53f631ab72bc48a39..9a62045e6282467493567a52f546d1e8d269bcd6 100644 (file)
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -397,7 +397,8 @@ void common_lsm_audit(struct common_audit_data *a,
         if (a == NULL)
                 return;
         /* we use GFP_ATOMIC so we won't sleep */
-       ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+       ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN,
+                            AUDIT_AVC);
  
         if (ab == NULL)
                 return;
diff --git a/security/security.c b/security/security.c

index 4dc31f4f2700626cb951aed5e874f0ce18d8b064..15b6928592ef68aac565e3fc94daf4737b6adc54 100644 (file)
--- a/security/security.c
+++ b/security/security.c
@@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
         return security_ops->xfrm_policy_delete_security(ctx);
  }
  
-int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+int security_xfrm_state_alloc(struct xfrm_state *x,
+                             struct xfrm_user_sec_ctx *sec_ctx)
  {
-       return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0);
+       return security_ops->xfrm_state_alloc(x, sec_ctx);
  }
  EXPORT_SYMBOL(security_xfrm_state_alloc);
  
  int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
                                       struct xfrm_sec_ctx *polsec, u32 secid)
  {
-       if (!polsec)
-               return 0;
-       /*
-        * We want the context to be taken from secid which is usually
-        * from the sock.
-        */
-       return security_ops->xfrm_state_alloc_security(x, NULL, secid);
+       return security_ops->xfrm_state_alloc_acquire(x, polsec, secid);
  }
  
  int security_xfrm_state_delete(struct xfrm_state *x)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index c540795fb3f2647619cb4705281872e93592e21e..794c3ca49eac92998caa17be71a4bdc472c2e9c8 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -95,7 +95,9 @@
  #include "audit.h"
  #include "avc_ss.h"
  
-#define NUM_SEL_MNT_OPTS 5
+#define SB_TYPE_FMT "%s%s%s"
+#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0])
+#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : ""
  
  extern struct security_operations *security_ops;
  
@@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache;
   * This function checks the SECMARK reference counter to see if any SECMARK
   * targets are currently configured, if the reference counter is greater than
   * zero SECMARK is considered to be enabled.  Returns true (1) if SECMARK is
- * enabled, false (0) if SECMARK is disabled.
+ * enabled, false (0) if SECMARK is disabled.  If the always_check_network
+ * policy capability is enabled, SECMARK is always considered enabled.
   *
   */
  static int selinux_secmark_enabled(void)
  {
-       return (atomic_read(&selinux_secmark_refcount) > 0);
+       return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount));
+}
+
+/**
+ * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
+ *
+ * Description:
+ * This function checks if NetLabel or labeled IPSEC is enabled.  Returns true
+ * (1) if any are enabled or false (0) if neither are enabled.  If the
+ * always_check_network policy capability is enabled, peer labeling
+ * is always considered enabled.
+ *
+ */
+static int selinux_peerlbl_enabled(void)
+{
+       return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled());
  }
  
  /*
@@ -309,8 +327,11 @@ enum {
         Opt_defcontext = 3,
         Opt_rootcontext = 4,
         Opt_labelsupport = 5,
+       Opt_nextmntopt = 6,
  };
  
+#define NUM_SEL_MNT_OPTS       (Opt_nextmntopt - 1)
+
  static const match_table_t tokens = {
         {Opt_context, CONTEXT_STR "%s"},
         {Opt_fscontext, FSCONTEXT_STR "%s"},
@@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid,
         return rc;
  }
  
+static int selinux_is_sblabel_mnt(struct super_block *sb)
+{
+       struct superblock_security_struct *sbsec = sb->s_security;
+
+       if (sbsec->behavior == SECURITY_FS_USE_XATTR ||
+           sbsec->behavior == SECURITY_FS_USE_TRANS ||
+           sbsec->behavior == SECURITY_FS_USE_TASK)
+               return 1;
+
+       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
+       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+               return 1;
+
+       /*
+        * Special handling for rootfs. Is genfs but supports
+        * setting SELinux context on in-core inodes.
+        */
+       if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
+               return 1;
+
+       return 0;
+}
+
  static int sb_finish_set_opts(struct super_block *sb)
  {
         struct superblock_security_struct *sbsec = sb->s_security;
@@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb)
                    the first boot of the SELinux kernel before we have
                    assigned xattr values to the filesystem. */
                 if (!root_inode->i_op->getxattr) {
-                       printk(KERN_WARNING "SELinux: (dev %s, type %s) has no "
-                              "xattr support\n", sb->s_id, sb->s_type->name);
+                       printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no "
+                              "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb));
                         rc = -EOPNOTSUPP;
                         goto out;
                 }
@@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb)
                 if (rc < 0 && rc != -ENODATA) {
                         if (rc == -EOPNOTSUPP)
                                 printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) has no security xattr handler\n",
-                                      sb->s_id, sb->s_type->name);
+                                      SB_TYPE_FMT") has no security xattr handler\n",
+                                      sb->s_id, SB_TYPE_ARGS(sb));
                         else
                                 printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) getxattr errno %d\n", sb->s_id,
-                                      sb->s_type->name, -rc);
+                                      SB_TYPE_FMT") getxattr errno %d\n", sb->s_id,
+                                      SB_TYPE_ARGS(sb), -rc);
                         goto out;
                 }
         }
  
-       sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP);
-
         if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n",
-                      sb->s_id, sb->s_type->name);
+               printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n",
+                      sb->s_id, SB_TYPE_ARGS(sb));
         else
-               printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n",
-                      sb->s_id, sb->s_type->name,
+               printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n",
+                      sb->s_id, SB_TYPE_ARGS(sb),
                        labeling_behaviors[sbsec->behavior-1]);
  
-       if (sbsec->behavior == SECURITY_FS_USE_GENFS ||
-           sbsec->behavior == SECURITY_FS_USE_MNTPOINT ||
-           sbsec->behavior == SECURITY_FS_USE_NONE ||
-           sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               sbsec->flags &= ~SE_SBLABELSUPP;
-
-       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
-       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
-               sbsec->flags |= SE_SBLABELSUPP;
+       sbsec->flags |= SE_SBINITIALIZED;
+       if (selinux_is_sblabel_mnt(sb))
+               sbsec->flags |= SBLABEL_MNT;
  
         /* Initialize the root inode. */
         rc = inode_doinit_with_dentry(root_inode, root);
@@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
         if (!ss_initialized)
                 return -EINVAL;
  
+       /* make sure we always check enough bits to cover the mask */
+       BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS));
+
         tmp = sbsec->flags & SE_MNTMASK;
         /* count the number of mount options for this sb */
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < NUM_SEL_MNT_OPTS; i++) {
                 if (tmp & 0x01)
                         opts->num_mnt_opts++;
                 tmp >>= 1;
         }
         /* Check if the Label support flag is set */
-       if (sbsec->flags & SE_SBLABELSUPP)
+       if (sbsec->flags & SBLABEL_MNT)
                 opts->num_mnt_opts++;
  
         opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
@@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
                 opts->mnt_opts[i] = context;
                 opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
         }
-       if (sbsec->flags & SE_SBLABELSUPP) {
+       if (sbsec->flags & SBLABEL_MNT) {
                 opts->mnt_opts[i] = NULL;
-               opts->mnt_opts_flags[i++] = SE_SBLABELSUPP;
+               opts->mnt_opts_flags[i++] = SBLABEL_MNT;
         }
  
         BUG_ON(i != opts->num_mnt_opts);
@@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb,
         const struct cred *cred = current_cred();
         int rc = 0, i;
         struct superblock_security_struct *sbsec = sb->s_security;
-       const char *name = sb->s_type->name;
         struct inode *inode = sbsec->sb->s_root->d_inode;
         struct inode_security_struct *root_isec = inode->i_security;
         u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
@@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
         for (i = 0; i < num_opts; i++) {
                 u32 sid;
  
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                         continue;
                 rc = security_context_to_sid(mount_options[i],
                                              strlen(mount_options[i]), &sid);
                 if (rc) {
                         printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                         goto out;
                 }
                 switch (flags[i]) {
@@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                  * Determine the labeling behavior to use for this
                  * filesystem type.
                  */
-               rc = security_fs_use((sbsec->flags & SE_SBPROC) ?
-                                       "proc" : sb->s_type->name,
-                                       &sbsec->behavior, &sbsec->sid);
+               rc = security_fs_use(sb);
                 if (rc) {
                         printk(KERN_WARNING
                                 "%s: security_fs_use(%s) returned %d\n",
@@ -770,7 +806,8 @@ out:
  out_double_mount:
         rc = -EINVAL;
         printk(KERN_WARNING "SELinux: mount invalid.  Same superblock, different "
-              "security settings for (dev %s, type %s)\n", sb->s_id, name);
+              "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
         goto out;
  }
  
@@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m,
                 case DEFCONTEXT_MNT:
                         prefix = DEFCONTEXT_STR;
                         break;
-               case SE_SBLABELSUPP:
+               case SBLABEL_MNT:
                         seq_putc(m, ',');
                         seq_puts(m, LABELSUPP_STR);
                         continue;
@@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir,
         if (rc)
                 return rc;
  
-       if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                 rc = security_transition_sid(sid, dsec->sid, tclass,
                                              &dentry->d_name, &newsid);
                 if (rc)
@@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
                 u32 sid;
                 size_t len;
  
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                         continue;
                 len = strlen(mount_options[i]);
                 rc = security_context_to_sid(mount_options[i], len, &sid);
                 if (rc) {
                         printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, sb->s_type->name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                         goto out_free_opts;
                 }
                 rc = -EINVAL;
@@ -2482,8 +2519,8 @@ out_free_secdata:
         return rc;
  out_bad_option:
         printk(KERN_WARNING "SELinux: unable to change security options "
-              "during remount (dev %s, type=%s)\n", sb->s_id,
-              sb->s_type->name);
+              "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
         goto out_free_opts;
  }
  
@@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
         if ((sbsec->flags & SE_SBINITIALIZED) &&
             (sbsec->behavior == SECURITY_FS_USE_MNTPOINT))
                 newsid = sbsec->mntpoint_sid;
-       else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                 rc = security_transition_sid(sid, dsec->sid,
                                              inode_mode_to_security_class(inode->i_mode),
                                              qstr, &newsid);
@@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
                 isec->initialized = 1;
         }
  
-       if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP))
+       if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT))
                 return -EOPNOTSUPP;
  
         if (name)
@@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
                 return selinux_inode_setotherxattr(dentry, name);
  
         sbsec = inode->i_sb->s_security;
-       if (!(sbsec->flags & SE_SBLABELSUPP))
+       if (!(sbsec->flags & SBLABEL_MNT))
                 return -EOPNOTSUPP;
  
         if (!inode_owner_or_capable(inode))
@@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
         u32 nlbl_sid;
         u32 nlbl_type;
  
-       selinux_skb_xfrm_sid(skb, &xfrm_sid);
-       selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       err = selinux_skb_xfrm_sid(skb, &xfrm_sid);
+       if (unlikely(err))
+               return -EACCES;
+       err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       if (unlikely(err))
+               return -EACCES;
  
         err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid);
         if (unlikely(err)) {
@@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
                 return selinux_sock_rcv_skb_compat(sk, skb, family);
  
         secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
         if (!secmark_active && !peerlbl_active)
                 return 0;
  
@@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
  
         secmark_active = selinux_secmark_enabled();
         netlbl_active = netlbl_enabled();
-       peerlbl_active = netlbl_active || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
         if (!secmark_active && !peerlbl_active)
                 return NF_ACCEPT;
  
@@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
                 return NF_ACCEPT;
  #endif
         secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
         if (!secmark_active && !peerlbl_active)
                 return NF_ACCEPT;
  
@@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = {
         .xfrm_policy_clone_security =   selinux_xfrm_policy_clone,
         .xfrm_policy_free_security =    selinux_xfrm_policy_free,
         .xfrm_policy_delete_security =  selinux_xfrm_policy_delete,
-       .xfrm_state_alloc_security =    selinux_xfrm_state_alloc,
+       .xfrm_state_alloc =             selinux_xfrm_state_alloc,
+       .xfrm_state_alloc_acquire =     selinux_xfrm_state_alloc_acquire,
         .xfrm_state_free_security =     selinux_xfrm_state_free,
         .xfrm_state_delete_security =   selinux_xfrm_state_delete,
         .xfrm_policy_lookup =           selinux_xfrm_policy_lookup,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h

index aa47bcabb5f65e728aadbaa39cdecfa55d20aa16..b1dfe104945078ead53647c247c46aa6134fac2e 100644 (file)
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -58,8 +58,8 @@ struct superblock_security_struct {
         u32 sid;                        /* SID of file system superblock */
         u32 def_sid;                    /* default SID for labeling */
         u32 mntpoint_sid;               /* SECURITY_FS_USE_MNTPOINT context for files */
-       unsigned int behavior;          /* labeling behavior */
-       unsigned char flags;            /* which mount options were specified */
+       unsigned short behavior;        /* labeling behavior */
+       unsigned short flags;           /* which mount options were specified */
         struct mutex lock;
         struct list_head isec_head;
         spinlock_t isec_lock;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h

index 8fd8e18ea34019c863d91ba88268b8c4018f3410..fe341ae370049b39ac2012d665a64dd4dc9af198 100644 (file)
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -45,14 +45,15 @@
  /* Mask for just the mount related flags */
  #define SE_MNTMASK     0x0f
  /* Super block security struct flags for mount options */
+/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */
  #define CONTEXT_MNT    0x01
  #define FSCONTEXT_MNT  0x02
  #define ROOTCONTEXT_MNT        0x04
  #define DEFCONTEXT_MNT 0x08
+#define SBLABEL_MNT    0x10
  /* Non-mount related flags */
-#define SE_SBINITIALIZED       0x10
-#define SE_SBPROC              0x20
-#define SE_SBLABELSUPP 0x40
+#define SE_SBINITIALIZED       0x0100
+#define SE_SBPROC              0x0200
  
  #define CONTEXT_STR    "context="
  #define FSCONTEXT_STR  "fscontext="
@@ -68,12 +69,15 @@ extern int selinux_enabled;
  enum {
         POLICYDB_CAPABILITY_NETPEER,
         POLICYDB_CAPABILITY_OPENPERM,
+       POLICYDB_CAPABILITY_REDHAT1,
+       POLICYDB_CAPABILITY_ALWAYSNETWORK,
         __POLICYDB_CAPABILITY_MAX
  };
  #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
  
  extern int selinux_policycap_netpeer;
  extern int selinux_policycap_openperm;
+extern int selinux_policycap_alwaysnetwork;
  
  /*
   * type_datum properties
@@ -172,8 +176,7 @@ int security_get_allow_unknown(void);
  #define SECURITY_FS_USE_NATIVE         7 /* use native label support */
  #define SECURITY_FS_USE_MAX            7 /* Highest SECURITY_FS_USE_XXX */
  
-int security_fs_use(const char *fstype, unsigned int *behavior,
-       u32 *sid);
+int security_fs_use(struct super_block *sb);
  
  int security_genfs_sid(const char *fstype, char *name, u16 sclass,
         u32 *sid);
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h

index 6713f04e30ba8810415f88f7ed6e78cb5685f6f4..0dec76c64cf53853d0eea6aac983db307c8636b8 100644 (file)
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -10,29 +10,21 @@
  #include <net/flow.h>
  
  int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
-                             struct xfrm_user_sec_ctx *sec_ctx);
+                             struct xfrm_user_sec_ctx *uctx);
  int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                               struct xfrm_sec_ctx **new_ctxp);
  void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx);
  int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx);
  int selinux_xfrm_state_alloc(struct xfrm_state *x,
-       struct xfrm_user_sec_ctx *sec_ctx, u32 secid);
+                            struct xfrm_user_sec_ctx *uctx);
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid);
  void selinux_xfrm_state_free(struct xfrm_state *x);
  int selinux_xfrm_state_delete(struct xfrm_state *x);
  int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
  int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
-                       struct xfrm_policy *xp, const struct flowi *fl);
-
-/*
- * Extract the security blob from the sock (it's actually on the socket)
- */
-static inline struct inode_security_struct *get_sock_isec(struct sock *sk)
-{
-       if (!sk->sk_socket)
-               return NULL;
-
-       return SOCK_INODE(sk->sk_socket)->i_security;
-}
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl);
  
  #ifdef CONFIG_SECURITY_NETWORK_XFRM
  extern atomic_t selinux_xfrm_refcount;
@@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void)
         return (atomic_read(&selinux_xfrm_refcount) > 0);
  }
  
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
-                       struct common_audit_data *ad);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto);
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto);
  int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
  
  static inline void selinux_xfrm_notify_policyload(void)
@@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void)
         return 0;
  }
  
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad)
+static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                                           struct common_audit_data *ad)
  {
         return 0;
  }
  
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto)
+static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                                             struct common_audit_data *ad,
+                                             u8 proto)
  {
         return 0;
  }
  
-static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid,
+                                             int ckall)
  {
         *sid = SECSID_NULL;
         return 0;
@@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void)
  }
  #endif
  
-static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
+static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
  {
-       int err = selinux_xfrm_decode_session(skb, sid, 0);
-       BUG_ON(err);
+       return selinux_xfrm_decode_session(skb, sid, 0);
  }
  
  #endif /* _SELINUX_XFRM_H_ */
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c

index da4b8b2332802c9624f2f7f49ea8d622f96e180a..6235d052338b2e63b838711ed09c7ba1b04c67c6 100644 (file)
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
             sksec->nlbl_state != NLBL_CONNLABELED)
                 return 0;
  
-       local_bh_disable();
-       bh_lock_sock_nested(sk);
+       lock_sock(sk);
  
         /* connected sockets are allowed to disconnect when the address family
          * is set to AF_UNSPEC, if that is what is happening we want to reset
@@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
                 sksec->nlbl_state = NLBL_CONNLABELED;
  
  socket_connect_return:
-       bh_unlock_sock(sk);
-       local_bh_enable();
+       release_sock(sk);
         return rc;
  }
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c

index c5454c0477c346e4d814f5ff209feba86e5b86ad..03a72c32afd738ccad5c188bbe853202c32f53f6 100644 (file)
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node)
                 break;
         default:
                 BUG();
+               return;
         }
  
         /* we need to impose a limit on the growth of the hash table so check
@@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
                 break;
         default:
                 BUG();
+               ret = -EINVAL;
         }
         if (ret != 0)
                 goto out;
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c

index 855e464e92efb9916535957ed53a3c9df2c1a33f..332ac8a80cf5b62c77bff350f6a92698d76a8e0f 100644 (file)
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -116,6 +116,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] =
         { AUDIT_MAKE_EQUIV,     NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
         { AUDIT_TTY_GET,        NETLINK_AUDIT_SOCKET__NLMSG_READ     },
         { AUDIT_TTY_SET,        NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT   },
+       { AUDIT_GET_FEATURE,    NETLINK_AUDIT_SOCKET__NLMSG_READ     },
+       { AUDIT_SET_FEATURE,    NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
  };
  
  
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c

index ff427733c2903cab275a05da0887478850e1e374..5122affe06a8840e193150d62bd9b2f996fe67fe 100644 (file)
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -44,7 +44,9 @@
  /* Policy capability filenames */
  static char *policycap_names[] = {
         "network_peer_controls",
-       "open_perms"
+       "open_perms",
+       "redhat1",
+       "always_check_network"
  };
  
  unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c

index 30f119b1d1ec36a95dc456c52b5b0ac1a6868514..820313a04d49bf4c4a8bc0f04ea01514ff184a64 100644 (file)
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -213,7 +213,12 @@ netlbl_import_failure:
  }
  #endif /* CONFIG_NETLABEL */
  
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
+/*
+ * Check to see if all the bits set in e2 are also set in e1. Optionally,
+ * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed
+ * last_e2bit.
+ */
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit)
  {
         struct ebitmap_node *n1, *n2;
         int i;
@@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
  
         n1 = e1->node;
         n2 = e2->node;
+
         while (n1 && n2 && (n1->startbit <= n2->startbit)) {
                 if (n1->startbit < n2->startbit) {
                         n1 = n1->next;
                         continue;
                 }
-               for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
+               for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; )
+                       i--;    /* Skip trailing NULL map entries */
+               if (last_e2bit && (i >= 0)) {
+                       u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE +
+                                        __fls(n2->maps[i]);
+                       if (lastsetbit > last_e2bit)
+                               return 0;
+               }
+
+               while (i >= 0) {
                         if ((n1->maps[i] & n2->maps[i]) != n2->maps[i])
                                 return 0;
+                       i--;
                 }
  
                 n1 = n1->next;
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h

index 922f8afa89dd5837e2617daaf793db0e40ad009e..712c8a7b8e8b879d3835b5ee3650b66baa46e106 100644 (file)
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -16,7 +16,13 @@
  
  #include <net/netlabel.h>
  
-#define EBITMAP_UNIT_NUMS      ((32 - sizeof(void *) - sizeof(u32))    \
+#ifdef CONFIG_64BIT
+#define        EBITMAP_NODE_SIZE       64
+#else
+#define        EBITMAP_NODE_SIZE       32
+#endif
+
+#define EBITMAP_UNIT_NUMS      ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\
                                         / sizeof(unsigned long))
  #define EBITMAP_UNIT_SIZE      BITS_PER_LONG
  #define EBITMAP_SIZE           (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
@@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
  
  int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
  int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit);
  int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
  int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
  void ebitmap_destroy(struct ebitmap *e);
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c

index 40de8d3f208ecf95db162f4ae355d0d53ba99265..c85bc1ec040c0c58f93772004361cbcd04861575 100644 (file)
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context,
  int mls_level_isvalid(struct policydb *p, struct mls_level *l)
  {
         struct level_datum *levdatum;
-       struct ebitmap_node *node;
-       int i;
  
         if (!l->sens || l->sens > p->p_levels.nprim)
                 return 0;
@@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l)
         if (!levdatum)
                 return 0;
  
-       ebitmap_for_each_positive_bit(&l->cat, node, i) {
-               if (i > p->p_cats.nprim)
-                       return 0;
-               if (!ebitmap_get_bit(&levdatum->level->cat, i)) {
-                       /*
-                        * Category may not be associated with
-                        * sensitivity.
-                        */
-                       return 0;
-               }
-       }
-
-       return 1;
+       /*
+        * Return 1 iff all the bits set in l->cat are also be set in
+        * levdatum->level->cat and no bit in l->cat is larger than
+        * p->p_cats.nprim.
+        */
+       return ebitmap_contains(&levdatum->level->cat, &l->cat,
+                               p->p_cats.nprim);
  }
  
  int mls_range_isvalid(struct policydb *p, struct mls_range *r)
diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h

index 03bed52a80526abfbda766a33859595cc1d8bfa5..e93648774137c601f5ec90ce14a03983655ce36d 100644 (file)
--- a/security/selinux/ss/mls_types.h
+++ b/security/selinux/ss/mls_types.h
@@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2)
  static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2)
  {
         return ((l1->sens >= l2->sens) &&
-               ebitmap_contains(&l1->cat, &l2->cat));
+               ebitmap_contains(&l1->cat, &l2->cat, 0));
  }
  
  #define mls_level_incomp(l1, l2) \
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c

index c8adde3aff8fdbe93fb2f867e55f71b9879685a5..f6195ebde3c94eef0cdf1cf92933246069b25059 100644 (file)
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr)
  
  static int range_write(struct policydb *p, void *fp)
  {
-       size_t nel;
         __le32 buf[1];
-       int rc;
+       int rc, nel;
         struct policy_data pd;
  
         pd.p = p;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c

index b4feecc3fe0110d10bbdc183c369a03ab8495a6c..ee470a0b5c27fdad95a59b258792b6182435b999 100644 (file)
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -72,6 +72,7 @@
  
  int selinux_policycap_netpeer;
  int selinux_policycap_openperm;
+int selinux_policycap_alwaysnetwork;
  
  static DEFINE_RWLOCK(policy_rwlock);
  
@@ -1812,6 +1813,8 @@ static void security_load_policycaps(void)
                                                   POLICYDB_CAPABILITY_NETPEER);
         selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps,
                                                   POLICYDB_CAPABILITY_OPENPERM);
+       selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps,
+                                                 POLICYDB_CAPABILITY_ALWAYSNETWORK);
  }
  
  static int security_preserve_bools(struct policydb *p);
@@ -2323,43 +2326,74 @@ out:
  
  /**
   * security_fs_use - Determine how to handle labeling for a filesystem.
- * @fstype: filesystem type
- * @behavior: labeling behavior
- * @sid: SID for filesystem (superblock)
+ * @sb: superblock in question
   */
-int security_fs_use(
-       const char *fstype,
-       unsigned int *behavior,
-       u32 *sid)
+int security_fs_use(struct super_block *sb)
  {
         int rc = 0;
         struct ocontext *c;
+       struct superblock_security_struct *sbsec = sb->s_security;
+       const char *fstype = sb->s_type->name;
+       const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL;
+       struct ocontext *base = NULL;
  
         read_lock(&policy_rwlock);
  
-       c = policydb.ocontexts[OCON_FSUSE];
-       while (c) {
-               if (strcmp(fstype, c->u.name) == 0)
+       for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) {
+               char *sub;
+               int baselen;
+
+               baselen = strlen(fstype);
+
+               /* if base does not match, this is not the one */
+               if (strncmp(fstype, c->u.name, baselen))
+                       continue;
+
+               /* if there is no subtype, this is the one! */
+               if (!subtype)
+                       break;
+
+               /* skip past the base in this entry */
+               sub = c->u.name + baselen;
+
+               /* entry is only a base. save it. keep looking for subtype */
+               if (sub[0] == '\0') {
+                       base = c;
+                       continue;
+               }
+
+               /* entry is not followed by a subtype, so it is not a match */
+               if (sub[0] != '.')
+                       continue;
+
+               /* whew, we found a subtype of this fstype */
+               sub++; /* move past '.' */
+
+               /* exact match of fstype AND subtype */
+               if (!strcmp(subtype, sub))
                         break;
-               c = c->next;
         }
  
+       /* in case we had found an fstype match but no subtype match */
+       if (!c)
+               c = base;
+
         if (c) {
-               *behavior = c->v.behavior;
+               sbsec->behavior = c->v.behavior;
                 if (!c->sid[0]) {
                         rc = sidtab_context_to_sid(&sidtab, &c->context[0],
                                                    &c->sid[0]);
                         if (rc)
                                 goto out;
                 }
-               *sid = c->sid[0];
+               sbsec->sid = c->sid[0];
         } else {
-               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
+               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid);
                 if (rc) {
-                       *behavior = SECURITY_FS_USE_NONE;
+                       sbsec->behavior = SECURITY_FS_USE_NONE;
                         rc = 0;
                 } else {
-                       *behavior = SECURITY_FS_USE_GENFS;
+                       sbsec->behavior = SECURITY_FS_USE_GENFS;
                 }
         }
  
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c

index d030818862146732ebe30c8cc3f266d485ef0677..a91d205ec0c6094cc9a0fecb5d427d4d24b1ed9a 100644 (file)
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -56,7 +56,7 @@
  atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0);
  
  /*
- * Returns true if an LSM/SELinux context
+ * Returns true if the context is an LSM/SELinux context.
   */
  static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
  {
@@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
  }
  
  /*
- * Returns true if the xfrm contains a security blob for SELinux
+ * Returns true if the xfrm contains a security blob for SELinux.
   */
  static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
  {
@@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
  }
  
  /*
- * LSM hook implementation that authorizes that a flow can use
- * a xfrm policy rule.
+ * Allocates a xfrm_sec_state and populates it using the supplied security
+ * xfrm_user_sec_ctx context.
   */
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp,
+                                  struct xfrm_user_sec_ctx *uctx)
  {
         int rc;
-       u32 sel_sid;
+       const struct task_security_struct *tsec = current_security();
+       struct xfrm_sec_ctx *ctx = NULL;
+       u32 str_len;
  
-       /* Context sid is either set to label or ANY_ASSOC */
-       if (ctx) {
-               if (!selinux_authorizable_ctx(ctx))
-                       return -EINVAL;
-
-               sel_sid = ctx->ctx_sid;
-       } else
-               /*
-                * All flows should be treated as polmatch'ing an
-                * otherwise applicable "non-labeled" policy. This
-                * would prevent inadvertent "leaks".
-                */
-               return 0;
+       if (ctxp == NULL || uctx == NULL ||
+           uctx->ctx_doi != XFRM_SC_DOI_LSM ||
+           uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
+               return -EINVAL;
  
-       rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__POLMATCH,
-                         NULL);
+       str_len = uctx->ctx_len;
+       if (str_len >= PAGE_SIZE)
+               return -ENOMEM;
  
-       if (rc == -EACCES)
-               return -ESRCH;
+       ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
  
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, &uctx[1], str_len);
+       ctx->ctx_str[str_len] = '\0';
+       rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid);
+       if (rc)
+               goto err;
+
+       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL);
+       if (rc)
+               goto err;
+
+       *ctxp = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+       return 0;
+
+err:
+       kfree(ctx);
         return rc;
  }
  
+/*
+ * Free the xfrm_sec_ctx structure.
+ */
+static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx)
+{
+       if (!ctx)
+               return;
+
+       atomic_dec(&selinux_xfrm_refcount);
+       kfree(ctx);
+}
+
+/*
+ * Authorize the deletion of a labeled SA or policy rule.
+ */
+static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx)
+{
+       const struct task_security_struct *tsec = current_security();
+
+       if (!ctx)
+               return 0;
+
+       return avc_has_perm(tsec->sid, ctx->ctx_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
+                           NULL);
+}
+
+/*
+ * LSM hook implementation that authorizes that a flow can use a xfrm policy
+ * rule.
+ */
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+{
+       int rc;
+
+       /* All flows should be treated as polmatch'ing an otherwise applicable
+        * "non-labeled" policy. This would prevent inadvertent "leaks". */
+       if (!ctx)
+               return 0;
+
+       /* Context sid is either set to label or ANY_ASSOC */
+       if (!selinux_authorizable_ctx(ctx))
+               return -EINVAL;
+
+       rc = avc_has_perm(fl_secid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL);
+       return (rc == -EACCES ? -ESRCH : rc);
+}
+
  /*
   * LSM hook implementation that authorizes that a state matches
   * the given policy, flow combo.
   */
-
-int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
-                       const struct flowi *fl)
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl)
  {
         u32 state_sid;
-       int rc;
  
         if (!xp->security)
                 if (x->security)
@@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *
         if (fl->flowi_secid != state_sid)
                 return 0;
  
-       rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO,
-                         NULL)? 0:1;
-
-       /*
-        * We don't need a separate SA Vs. policy polmatch check
-        * since the SA is now of the same label as the flow and
-        * a flow Vs. policy polmatch check had already happened
-        * in selinux_xfrm_policy_lookup() above.
-        */
-
-       return rc;
+       /* We don't need a separate SA Vs. policy polmatch check since the SA
+        * is now of the same label as the flow and a flow Vs. policy polmatch
+        * check had already happened in selinux_xfrm_policy_lookup() above. */
+       return (avc_has_perm(fl->flowi_secid, state_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO,
+                           NULL) ? 0 : 1);
  }
  
  /*
   * LSM hook implementation that checks and/or returns the xfrm sid for the
   * incoming packet.
   */
-
  int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
  {
+       u32 sid_session = SECSID_NULL;
         struct sec_path *sp;
  
-       *sid = SECSID_NULL;
-
         if (skb == NULL)
-               return 0;
+               goto out;
  
         sp = skb->sp;
         if (sp) {
-               int i, sid_set = 0;
+               int i;
  
-               for (i = sp->len-1; i >= 0; i--) {
+               for (i = sp->len - 1; i >= 0; i--) {
                         struct xfrm_state *x = sp->xvec[i];
                         if (selinux_authorizable_xfrm(x)) {
                                 struct xfrm_sec_ctx *ctx = x->security;
  
-                               if (!sid_set) {
-                                       *sid = ctx->ctx_sid;
-                                       sid_set = 1;
-
+                               if (sid_session == SECSID_NULL) {
+                                       sid_session = ctx->ctx_sid;
                                         if (!ckall)
-                                               break;
-                               } else if (*sid != ctx->ctx_sid)
+                                               goto out;
+                               } else if (sid_session != ctx->ctx_sid) {
+                                       *sid = SECSID_NULL;
                                         return -EINVAL;
+                               }
                         }
                 }
         }
  
-       return 0;
-}
-
-/*
- * Security blob allocation for xfrm_policy and xfrm_state
- * CTX does not have a meaningful value on input
- */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
-       struct xfrm_user_sec_ctx *uctx, u32 sid)
-{
-       int rc = 0;
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = NULL;
-       char *ctx_str = NULL;
-       u32 str_len;
-
-       BUG_ON(uctx && sid);
-
-       if (!uctx)
-               goto not_from_user;
-
-       if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
-               return -EINVAL;
-
-       str_len = uctx->ctx_len;
-       if (str_len >= PAGE_SIZE)
-               return -ENOMEM;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len + 1,
-                             GFP_KERNEL);
-
-       if (!ctx)
-               return -ENOMEM;
-
-       ctx->ctx_doi = uctx->ctx_doi;
-       ctx->ctx_len = str_len;
-       ctx->ctx_alg = uctx->ctx_alg;
-
-       memcpy(ctx->ctx_str,
-              uctx+1,
-              str_len);
-       ctx->ctx_str[str_len] = 0;
-       rc = security_context_to_sid(ctx->ctx_str,
-                                    str_len,
-                                    &ctx->ctx_sid);
-
-       if (rc)
-               goto out;
-
-       /*
-        * Does the subject have permission to set security context?
-        */
-       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
-                         SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SETCONTEXT, NULL);
-       if (rc)
-               goto out;
-
-       return rc;
-
-not_from_user:
-       rc = security_sid_to_context(sid, &ctx_str, &str_len);
-       if (rc)
-               goto out;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len,
-                             GFP_ATOMIC);
-
-       if (!ctx) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       ctx->ctx_doi = XFRM_SC_DOI_LSM;
-       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
-       ctx->ctx_sid = sid;
-       ctx->ctx_len = str_len;
-       memcpy(ctx->ctx_str,
-              ctx_str,
-              str_len);
-
-       goto out2;
-
  out:
-       *ctxp = NULL;
-       kfree(ctx);
-out2:
-       kfree(ctx_str);
-       return rc;
+       *sid = sid_session;
+       return 0;
  }
  
  /*
- * LSM hook implementation that allocs and transfers uctx spec to
- * xfrm_policy.
+ * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy.
   */
  int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
                               struct xfrm_user_sec_ctx *uctx)
  {
-       int err;
-
-       BUG_ON(!uctx);
-
-       err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-
-       return err;
+       return selinux_xfrm_alloc_user(ctxp, uctx);
  }
  
-
  /*
- * LSM hook implementation that copies security data structure from old to
- * new for policy cloning.
+ * LSM hook implementation that copies security data structure from old to new
+ * for policy cloning.
   */
  int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                               struct xfrm_sec_ctx **new_ctxp)
  {
         struct xfrm_sec_ctx *new_ctx;
  
-       if (old_ctx) {
-               new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len,
-                                 GFP_ATOMIC);
-               if (!new_ctx)
-                       return -ENOMEM;
+       if (!old_ctx)
+               return 0;
+
+       new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len,
+                         GFP_ATOMIC);
+       if (!new_ctx)
+               return -ENOMEM;
+       atomic_inc(&selinux_xfrm_refcount);
+       *new_ctxp = new_ctx;
  
-               memcpy(new_ctx, old_ctx, sizeof(*new_ctx));
-               memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len);
-               atomic_inc(&selinux_xfrm_refcount);
-               *new_ctxp = new_ctx;
-       }
         return 0;
  }
  
@@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
   */
  void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
  {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(ctx);
+       selinux_xfrm_free(ctx);
  }
  
  /*
@@ -336,31 +291,55 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
   */
  int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
  {
-       const struct task_security_struct *tsec = current_security();
-
-       if (!ctx)
-               return 0;
+       return selinux_xfrm_delete(ctx);
+}
  
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+/*
+ * LSM hook implementation that allocates a xfrm_sec_state, populates it using
+ * the supplied security context, and assigns it to the xfrm_state.
+ */
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+                            struct xfrm_user_sec_ctx *uctx)
+{
+       return selinux_xfrm_alloc_user(&x->security, uctx);
  }
  
  /*
- * LSM hook implementation that allocs and transfers sec_ctx spec to
- * xfrm_state.
+ * LSM hook implementation that allocates a xfrm_sec_state and populates based
+ * on a secid.
   */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
-               u32 secid)
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid)
  {
-       int err;
+       int rc;
+       struct xfrm_sec_ctx *ctx;
+       char *ctx_str = NULL;
+       int str_len;
+
+       if (!polsec)
+               return 0;
  
-       BUG_ON(!x);
+       if (secid == 0)
+               return -EINVAL;
  
-       err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-       return err;
+       rc = security_sid_to_context(secid, &ctx_str, &str_len);
+       if (rc)
+               return rc;
+
+       ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_sid = secid;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, ctx_str, str_len);
+       kfree(ctx_str);
+
+       x->security = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+       return 0;
  }
  
  /*
@@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct
   */
  void selinux_xfrm_state_free(struct xfrm_state *x)
  {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(x->security);
+       selinux_xfrm_free(x->security);
  }
  
- /*
-  * LSM hook implementation that authorizes deletion of labeled SAs.
-  */
+/*
+ * LSM hook implementation that authorizes deletion of labeled SAs.
+ */
  int selinux_xfrm_state_delete(struct xfrm_state *x)
  {
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = x->security;
-
-       if (!ctx)
-               return 0;
-
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+       return selinux_xfrm_delete(x->security);
  }
  
  /*
@@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
   * we need to check for unlabelled access since this may not have
   * gone thru the IPSec process.
   */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                               struct common_audit_data *ad)
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad)
  {
-       int i, rc = 0;
-       struct sec_path *sp;
-       u32 sel_sid = SECINITSID_UNLABELED;
-
-       sp = skb->sp;
+       int i;
+       struct sec_path *sp = skb->sp;
+       u32 peer_sid = SECINITSID_UNLABELED;
  
         if (sp) {
                 for (i = 0; i < sp->len; i++) {
@@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
  
                         if (x && selinux_authorizable_xfrm(x)) {
                                 struct xfrm_sec_ctx *ctx = x->security;
-                               sel_sid = ctx->ctx_sid;
+                               peer_sid = ctx->ctx_sid;
                                 break;
                         }
                 }
         }
  
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
-
-       rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__RECVFROM, ad);
-
-       return rc;
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, peer_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad);
  }
  
  /*
@@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
   * If we do have a authorizable security association, then it has already been
   * checked in the selinux_xfrm_state_pol_flow_match hook above.
   */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                                       struct common_audit_data *ad, u8 proto)
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto)
  {
         struct dst_entry *dst;
-       int rc = 0;
-
-       dst = skb_dst(skb);
-
-       if (dst) {
-               struct dst_entry *dst_test;
-
-               for (dst_test = dst; dst_test != NULL;
-                    dst_test = dst_test->child) {
-                       struct xfrm_state *x = dst_test->xfrm;
-
-                       if (x && selinux_authorizable_xfrm(x))
-                               goto out;
-               }
-       }
  
         switch (proto) {
         case IPPROTO_AH:
         case IPPROTO_ESP:
         case IPPROTO_COMP:
-               /*
-                * We should have already seen this packet once before
-                * it underwent xfrm(s). No need to subject it to the
-                * unlabeled check.
-                */
-               goto out;
+               /* We should have already seen this packet once before it
+                * underwent xfrm(s). No need to subject it to the unlabeled
+                * check. */
+               return 0;
         default:
                 break;
         }
  
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
+       dst = skb_dst(skb);
+       if (dst) {
+               struct dst_entry *iter;
  
-       rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO, ad);
-out:
-       return rc;
+               for (iter = dst; iter != NULL; iter = iter->child) {
+                       struct xfrm_state *x = iter->xfrm;
+
+                       if (x && selinux_authorizable_xfrm(x))
+                               return 0;
+               }
+       }
+
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, SECINITSID_UNLABELED,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad);
  }
diff --git a/security/smack/smack.h b/security/smack/smack.h

index 076b8e8a51abd50d833bd2bc6a262349ed902f0e..364cc64fce717be1e75d16fa91f4aeaadc55539f 100644 (file)
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -177,9 +177,13 @@ struct smk_port_label {
  #define SMACK_CIPSO_MAXCATNUM           184     /* 23 * 8 */
  
  /*
- * Flag for transmute access
+ * Flags for untraditional access modes.
+ * It shouldn't be necessary to avoid conflicts with definitions
+ * in fs.h, but do so anyway.
   */
-#define MAY_TRANSMUTE  64
+#define MAY_TRANSMUTE  0x00001000      /* Controls directory labeling */
+#define MAY_LOCK       0x00002000      /* Locks should be writes, but ... */
+
  /*
   * Just to make the common cases easier to deal with
   */
@@ -188,9 +192,9 @@ struct smk_port_label {
  #define MAY_NOT                0
  
  /*
- * Number of access types used by Smack (rwxat)
+ * Number of access types used by Smack (rwxatl)
   */
-#define SMK_NUM_ACCESS_TYPE 5
+#define SMK_NUM_ACCESS_TYPE 6
  
  /* SMACK data */
  struct smack_audit_data {
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c

index b3b59b1e93d6e6b056789243b77f0b319e56c982..14293cd9b1e53b4a260e9258a5cad54c75d71204 100644 (file)
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -84,6 +84,8 @@ int log_policy = SMACK_AUDIT_DENIED;
   *
   * Do the object check first because that is more
   * likely to differ.
+ *
+ * Allowing write access implies allowing locking.
   */
  int smk_access_entry(char *subject_label, char *object_label,
                         struct list_head *rule_list)
@@ -99,6 +101,11 @@ int smk_access_entry(char *subject_label, char *object_label,
                 }
         }
  
+       /*
+        * MAY_WRITE implies MAY_LOCK.
+        */
+       if ((may & MAY_WRITE) == MAY_WRITE)
+               may |= MAY_LOCK;
         return may;
  }
  
@@ -245,6 +252,7 @@ out_audit:
  static inline void smack_str_from_perm(char *string, int access)
  {
         int i = 0;
+
         if (access & MAY_READ)
                 string[i++] = 'r';
         if (access & MAY_WRITE)
@@ -255,6 +263,8 @@ static inline void smack_str_from_perm(char *string, int access)
                 string[i++] = 'a';
         if (access & MAY_TRANSMUTE)
                 string[i++] = 't';
+       if (access & MAY_LOCK)
+               string[i++] = 'l';
         string[i] = '\0';
  }
  /**
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c

index 8825375cc031709b3918cd073cd574708c3f0405..b0be893ad44d52bd0f062a1747199330c4d6940d 100644 (file)
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -185,7 +185,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
         smk_ad_setfield_u_tsk(&ad, ctp);
  
-       rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad);
+       rc = smk_curacc(skp->smk_known, mode, &ad);
         return rc;
  }
  
@@ -1146,7 +1146,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
   * @file: the object
   * @cmd: unused
   *
- * Returns 0 if current has write access, error code otherwise
+ * Returns 0 if current has lock access, error code otherwise
   */
  static int smack_file_lock(struct file *file, unsigned int cmd)
  {
@@ -1154,7 +1154,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd)
  
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
         smk_ad_setfield_u_fs_path(&ad, file->f_path);
-       return smk_curacc(file->f_security, MAY_WRITE, &ad);
+       return smk_curacc(file->f_security, MAY_LOCK, &ad);
  }
  
  /**
@@ -1178,8 +1178,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
  
         switch (cmd) {
         case F_GETLK:
+               break;
         case F_SETLK:
         case F_SETLKW:
+               smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
+               smk_ad_setfield_u_fs_path(&ad, file->f_path);
+               rc = smk_curacc(file->f_security, MAY_LOCK, &ad);
+               break;
         case F_SETOWN:
         case F_SETSIG:
                 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c

index 80f4b4a45725bddba4f094d2fefdd4701c07cb6b..160aa08e3cd5ecd775c819941a056d5c04e1a13e 100644 (file)
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -139,7 +139,7 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION;
   * SMK_LOADLEN: Smack rule length
   */
  #define SMK_OACCESS    "rwxa"
-#define SMK_ACCESS     "rwxat"
+#define SMK_ACCESS     "rwxatl"
  #define SMK_OACCESSLEN (sizeof(SMK_OACCESS) - 1)
  #define SMK_ACCESSLEN  (sizeof(SMK_ACCESS) - 1)
  #define SMK_OLOADLEN   (SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN)
@@ -282,6 +282,10 @@ static int smk_perm_from_str(const char *string)
                 case 'T':
                         perm |= MAY_TRANSMUTE;
                         break;
+               case 'l':
+               case 'L':
+                       perm |= MAY_LOCK;
+                       break;
                 default:
                         return perm;
                 }
@@ -452,7 +456,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
                 /*
                  * Minor hack for backward compatibility
                  */
-               if (count != SMK_OLOADLEN && count != SMK_LOADLEN)
+               if (count < SMK_OLOADLEN || count > SMK_LOADLEN)
                         return -EINVAL;
         } else {
                 if (count >= PAGE_SIZE) {
@@ -592,6 +596,8 @@ static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max)
                 seq_putc(s, 'a');
         if (srp->smk_access & MAY_TRANSMUTE)
                 seq_putc(s, 't');
+       if (srp->smk_access & MAY_LOCK)
+               seq_putc(s, 'l');
  
         seq_putc(s, '\n');
  }
diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c

index fa64cd85204fff5a9615f9cabbb5ac519a164d9d..fb5d107f56034eebec7003878eceded711a51769 100644 (file)
--- a/sound/soc/davinci/davinci-pcm.c
+++ b/sound/soc/davinci/davinci-pcm.c
@@ -238,7 +238,7 @@ static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data)
         print_buf_info(prtd->ram_channel, "i ram_channel");
         pr_debug("davinci_pcm: link=%d, status=0x%x\n", link, ch_status);
  
-       if (unlikely(ch_status != DMA_COMPLETE))
+       if (unlikely(ch_status != EDMA_DMA_COMPLETE))
                 return;
  
         if (snd_pcm_running(substream)) {
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c

index fe702076ca46cc2d3d02bab818446c9d15f8c392..9d77f13c2d2548934293662e314828a817d16144 100644 (file)
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2,7 +2,7 @@
   * turbostat -- show CPU frequency and C-state residency
   * on modern Intel turbo-capable processors.
   *
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
   * Len Brown <len.brown@intel.com>
   *
   * This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ unsigned int skip_c1;
  unsigned int do_nhm_cstates;
  unsigned int do_snb_cstates;
  unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
  unsigned int has_aperf;
  unsigned int has_epb;
  unsigned int units = 1000000000;       /* Ghz etc */
@@ -81,6 +83,8 @@ double rapl_joule_counter_range;
  #define RAPL_DRAM      (1 << 3)
  #define RAPL_PKG_PERF_STATUS   (1 << 4)
  #define RAPL_DRAM_PERF_STATUS  (1 << 5)
+#define RAPL_PKG_POWER_INFO    (1 << 6)
+#define RAPL_CORE_POLICY       (1 << 7)
  #define        TJMAX_DEFAULT   100
  
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@ struct thread_data {
         unsigned long long tsc;
         unsigned long long aperf;
         unsigned long long mperf;
-       unsigned long long c1;  /* derived */
+       unsigned long long c1;
         unsigned long long extra_msr64;
         unsigned long long extra_delta64;
         unsigned long long extra_msr32;
@@ -266,7 +270,7 @@ void print_header(void)
                 outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
         if (do_nhm_cstates)
                 outp += sprintf(outp, "    %%c1");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, "    %%c3");
         if (do_nhm_cstates)
                 outp += sprintf(outp, "    %%c6");
@@ -280,9 +284,9 @@ void print_header(void)
  
         if (do_snb_cstates)
                 outp += sprintf(outp, "   %%pc2");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, "   %%pc3");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, "   %%pc6");
         if (do_snb_cstates)
                 outp += sprintf(outp, "   %%pc7");
@@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                 goto done;
  
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
         if (do_nhm_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
  
         if (do_snb_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
         if (do_snb_cstates)
                 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old,
         }
  
  
-       /*
-        * As counter collection is not atomic,
-        * it is possible for mperf's non-halted cycles + idle states
-        * to exceed TSC's all cycles: show c1 = 0% in that case.
-        */
-       if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
-               old->c1 = 0;
-       else {
-               /* normal case, derive c1 */
-               old->c1 = old->tsc - old->mperf - core_delta->c3
+       if (use_c1_residency_msr) {
+               /*
+                * Some models have a dedicated C1 residency MSR,
+                * which should be more accurate than the derivation below.
+                */
+       } else {
+               /*
+                * As counter collection is not atomic,
+                * it is possible for mperf's non-halted cycles + idle states
+                * to exceed TSC's all cycles: show c1 = 0% in that case.
+                */
+               if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+                       old->c1 = 0;
+               else {
+                       /* normal case, derive c1 */
+                       old->c1 = old->tsc - old->mperf - core_delta->c3
                                 - core_delta->c6 - core_delta->c7;
+               }
         }
  
         if (old->mperf == 0) {
@@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
                         return -5;
  
+       if (use_c1_residency_msr) {
+               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+                       return -6;
+       }
+
         /* collect core counters only for 1st thread in core */
         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                 return 0;
  
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates) {
                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
                         return -6;
+       }
+
+       if (do_nhm_cstates) {
                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                         return -7;
         }
@@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                 return 0;
  
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates) {
                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
                         return -9;
                 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -977,7 +996,7 @@ void print_verbose_header(void)
                 ratio, bclk, ratio * bclk);
  
         get_msr(0, MSR_IA32_POWER_CTL, &msr);
-       fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+       fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                 msr, msr & 0x2 ? "EN" : "DIS");
  
         if (!do_ivt_turbo_ratio_limit)
@@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits:
  
         switch(msr & 0x7) {
         case 0:
-               fprintf(stderr, "pc0");
+               fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
                 break;
         case 1:
-               fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+               fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
                 break;
         case 2:
-               fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+               fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
                 break;
         case 3:
-               fprintf(stderr, "pc6");
+               fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
                 break;
         case 4:
-               fprintf(stderr, "pc7");
+               fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
                 break;
         case 5:
-               fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+               fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+               break;
+       case 6:
+               fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
                 break;
         case 7:
-               fprintf(stderr, "unlimited");
+               fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
                 break;
         default:
                 fprintf(stderr, "invalid");
@@ -1460,6 +1482,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
         case 0x3F:      /* HSW */
         case 0x45:      /* HSW */
         case 0x46:      /* HSW */
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
                 return 1;
         case 0x2E:      /* Nehalem-EX Xeon - Beckton */
         case 0x2F:      /* Westmere-EX Xeon - Eagleton */
@@ -1532,14 +1556,33 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
  #define        RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
  
+double get_tdp(model)
+{
+       unsigned long long msr;
+
+       if (do_rapl & RAPL_PKG_POWER_INFO)
+               if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+       switch (model) {
+       case 0x37:
+       case 0x4D:
+               return 30.0;
+       default:
+               return 135.0;
+       }
+}
+
+
  /*
   * rapl_probe()
   *
- * sets do_rapl
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
   */
  void rapl_probe(unsigned int family, unsigned int model)
  {
         unsigned long long msr;
+       unsigned int time_unit;
         double tdp;
  
         if (!genuine_intel)
@@ -1555,11 +1598,15 @@ void rapl_probe(unsigned int family, unsigned int model)
         case 0x3F:      /* HSW */
         case 0x45:      /* HSW */
         case 0x46:      /* HSW */
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
                 break;
         case 0x2D:
         case 0x3E:
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               break;
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
+               do_rapl = RAPL_PKG | RAPL_CORES ;
                 break;
         default:
                 return;
@@ -1570,19 +1617,22 @@ void rapl_probe(unsigned int family, unsigned int model)
                 return;
  
         rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-       rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+       if (model == 0x37)
+               rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+       else
+               rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
  
-       /* get TDP to determine energy counter range */
-       if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
-               return;
+       time_unit = msr >> 16 & 0xF;
+       if (time_unit == 0)
+               time_unit = 0xA;
  
-       tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       rapl_time_units = 1.0 / (1 << (time_unit));
  
-       rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+       tdp = get_tdp(model);
  
+       rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
         if (verbose)
-               fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+               fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
  
         return;
  }
@@ -1668,7 +1718,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  {
         unsigned long long msr;
         int cpu;
-       double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
  
         if (!do_rapl)
                 return 0;
@@ -1686,23 +1735,13 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
                 return -1;
  
-       local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-       local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
-
-       if (local_rapl_power_units != rapl_power_units)
-               fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
-       if (local_rapl_energy_units != rapl_energy_units)
-               fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
-       if (local_rapl_time_units != rapl_time_units)
-               fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
-
         if (verbose) {
                 fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
                         "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
-                       local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+                       rapl_power_units, rapl_energy_units, rapl_time_units);
         }
-       if (do_rapl & RAPL_PKG) {
+       if (do_rapl & RAPL_PKG_POWER_INFO) {
+
                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                         return -5;
  
@@ -1714,6 +1753,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
  
+       }
+       if (do_rapl & RAPL_PKG) {
+
                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
                         return -9;
  
@@ -1749,12 +1791,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  
                 print_power_limit_msr(cpu, msr, "DRAM Limit");
         }
-       if (do_rapl & RAPL_CORES) {
+       if (do_rapl & RAPL_CORE_POLICY) {
                 if (verbose) {
                         if (get_msr(cpu, MSR_PP0_POLICY, &msr))
                                 return -7;
  
                         fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+               }
+       }
+       if (do_rapl & RAPL_CORES) {
+               if (verbose) {
  
                         if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
                                 return -9;
@@ -1813,10 +1859,48 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
  }
  
  
+int is_slm(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+       switch (model) {
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
+               return 1;
+       }
+       return 0;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+       unsigned long long msr = 3;
+       unsigned int i;
+       double freq;
+
+       if (get_msr(0, MSR_FSB_FREQ, &msr))
+               fprintf(stderr, "SLM BCLK: unknown\n");
+
+       i = msr & 0xf;
+       if (i >= SLM_BCLK_FREQS) {
+               fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+               msr = 3;
+       }
+       freq = slm_freq_table[i];
+
+       fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+
+       return freq;
+}
+
  double discover_bclk(unsigned int family, unsigned int model)
  {
         if (is_snb(family, model))
                 return 100.00;
+       else if (is_slm(family, model))
+               return slm_bclk();
         else
                 return 133.33;
  }
@@ -1873,7 +1957,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
                 fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
                         cpu, msr, target_c_local);
  
-       if (target_c_local < 85 || target_c_local > 120)
+       if (target_c_local < 85 || target_c_local > 127)
                 goto guess;
  
         tcc_activation_temp = target_c_local;
@@ -1970,6 +2054,7 @@ void check_cpuid()
         do_smi = do_nhm_cstates;
         do_snb_cstates = is_snb(family, model);
         do_c8_c9_c10 = has_c8_c9_c10(family, model);
+       do_slm_cstates = is_slm(family, model);
         bclk = discover_bclk(family, model);
  
         do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2416,7 @@ int main(int argc, char **argv)
         cmdline(argc, argv);
  
         if (verbose)
-               fprintf(stderr, "turbostat v3.4 April 17, 2013"
+               fprintf(stderr, "turbostat v3.5 April 26, 2013"
                         " - Len Brown <lenb@kernel.org>\n");
  
         turbostat_init();
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 662f34c3287e59e8f0707d80b0c88396d402c8c0..a0aa84b5941ac96aabae48b03d80278052ce8929 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1615,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
  
  int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
  {
-       return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-                                   offset, len);
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+       return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
  }
  EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)