From: John W. Linville Date: Fri, 13 Dec 2013 18:14:28 +0000 (-0500) Subject: Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel... X-Git-Tag: drm-fsl-dcu-for-next~6014^2~482^2 X-Git-Url: http://git.agner.ch/gitweb/?p=linux-drm-fsl-dcu.git;a=commitdiff_plain;h=f647a52e1576f9c92cc9c02d5756cd0207295a2b;hp=55957fb7a0b61d8ab6ff3f04e279b8fc22b738fa Merge branch 'master' of git://git./linux/kernel/git/linville/wireless-next into for-davem --- diff --git a/Documentation/Changes b/Documentation/Changes index b17580885273..07c75d18154e 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -196,13 +196,6 @@ chmod 0644 /dev/cpu/microcode as root before you can use this. You'll probably also want to get the user-space microcode_ctl utility to use with this. -Powertweak ----------- - -If you are running v0.1.17 or earlier, you should upgrade to -version v0.99.0 or higher. Running old versions may cause problems -with programs using shared memory. - udev ---- udev is a userspace application for populating /dev dynamically with @@ -366,10 +359,6 @@ Intel P6 microcode ------------------ o -Powertweak ----------- -o - udev ---- o diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 6c9d9d37c83a..f5170082bdb3 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -58,7 +58,7 @@ Wait queues and Wake events !Iinclude/linux/wait.h -!Ekernel/wait.c +!Ekernel/sched/wait.c High-resolution timers !Iinclude/linux/ktime.h diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt new file mode 100644 index 000000000000..f4faec0f66e4 --- /dev/null +++ b/Documentation/assoc_array.txt @@ -0,0 +1,574 @@ + ======================================== + GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION + ======================================== + +Contents: + + - Overview. + + - The public API. + - Edit script. + - Operations table. + - Manipulation functions. + - Access functions. + - Index key form. + + - Internal workings. + - Basic internal tree layout. + - Shortcuts. + - Splitting and collapsing nodes. + - Non-recursive iteration. + - Simultaneous alteration and iteration. + + +======== +OVERVIEW +======== + +This associative array implementation is an object container with the following +properties: + + (1) Objects are opaque pointers. The implementation does not care where they + point (if anywhere) or what they point to (if anything). + + [!] NOTE: Pointers to objects _must_ be zero in the least significant bit. + + (2) Objects do not need to contain linkage blocks for use by the array. This + permits an object to be located in multiple arrays simultaneously. + Rather, the array is made up of metadata blocks that point to objects. + + (3) Objects require index keys to locate them within the array. + + (4) Index keys must be unique. Inserting an object with the same key as one + already in the array will replace the old object. + + (5) Index keys can be of any length and can be of different lengths. + + (6) Index keys should encode the length early on, before any variation due to + length is seen. + + (7) Index keys can include a hash to scatter objects throughout the array. + + (8) The array can iterated over. The objects will not necessarily come out in + key order. + + (9) The array can be iterated over whilst it is being modified, provided the + RCU readlock is being held by the iterator. Note, however, under these + circumstances, some objects may be seen more than once. If this is a + problem, the iterator should lock against modification. Objects will not + be missed, however, unless deleted. + +(10) Objects in the array can be looked up by means of their index key. + +(11) Objects can be looked up whilst the array is being modified, provided the + RCU readlock is being held by the thread doing the look up. + +The implementation uses a tree of 16-pointer nodes internally that are indexed +on each level by nibbles from the index key in the same manner as in a radix +tree. To improve memory efficiency, shortcuts can be emplaced to skip over +what would otherwise be a series of single-occupancy nodes. Further, nodes +pack leaf object pointers into spare space in the node rather than making an +extra branch until as such time an object needs to be added to a full node. + + +============== +THE PUBLIC API +============== + +The public API can be found in . The associative array is +rooted on the following structure: + + struct assoc_array { + ... + }; + +The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY. + + +EDIT SCRIPT +----------- + +The insertion and deletion functions produce an 'edit script' that can later be +applied to effect the changes without risking ENOMEM. This retains the +preallocated metadata blocks that will be installed in the internal tree and +keeps track of the metadata blocks that will be removed from the tree when the +script is applied. + +This is also used to keep track of dead blocks and dead objects after the +script has been applied so that they can be freed later. The freeing is done +after an RCU grace period has passed - thus allowing access functions to +proceed under the RCU read lock. + +The script appears as outside of the API as a pointer of the type: + + struct assoc_array_edit; + +There are two functions for dealing with the script: + + (1) Apply an edit script. + + void assoc_array_apply_edit(struct assoc_array_edit *edit); + + This will perform the edit functions, interpolating various write barriers + to permit accesses under the RCU read lock to continue. The edit script + will then be passed to call_rcu() to free it and any dead stuff it points + to. + + (2) Cancel an edit script. + + void assoc_array_cancel_edit(struct assoc_array_edit *edit); + + This frees the edit script and all preallocated memory immediately. If + this was for insertion, the new object is _not_ released by this function, + but must rather be released by the caller. + +These functions are guaranteed not to fail. + + +OPERATIONS TABLE +---------------- + +Various functions take a table of operations: + + struct assoc_array_ops { + ... + }; + +This points to a number of methods, all of which need to be provided: + + (1) Get a chunk of index key from caller data: + + unsigned long (*get_key_chunk)(const void *index_key, int level); + + This should return a chunk of caller-supplied index key starting at the + *bit* position given by the level argument. The level argument will be a + multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return + ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible. + + + (2) Get a chunk of an object's index key. + + unsigned long (*get_object_key_chunk)(const void *object, int level); + + As the previous function, but gets its data from an object in the array + rather than from a caller-supplied index key. + + + (3) See if this is the object we're looking for. + + bool (*compare_object)(const void *object, const void *index_key); + + Compare the object against an index key and return true if it matches and + false if it doesn't. + + + (4) Diff the index keys of two objects. + + int (*diff_objects)(const void *a, const void *b); + + Return the bit position at which the index keys of two objects differ or + -1 if they are the same. + + + (5) Free an object. + + void (*free_object)(void *object); + + Free the specified object. Note that this may be called an RCU grace + period after assoc_array_apply_edit() was called, so synchronize_rcu() may + be necessary on module unloading. + + +MANIPULATION FUNCTIONS +---------------------- + +There are a number of functions for manipulating an associative array: + + (1) Initialise an associative array. + + void assoc_array_init(struct assoc_array *array); + + This initialises the base structure for an associative array. It can't + fail. + + + (2) Insert/replace an object in an associative array. + + struct assoc_array_edit * + assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object); + + This inserts the given object into the array. Note that the least + significant bit of the pointer must be zero as it's used to type-mark + pointers internally. + + If an object already exists for that key then it will be replaced with the + new object and the old one will be freed automatically. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (3) Delete an object from an associative array. + + struct assoc_array_edit * + assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This deletes an object that matches the specified data from the array. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. NULL will be returned if the specified object is + not found within the array. + + The caller should lock exclusively against other modifiers of the array. + + + (4) Delete all objects from an associative array. + + struct assoc_array_edit * + assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This deletes all the objects from an associative array and leaves it + completely empty. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (5) Destroy an associative array, deleting all objects. + + void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This destroys the contents of the associative array and leaves it + completely empty. It is not permitted for another thread to be traversing + the array under the RCU read lock at the same time as this function is + destroying it as no RCU deferral is performed on memory release - + something that would require memory to be allocated. + + The caller should lock exclusively against other modifiers and accessors + of the array. + + + (6) Garbage collect an associative array. + + int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data); + + This iterates over the objects in an associative array and passes each one + to iterator(). If iterator() returns true, the object is kept. If it + returns false, the object will be freed. If the iterator() function + returns true, it must perform any appropriate refcount incrementing on the + object before returning. + + The internal tree will be packed down if possible as part of the iteration + to reduce the number of nodes in it. + + The iterator_data is passed directly to iterator() and is otherwise + ignored by the function. + + The function will return 0 if successful and -ENOMEM if there wasn't + enough memory. + + It is possible for other threads to iterate over or search the array under + the RCU read lock whilst this function is in progress. The caller should + lock exclusively against other modifiers of the array. + + +ACCESS FUNCTIONS +---------------- + +There are two functions for accessing an associative array: + + (1) Iterate over all the objects in an associative array. + + int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data); + + This passes each object in the array to the iterator callback function. + iterator_data is private data for that function. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. Under such circumstances, + it is possible for the iteration function to see some objects twice. If + this is a problem, then modification should be locked against. The + iteration algorithm should not, however, miss any objects. + + The function will return 0 if no objects were in the array or else it will + return the result of the last iterator function called. Iteration stops + immediately if any call to the iteration function results in a non-zero + return. + + + (2) Find an object in an associative array. + + void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This walks through the array's internal tree directly to the object + specified by the index key.. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. + + The function will return the object if found (and set *_type to the object + type) or will return NULL if the object was not found. + + +INDEX KEY FORM +-------------- + +The index key can be of any form, but since the algorithms aren't told how long +the key is, it is strongly recommended that the index key includes its length +very early on before any variation due to the length would have an effect on +comparisons. + +This will cause leaves with different length keys to scatter away from each +other - and those with the same length keys to cluster together. + +It is also recommended that the index key begin with a hash of the rest of the +key to maximise scattering throughout keyspace. + +The better the scattering, the wider and lower the internal tree will be. + +Poor scattering isn't too much of a problem as there are shortcuts and nodes +can contain mixtures of leaves and metadata pointers. + +The index key is read in chunks of machine word. Each chunk is subdivided into +one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and +on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is +unlikely that more than one word of any particular index key will have to be +used. + + +================= +INTERNAL WORKINGS +================= + +The associative array data structure has an internal tree. This tree is +constructed of two types of metadata blocks: nodes and shortcuts. + +A node is an array of slots. Each slot can contain one of four things: + + (*) A NULL pointer, indicating that the slot is empty. + + (*) A pointer to an object (a leaf). + + (*) A pointer to a node at the next level. + + (*) A pointer to a shortcut. + + +BASIC INTERNAL TREE LAYOUT +-------------------------- + +Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index +key space is strictly subdivided by the nodes in the tree and nodes occur on +fixed levels. For example: + + Level: 0 1 2 3 + =============== =============== =============== =============== + NODE D + NODE B NODE C +------>+---+ + +------>+---+ +------>+---+ | | 0 | + NODE A | | 0 | | | 0 | | +---+ + +---+ | +---+ | +---+ | : : + | 0 | | : : | : : | +---+ + +---+ | +---+ | +---+ | | f | + | 1 |---+ | 3 |---+ | 7 |---+ +---+ + +---+ +---+ +---+ + : : : : | 8 |---+ + +---+ +---+ +---+ | NODE E + | e |---+ | f | : : +------>+---+ + +---+ | +---+ +---+ | 0 | + | f | | | f | +---+ + +---+ | +---+ : : + | NODE F +---+ + +------>+---+ | f | + | 0 | NODE G +---+ + +---+ +------>+---+ + : : | | 0 | + +---+ | +---+ + | 6 |---+ : : + +---+ +---+ + : : | f | + +---+ +---+ + | f | + +---+ + +In the above example, there are 7 nodes (A-G), each with 16 slots (0-f). +Assuming no other meta data nodes in the tree, the key space is divided thusly: + + KEY PREFIX NODE + ========== ==== + 137* D + 138* E + 13[0-69-f]* C + 1[0-24-f]* B + e6* G + e[0-57-f]* F + [02-df]* A + +So, for instance, keys with the following example index keys will be found in +the appropriate nodes: + + INDEX KEY PREFIX NODE + =============== ======= ==== + 13694892892489 13 C + 13795289025897 137 D + 13889dde88793 138 E + 138bbb89003093 138 E + 1394879524789 12 C + 1458952489 1 B + 9431809de993ba - A + b4542910809cd - A + e5284310def98 e F + e68428974237 e6 G + e7fffcbd443 e F + f3842239082 - A + +To save memory, if a node can hold all the leaves in its portion of keyspace, +then the node will have all those leaves in it and will not have any metadata +pointers - even if some of those leaves would like to be in the same slot. + +A node can contain a heterogeneous mix of leaves and metadata pointers. +Metadata pointers must be in the slots that match their subdivisions of key +space. The leaves can be in any slot not occupied by a metadata pointer. It +is guaranteed that none of the leaves in a node will match a slot occupied by a +metadata pointer. If the metadata pointer is there, any leaf whose key matches +the metadata key prefix must be in the subtree that the metadata pointer points +to. + +In the above example list of index keys, node A will contain: + + SLOT CONTENT INDEX KEY (PREFIX) + ==== =============== ================== + 1 PTR TO NODE B 1* + any LEAF 9431809de993ba + any LEAF b4542910809cd + e PTR TO NODE F e* + any LEAF f3842239082 + +and node B: + + 3 PTR TO NODE C 13* + any LEAF 1458952489 + + +SHORTCUTS +--------- + +Shortcuts are metadata records that jump over a piece of keyspace. A shortcut +is a replacement for a series of single-occupancy nodes ascending through the +levels. Shortcuts exist to save memory and to speed up traversal. + +It is possible for the root of the tree to be a shortcut - say, for example, +the tree contains at least 17 nodes all with key prefix '1111'. The insertion +algorithm will insert a shortcut to skip over the '1111' keyspace in a single +bound and get to the fourth level where these actually become different. + + +SPLITTING AND COLLAPSING NODES +------------------------------ + +Each node has a maximum capacity of 16 leaves and metadata pointers. If the +insertion algorithm finds that it is trying to insert a 17th object into a +node, that node will be split such that at least two leaves that have a common +key segment at that level end up in a separate node rooted on that slot for +that common key segment. + +If the leaves in a full node and the leaf that is being inserted are +sufficiently similar, then a shortcut will be inserted into the tree. + +When the number of objects in the subtree rooted at a node falls to 16 or +fewer, then the subtree will be collapsed down to a single node - and this will +ripple towards the root if possible. + + +NON-RECURSIVE ITERATION +----------------------- + +Each node and shortcut contains a back pointer to its parent and the number of +slot in that parent that points to it. None-recursive iteration uses these to +proceed rootwards through the tree, going to the parent node, slot N + 1 to +make sure progress is made without the need for a stack. + +The backpointers, however, make simultaneous alteration and iteration tricky. + + +SIMULTANEOUS ALTERATION AND ITERATION +------------------------------------- + +There are a number of cases to consider: + + (1) Simple insert/replace. This involves simply replacing a NULL or old + matching leaf pointer with the pointer to the new leaf after a barrier. + The metadata blocks don't change otherwise. An old leaf won't be freed + until after the RCU grace period. + + (2) Simple delete. This involves just clearing an old matching leaf. The + metadata blocks don't change otherwise. The old leaf won't be freed until + after the RCU grace period. + + (3) Insertion replacing part of a subtree that we haven't yet entered. This + may involve replacement of part of that subtree - but that won't affect + the iteration as we won't have reached the pointer to it yet and the + ancestry blocks are not replaced (the layout of those does not change). + + (4) Insertion replacing nodes that we're actively processing. This isn't a + problem as we've passed the anchoring pointer and won't switch onto the + new layout until we follow the back pointers - at which point we've + already examined the leaves in the replaced node (we iterate over all the + leaves in a node before following any of its metadata pointers). + + We might, however, re-see some leaves that have been split out into a new + branch that's in a slot further along than we were at. + + (5) Insertion replacing nodes that we're processing a dependent branch of. + This won't affect us until we follow the back pointers. Similar to (4). + + (6) Deletion collapsing a branch under us. This doesn't affect us because the + back pointers will get us back to the parent of the new node before we + could see the new node. The entire collapsed subtree is thrown away + unchanged - and will still be rooted on the same slot, so we shouldn't + process it a second time as we'll go back to slot + 1. + +Note: + + (*) Under some circumstances, we need to simultaneously change the parent + pointer and the parent slot pointer on a node (say, for example, we + inserted another node before it and moved it up a level). We cannot do + this without locking against a read - so we have to replace that node too. + + However, when we're changing a shortcut into a node this isn't a problem + as shortcuts only have one slot and so the parent slot number isn't used + when traversing backwards over one. This means that it's okay to change + the slot number first - provided suitable barriers are used to make sure + the parent slot number is read after the back pointer. + +Obsolete blocks and leaves are freed up after an RCU grace period has passed, +so as long as anyone doing walking or iteration holds the RCU read lock, the +old superstructure should not go away on them. diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt index e1f343c7a34b..f69bcf5a6343 100644 --- a/Documentation/devicetree/bindings/dma/atmel-dma.txt +++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt @@ -28,7 +28,7 @@ The three cells in order are: dependent: - bit 7-0: peripheral identifier for the hardware handshaking interface. The identifier can be different for tx and rx. - - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP. + - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP. Example: diff --git a/Documentation/devicetree/bindings/i2c/i2c-omap.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt index 56564aa4b444..7e49839d4124 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-omap.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt @@ -1,7 +1,8 @@ I2C for OMAP platforms Required properties : -- compatible : Must be "ti,omap3-i2c" or "ti,omap4-i2c" +- compatible : Must be "ti,omap2420-i2c", "ti,omap2430-i2c", "ti,omap3-i2c" + or "ti,omap4-i2c" - ti,hwmods : Must be "i2c", n being the instance number (1-based) - #address-cells = <1>; - #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index ad6a73852f08..b1cb3415e6f1 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -15,6 +15,7 @@ adi,adt7461 +/-1C TDM Extended Temp Range I.C adt7461 +/-1C TDM Extended Temp Range I.C at,24c08 i2c serial eeprom (24cxx) atmel,24c02 i2c serial eeprom (24cxx) +atmel,at97sc3204t i2c trusted platform module (TPM) catalyst,24c32 i2c serial eeprom dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock dallas,ds1338 I2C RTC with 56-Byte NV RAM @@ -35,6 +36,7 @@ fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51 fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec +gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz) infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz) maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator @@ -44,6 +46,7 @@ mc,rv3029c2 Real Time Clock Module with I2C-Bus national,lm75 I2C TEMP SENSOR national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface +nuvoton,npct501 i2c trusted platform module (TPM) nxp,pca9556 Octal SMBus and I2C registered interface nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset nxp,pcf8563 Real-time clock/calendar @@ -61,3 +64,4 @@ taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface ti,tsc2003 I2C Touch-Screen Controller ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface ti,tmp275 Digital Temperature Sensor +winbond,wpct301 i2c trusted platform module (TPM) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 48b259e29e87..bad381faf036 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -4,7 +4,7 @@ This file provides information, what the device node for the davinci_emac interface contains. Required properties: -- compatible: "ti,davinci-dm6467-emac"; +- compatible: "ti,davinci-dm6467-emac" or "ti,am3517-emac" - reg: Offset and length of the register set for the device - ti,davinci-ctrl-reg-offset: offset to control register - ti,davinci-ctrl-mod-reg-offset: offset to control module register diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index 7cd18fbfcf71..f648094abc35 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -22,6 +22,7 @@ Optional Properties: specifications. If neither of these are specified, the default is to assume clause 22. The compatible list may also contain other elements. +- max-speed: Maximum PHY supported speed (10, 100, 1000...) Example: diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt index 2a4b4bce6110..7fc1b010fa75 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt @@ -1,33 +1,30 @@ -* Freescale 83xx DMA Controller +* Freescale DMA Controllers -Freescale PowerPC 83xx have on chip general purpose DMA controllers. +** Freescale Elo DMA Controller + This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx + series chips such as mpc8315, mpc8349, mpc8379 etc. Required properties: -- compatible : compatible list, contains 2 entries, first is - "fsl,CHIP-dma", where CHIP is the processor - (mpc8349, mpc8360, etc.) and the second is - "fsl,elo-dma" -- reg : -- ranges : Should be defined as specified in 1) to describe the - DMA controller channels. +- compatible : must include "fsl,elo-dma" +- reg : DMA General Status Register, i.e. DGSR which contains + status for all the 4 DMA channels +- ranges : describes the mapping between the address space of the + DMA channels and the address space of the DMA controller - cell-index : controller index. 0 for controller @ 0x8100 -- interrupts : +- interrupts : interrupt specifier for DMA IRQ - interrupt-parent : optional, if needed for interrupt mapping - - DMA channel nodes: - - compatible : compatible list, contains 2 entries, first is - "fsl,CHIP-dma-channel", where CHIP is the processor - (mpc8349, mpc8350, etc.) and the second is - "fsl,elo-dma-channel". However, see note below. - - reg : - - cell-index : dma channel index starts at 0. + - compatible : must include "fsl,elo-dma-channel" + However, see note below. + - reg : DMA channel specific registers + - cell-index : DMA channel index starts at 0. Optional properties: - - interrupts : - (on 83xx this is expected to be identical to - the interrupts property of the parent node) + - interrupts : interrupt specifier for DMA channel IRQ + (on 83xx this is expected to be identical to + the interrupts property of the parent node) - interrupt-parent : optional, if needed for interrupt mapping Example: @@ -70,30 +67,27 @@ Example: }; }; -* Freescale 85xx/86xx DMA Controller - -Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers. +** Freescale EloPlus DMA Controller + This is a 4-channel DMA controller with extended addresses and chaining, + mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as + mpc8540, mpc8641 p4080, bsc9131 etc. Required properties: -- compatible : compatible list, contains 2 entries, first is - "fsl,CHIP-dma", where CHIP is the processor - (mpc8540, mpc8540, etc.) and the second is - "fsl,eloplus-dma" -- reg : +- compatible : must include "fsl,eloplus-dma" +- reg : DMA General Status Register, i.e. DGSR which contains + status for all the 4 DMA channels - cell-index : controller index. 0 for controller @ 0x21000, 1 for controller @ 0xc000 -- ranges : Should be defined as specified in 1) to describe the - DMA controller channels. +- ranges : describes the mapping between the address space of the + DMA channels and the address space of the DMA controller - DMA channel nodes: - - compatible : compatible list, contains 2 entries, first is - "fsl,CHIP-dma-channel", where CHIP is the processor - (mpc8540, mpc8560, etc.) and the second is - "fsl,eloplus-dma-channel". However, see note below. - - cell-index : dma channel index starts at 0. - - reg : - - interrupts : + - compatible : must include "fsl,eloplus-dma-channel" + However, see note below. + - cell-index : DMA channel index starts at 0. + - reg : DMA channel specific registers + - interrupts : interrupt specifier for DMA channel IRQ - interrupt-parent : optional, if needed for interrupt mapping Example: @@ -134,6 +128,76 @@ Example: }; }; +** Freescale Elo3 DMA Controller + DMA controller which has same function as EloPlus except that Elo3 has 8 + channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx + series chips, such as t1040, t4240, b4860. + +Required properties: + +- compatible : must include "fsl,elo3-dma" +- reg : contains two entries for DMA General Status Registers, + i.e. DGSR0 which includes status for channel 1~4, and + DGSR1 for channel 5~8 +- ranges : describes the mapping between the address space of the + DMA channels and the address space of the DMA controller + +- DMA channel nodes: + - compatible : must include "fsl,eloplus-dma-channel" + - reg : DMA channel specific registers + - interrupts : interrupt specifier for DMA channel IRQ + - interrupt-parent : optional, if needed for interrupt mapping + +Example: +dma@100300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,elo3-dma"; + reg = <0x100300 0x4>, + <0x100600 0x4>; + ranges = <0x0 0x100100 0x500>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + interrupts = <28 2 0 0>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + interrupts = <29 2 0 0>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + interrupts = <30 2 0 0>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + interrupts = <31 2 0 0>; + }; + dma-channel@300 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x300 0x80>; + interrupts = <76 2 0 0>; + }; + dma-channel@380 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x380 0x80>; + interrupts = <77 2 0 0>; + }; + dma-channel@400 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x400 0x80>; + interrupts = <78 2 0 0>; + }; + dma-channel@480 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x480 0x80>; + interrupts = <79 2 0 0>; + }; +}; + Note on DMA channel compatible properties: The compatible property must say "fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA driver (fsldma). Any DMA channel used by fsldma cannot be used by another diff --git a/Documentation/devicetree/bindings/rng/qcom,prng.txt b/Documentation/devicetree/bindings/rng/qcom,prng.txt new file mode 100644 index 000000000000..8e5853c2879b --- /dev/null +++ b/Documentation/devicetree/bindings/rng/qcom,prng.txt @@ -0,0 +1,17 @@ +Qualcomm MSM pseudo random number generator. + +Required properties: + +- compatible : should be "qcom,prng" +- reg : specifies base physical address and size of the registers map +- clocks : phandle to clock-controller plus clock-specifier pair +- clock-names : "core" clocks all registers, FIFO and circuits in PRNG IP block + +Example: + + rng@f9bff000 { + compatible = "qcom,prng"; + reg = <0xf9bff000 0x200>; + clocks = <&clock GCC_PRNG_AHB_CLK>; + clock-names = "core"; + }; diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt index a2b5663eae26..dd77a81bdb80 100644 --- a/Documentation/dmatest.txt +++ b/Documentation/dmatest.txt @@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases. Part 2 - When dmatest is built as a module... -After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest -folder with nodes will be created. There are two important files located. First -is the 'run' node that controls run and stop phases of the test, and the second -one, 'results', is used to get the test case results. - -Note that in this case test will not run on load automatically. - Example of usage: + % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1 + +...or: + % modprobe dmatest % echo dma0chan0 > /sys/module/dmatest/parameters/channel % echo 2000 > /sys/module/dmatest/parameters/timeout % echo 1 > /sys/module/dmatest/parameters/iterations - % echo 1 > /sys/kernel/debug/dmatest/run + % echo 1 > /sys/module/dmatest/parameters/run + +...or on the kernel command line: + + dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1 Hint: available channel list could be extracted by running the following command: % ls -1 /sys/class/dma/ -After a while you will start to get messages about current status or error like -in the original code. +Once started a message like "dmatest: Started 1 threads using dma0chan0" is +emitted. After that only test failure messages are reported until the test +stops. Note that running a new test will not stop any in progress test. -The following command should return actual state of the test. - % cat /sys/kernel/debug/dmatest/run - -To wait for test done the user may perform a busy loop that checks the state. - - % while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ] - > do - > echo -n "." - > sleep 1 - > done - > echo +The following command returns the state of the test. + % cat /sys/module/dmatest/parameters/run + +To wait for test completion userpace can poll 'run' until it is false, or use +the wait parameter. Specifying 'wait=1' when loading the module causes module +initialization to pause until a test run has completed, while reading +/sys/module/dmatest/parameters/wait waits for any running test to complete +before returning. For example, the following scripts wait for 42 tests +to complete before exiting. Note that if 'iterations' is set to 'infinite' then +waiting is disabled. + +Example: + % modprobe dmatest run=1 iterations=42 wait=1 + % modprobe -r dmatest +...or: + % modprobe dmatest run=1 iterations=42 + % cat /sys/module/dmatest/parameters/wait + % modprobe -r dmatest Part 3 - When built-in in the kernel... @@ -62,21 +71,22 @@ case. You always could check them at run-time by running Part 4 - Gathering the test results -The module provides a storage for the test results in the memory. The gathered -data could be used after test is done. +Test results are printed to the kernel log buffer with the format: -The special file 'results' in the debugfs represents gathered data of the in -progress test. The messages collected are printed to the kernel log as well. +"dmatest: result : : '' with src_off= dst_off= len= ()" Example of output: - % cat /sys/kernel/debug/dmatest/results - dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) + % dmesg | tail -n 1 + dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) The message format is unified across the different types of errors. A number in the parens represents additional information, e.g. error code, error counter, -or status. +or status. A test thread also emits a summary line at completion listing the +number of tests executed, number that failed, and a result code. -Comparison between buffers is stored to the dedicated structure. +Example: + % dmesg | tail -n 1 + dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0) -Note that the verify result is now accessible only via file 'results' in the -debugfs. +The details of a data miscompare error are also emitted, but do not follow the +above format. diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index 9dae59407437..5dd282dda55c 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off. See comments at the top of fs/btrfs/check-integrity.c for more info. + commit= + Set the interval of periodic commit, 30 seconds by default. Higher + values defer data being synced to permanent storage with obvious + consequences when the system crashes. The upper bound is not forced, + but a warning is printed if it's more than 300 seconds (5 minutes). + compress compress= compress-force @@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off. Currently this scans a list of several previous tree roots and tries to use the first readable. - skip_balance + rescan_uuid_tree + Force check and rebuild procedure of the UUID tree. This should not + normally be needed. + + skip_balance Skip automatic resume of interrupted balance operation after mount. May be resumed with "btrfs balance resume." @@ -234,24 +244,14 @@ available from the git repository at the following location: These include the following tools: -mkfs.btrfs: create a filesystem - -btrfsctl: control program to create snapshots and subvolumes: +* mkfs.btrfs: create a filesystem - mount /dev/sda2 /mnt - btrfsctl -s new_subvol_name /mnt - btrfsctl -s snapshot_of_default /mnt/default - btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name - btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol - ls /mnt - default snapshot_of_a_snapshot snapshot_of_new_subvol - new_subvol_name snapshot_of_default +* btrfs: a single tool to manage the filesystems, refer to the manpage for more details - Snapshots and subvolumes cannot be deleted right now, but you can - rm -rf all the files and directories inside them. +* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem -btrfsck: do a limited check of the FS extent trees. +Other tools for specific tasks: -btrfs-debug-tree: print all of the FS metadata in text form. Example: +* btrfs-convert: in-place conversion from ext2/3/4 filesystems - btrfs-debug-tree /dev/sda2 >& big_output_file +* btrfs-image: dump filesystem metadata for debugging diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt deleted file mode 100644 index 6f83fa965b4b..000000000000 --- a/Documentation/gpio.txt +++ /dev/null @@ -1,775 +0,0 @@ -GPIO Interfaces - -This provides an overview of GPIO access conventions on Linux. - -These calls use the gpio_* naming prefix. No other calls should use that -prefix, or the related __gpio_* prefix. - - -What is a GPIO? -=============== -A "General Purpose Input/Output" (GPIO) is a flexible software-controlled -digital signal. They are provided from many kinds of chip, and are familiar -to Linux developers working with embedded and custom hardware. Each GPIO -represents a bit connected to a particular pin, or "ball" on Ball Grid Array -(BGA) packages. Board schematics show which external hardware connects to -which GPIOs. Drivers can be written generically, so that board setup code -passes such pin configuration data to drivers. - -System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every -non-dedicated pin can be configured as a GPIO; and most chips have at least -several dozen of them. Programmable logic devices (like FPGAs) can easily -provide GPIOs; multifunction chips like power managers, and audio codecs -often have a few such pins to help with pin scarcity on SOCs; and there are -also "GPIO Expander" chips that connect using the I2C or SPI serial busses. -Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS -firmware knowing how they're used). - -The exact capabilities of GPIOs vary between systems. Common options: - - - Output values are writable (high=1, low=0). Some chips also have - options about how that value is driven, so that for example only one - value might be driven ... supporting "wire-OR" and similar schemes - for the other value (notably, "open drain" signaling). - - - Input values are likewise readable (1, 0). Some chips support readback - of pins configured as "output", which is very useful in such "wire-OR" - cases (to support bidirectional signaling). GPIO controllers may have - input de-glitch/debounce logic, sometimes with software controls. - - - Inputs can often be used as IRQ signals, often edge triggered but - sometimes level triggered. Such IRQs may be configurable as system - wakeup events, to wake the system from a low power state. - - - Usually a GPIO will be configurable as either input or output, as needed - by different product boards; single direction ones exist too. - - - Most GPIOs can be accessed while holding spinlocks, but those accessed - through a serial bus normally can't. Some systems support both types. - -On a given board each GPIO is used for one specific purpose like monitoring -MMC/SD card insertion/removal, detecting card writeprotect status, driving -a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware -watchdog, sensing a switch, and so on. - - -GPIO conventions -================ -Note that this is called a "convention" because you don't need to do it this -way, and it's no crime if you don't. There **are** cases where portability -is not the main issue; GPIOs are often used for the kind of board-specific -glue logic that may even change between board revisions, and can't ever be -used on a board that's wired differently. Only least-common-denominator -functionality can be very portable. Other features are platform-specific, -and that can be critical for glue logic. - -Plus, this doesn't require any implementation framework, just an interface. -One platform might implement it as simple inline functions accessing chip -registers; another might implement it by delegating through abstractions -used for several very different kinds of GPIO controller. (There is some -optional code supporting such an implementation strategy, described later -in this document, but drivers acting as clients to the GPIO interface must -not care how it's implemented.) - -That said, if the convention is supported on their platform, drivers should -use it when possible. Platforms must select ARCH_REQUIRE_GPIOLIB or -ARCH_WANT_OPTIONAL_GPIOLIB in their Kconfig. Drivers that can't work without -standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The -GPIO calls are available, either as "real code" or as optimized-away stubs, -when drivers use the include file: - - #include - -If you stick to this convention then it'll be easier for other developers to -see what your code is doing, and help maintain it. - -Note that these operations include I/O barriers on platforms which need to -use them; drivers don't need to add them explicitly. - - -Identifying GPIOs ------------------ -GPIOs are identified by unsigned integers in the range 0..MAX_INT. That -reserves "negative" numbers for other purposes like marking signals as -"not available on this board", or indicating faults. Code that doesn't -touch the underlying hardware treats these integers as opaque cookies. - -Platforms define how they use those integers, and usually #define symbols -for the GPIO lines so that board-specific setup code directly corresponds -to the relevant schematics. In contrast, drivers should only use GPIO -numbers passed to them from that setup code, using platform_data to hold -board-specific pin configuration data (along with other board specific -data they need). That avoids portability problems. - -So for example one platform uses numbers 32-159 for GPIOs; while another -uses numbers 0..63 with one set of GPIO controllers, 64-79 with another -type of GPIO controller, and on one particular board 80-95 with an FPGA. -The numbers need not be contiguous; either of those platforms could also -use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. - -If you want to initialize a structure with an invalid GPIO number, use -some negative number (perhaps "-EINVAL"); that will never be valid. To -test if such number from such a structure could reference a GPIO, you -may use this predicate: - - int gpio_is_valid(int number); - -A number that's not valid will be rejected by calls which may request -or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but temporarily unused on a given board. - -Whether a platform supports multiple GPIO controllers is a platform-specific -implementation issue, as are whether that support can leave "holes" in the space -of GPIO numbers, and whether new controllers can be added at runtime. Such issues -can affect things including whether adjacent GPIO numbers are both valid. - -Using GPIOs ------------ -The first thing a system should do with a GPIO is allocate it, using -the gpio_request() call; see later. - -One of the next things to do with a GPIO, often in board setup code when -setting up a platform_device using the GPIO, is mark its direction: - - /* set as input or output, returning 0 or negative errno */ - int gpio_direction_input(unsigned gpio); - int gpio_direction_output(unsigned gpio, int value); - -The return value is zero for success, else a negative errno. It should -be checked, since the get/set calls don't have error returns and since -misconfiguration is possible. You should normally issue these calls from -a task context. However, for spinlock-safe GPIOs it's OK to use them -before tasking is enabled, as part of early board setup. - -For output GPIOs, the value provided becomes the initial output value. -This helps avoid signal glitching during system startup. - -For compatibility with legacy interfaces to GPIOs, setting the direction -of a GPIO implicitly requests that GPIO (see below) if it has not been -requested already. That compatibility is being removed from the optional -gpiolib framework. - -Setting the direction can fail if the GPIO number is invalid, or when -that particular GPIO can't be used in that mode. It's generally a bad -idea to rely on boot firmware to have set the direction correctly, since -it probably wasn't validated to do more than boot Linux. (Similarly, -that board setup code probably needs to multiplex that pin as a GPIO, -and configure pullups/pulldowns appropriately.) - - -Spinlock-Safe GPIO access -------------------------- -Most GPIO controllers can be accessed with memory read/write instructions. -Those don't need to sleep, and can safely be done from inside hard -(nonthreaded) IRQ handlers and similar contexts. - -Use the following calls to access such GPIOs, -for which gpio_cansleep() will always return false (see below): - - /* GPIO INPUT: return zero or nonzero */ - int gpio_get_value(unsigned gpio); - - /* GPIO OUTPUT */ - void gpio_set_value(unsigned gpio, int value); - -The values are boolean, zero for low, nonzero for high. When reading the -value of an output pin, the value returned should be what's seen on the -pin ... that won't always match the specified output value, because of -issues including open-drain signaling and output latencies. - -The get/set calls have no error returns because "invalid GPIO" should have -been reported earlier from gpio_direction_*(). However, note that not all -platforms can read the value of output pins; those that can't should always -return zero. Also, using these calls for GPIOs that can't safely be accessed -without sleeping (see below) is an error. - -Platform-specific implementations are encouraged to optimize the two -calls to access the GPIO value in cases where the GPIO number (and for -output, value) are constant. It's normal for them to need only a couple -of instructions in such cases (reading or writing a hardware register), -and not to need spinlocks. Such optimized calls can make bitbanging -applications a lot more efficient (in both space and time) than spending -dozens of instructions on subroutine calls. - - -GPIO access that may sleep --------------------------- -Some GPIO controllers must be accessed using message based busses like I2C -or SPI. Commands to read or write those GPIO values require waiting to -get to the head of a queue to transmit a command and get its response. -This requires sleeping, which can't be done from inside IRQ handlers. - -Platforms that support this type of GPIO distinguish them from other GPIOs -by returning nonzero from this call (which requires a valid GPIO number, -which should have been previously allocated with gpio_request): - - int gpio_cansleep(unsigned gpio); - -To access such GPIOs, a different set of accessors is defined: - - /* GPIO INPUT: return zero or nonzero, might sleep */ - int gpio_get_value_cansleep(unsigned gpio); - - /* GPIO OUTPUT, might sleep */ - void gpio_set_value_cansleep(unsigned gpio, int value); - - -Accessing such GPIOs requires a context which may sleep, for example -a threaded IRQ handler, and those accessors must be used instead of -spinlock-safe accessors without the cansleep() name suffix. - -Other than the fact that these accessors might sleep, and will work -on GPIOs that can't be accessed from hardIRQ handlers, these calls act -the same as the spinlock-safe calls. - - ** IN ADDITION ** calls to setup and configure such GPIOs must be made -from contexts which may sleep, since they may need to access the GPIO -controller chip too: (These setup calls are usually made from board -setup or driver probe/teardown code, so this is an easy constraint.) - - gpio_direction_input() - gpio_direction_output() - gpio_request() - -## gpio_request_one() -## gpio_request_array() -## gpio_free_array() - - gpio_free() - gpio_set_debounce() - - - -Claiming and Releasing GPIOs ----------------------------- -To help catch system configuration errors, two calls are defined. - - /* request GPIO, returning 0 or negative errno. - * non-null labels may be useful for diagnostics. - */ - int gpio_request(unsigned gpio, const char *label); - - /* release previously-claimed GPIO */ - void gpio_free(unsigned gpio); - -Passing invalid GPIO numbers to gpio_request() will fail, as will requesting -GPIOs that have already been claimed with that call. The return value of -gpio_request() must be checked. You should normally issue these calls from -a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs -before tasking is enabled, as part of early board setup. - -These calls serve two basic purposes. One is marking the signals which -are actually in use as GPIOs, for better diagnostics; systems may have -several hundred potential GPIOs, but often only a dozen are used on any -given board. Another is to catch conflicts, identifying errors when -(a) two or more drivers wrongly think they have exclusive use of that -signal, or (b) something wrongly believes it's safe to remove drivers -needed to manage a signal that's in active use. That is, requesting a -GPIO can serve as a kind of lock. - -Some platforms may also use knowledge about what GPIOs are active for -power management, such as by powering down unused chip sectors and, more -easily, gating off unused clocks. - -For GPIOs that use pins known to the pinctrl subsystem, that subsystem should -be informed of their use; a gpiolib driver's .request() operation may call -pinctrl_request_gpio(), and a gpiolib driver's .free() operation may call -pinctrl_free_gpio(). The pinctrl subsystem allows a pinctrl_request_gpio() -to succeed concurrently with a pin or pingroup being "owned" by a device for -pin multiplexing. - -Any programming of pin multiplexing hardware that is needed to route the -GPIO signal to the appropriate pin should occur within a GPIO driver's -.direction_input() or .direction_output() operations, and occur after any -setup of an output GPIO's value. This allows a glitch-free migration from a -pin's special function to GPIO. This is sometimes required when using a GPIO -to implement a workaround on signals typically driven by a non-GPIO HW block. - -Some platforms allow some or all GPIO signals to be routed to different pins. -Similarly, other aspects of the GPIO or pin may need to be configured, such as -pullup/pulldown. Platform software should arrange that any such details are -configured prior to gpio_request() being called for those GPIOs, e.g. using -the pinctrl subsystem's mapping table, so that GPIO users need not be aware -of these details. - -Also note that it's your responsibility to have stopped using a GPIO -before you free it. - -Considering in most cases GPIOs are actually configured right after they -are claimed, three additional calls are defined: - - /* request a single GPIO, with initial configuration specified by - * 'flags', identical to gpio_request() wrt other arguments and - * return value - */ - int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); - - /* request multiple GPIOs in a single call - */ - int gpio_request_array(struct gpio *array, size_t num); - - /* release multiple GPIOs in a single call - */ - void gpio_free_array(struct gpio *array, size_t num); - -where 'flags' is currently defined to specify the following properties: - - * GPIOF_DIR_IN - to configure direction as input - * GPIOF_DIR_OUT - to configure direction as output - - * GPIOF_INIT_LOW - as output, set initial level to LOW - * GPIOF_INIT_HIGH - as output, set initial level to HIGH - * GPIOF_OPEN_DRAIN - gpio pin is open drain type. - * GPIOF_OPEN_SOURCE - gpio pin is open source type. - - * GPIOF_EXPORT_DIR_FIXED - export gpio to sysfs, keep direction - * GPIOF_EXPORT_DIR_CHANGEABLE - also export, allow changing direction - -since GPIOF_INIT_* are only valid when configured as output, so group valid -combinations as: - - * GPIOF_IN - configure as input - * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW - * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH - -When setting the flag as GPIOF_OPEN_DRAIN then it will assume that pins is -open drain type. Such pins will not be driven to 1 in output mode. It is -require to connect pull-up on such pins. By enabling this flag, gpio lib will -make the direction to input when it is asked to set value of 1 in output mode -to make the pin HIGH. The pin is make to LOW by driving value 0 in output mode. - -When setting the flag as GPIOF_OPEN_SOURCE then it will assume that pins is -open source type. Such pins will not be driven to 0 in output mode. It is -require to connect pull-down on such pin. By enabling this flag, gpio lib will -make the direction to input when it is asked to set value of 0 in output mode -to make the pin LOW. The pin is make to HIGH by driving value 1 in output mode. - -In the future, these flags can be extended to support more properties. - -Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is -introduced to encapsulate all three fields as: - - struct gpio { - unsigned gpio; - unsigned long flags; - const char *label; - }; - -A typical example of usage: - - static struct gpio leds_gpios[] = { - { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ - { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ - { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ - { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ - { ... }, - }; - - err = gpio_request_one(31, GPIOF_IN, "Reset Button"); - if (err) - ... - - err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios)); - if (err) - ... - - gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios)); - - -GPIOs mapped to IRQs --------------------- -GPIO numbers are unsigned integers; so are IRQ numbers. These make up -two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can -map between them using calls like: - - /* map GPIO numbers to IRQ numbers */ - int gpio_to_irq(unsigned gpio); - - /* map IRQ numbers to GPIO numbers (avoid using this) */ - int irq_to_gpio(unsigned irq); - -Those return either the corresponding number in the other namespace, or -else a negative errno code if the mapping can't be done. (For example, -some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO -number that wasn't set up as an input using gpio_direction_input(), or -to use an IRQ number that didn't originally come from gpio_to_irq(). - -These two mapping calls are expected to cost on the order of a single -addition or subtraction. They're not allowed to sleep. - -Non-error values returned from gpio_to_irq() can be passed to request_irq() -or free_irq(). They will often be stored into IRQ resources for platform -devices, by the board-specific initialization code. Note that IRQ trigger -options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are -system wakeup capabilities. - -Non-error values returned from irq_to_gpio() would most commonly be used -with gpio_get_value(), for example to initialize or update driver state -when the IRQ is edge-triggered. Note that some platforms don't support -this reverse mapping, so you should avoid using it. - - -Emulating Open Drain Signals ----------------------------- -Sometimes shared signals need to use "open drain" signaling, where only the -low signal level is actually driven. (That term applies to CMOS transistors; -"open collector" is used for TTL.) A pullup resistor causes the high signal -level. This is sometimes called a "wire-AND"; or more practically, from the -negative logic (low=true) perspective this is a "wire-OR". - -One common example of an open drain signal is a shared active-low IRQ line. -Also, bidirectional data bus signals sometimes use open drain signals. - -Some GPIO controllers directly support open drain outputs; many don't. When -you need open drain signaling but your hardware doesn't directly support it, -there's a common idiom you can use to emulate it with any GPIO pin that can -be used as either an input or an output: - - LOW: gpio_direction_output(gpio, 0) ... this drives the signal - and overrides the pullup. - - HIGH: gpio_direction_input(gpio) ... this turns off the output, - so the pullup (or some other device) controls the signal. - -If you are "driving" the signal high but gpio_get_value(gpio) reports a low -value (after the appropriate rise time passes), you know some other component -is driving the shared signal low. That's not necessarily an error. As one -common example, that's how I2C clocks are stretched: a slave that needs a -slower clock delays the rising edge of SCK, and the I2C master adjusts its -signaling rate accordingly. - - -GPIO controllers and the pinctrl subsystem ------------------------------------------- - -A GPIO controller on a SOC might be tightly coupled with the pinctrl -subsystem, in the sense that the pins can be used by other functions -together with an optional gpio feature. We have already covered the -case where e.g. a GPIO controller need to reserve a pin or set the -direction of a pin by calling any of: - -pinctrl_request_gpio() -pinctrl_free_gpio() -pinctrl_gpio_direction_input() -pinctrl_gpio_direction_output() - -But how does the pin control subsystem cross-correlate the GPIO -numbers (which are a global business) to a certain pin on a certain -pin controller? - -This is done by registering "ranges" of pins, which are essentially -cross-reference tables. These are described in -Documentation/pinctrl.txt - -While the pin allocation is totally managed by the pinctrl subsystem, -gpio (under gpiolib) is still maintained by gpio drivers. It may happen -that different pin ranges in a SoC is managed by different gpio drivers. - -This makes it logical to let gpio drivers announce their pin ranges to -the pin ctrl subsystem before it will call 'pinctrl_request_gpio' in order -to request the corresponding pin to be prepared by the pinctrl subsystem -before any gpio usage. - -For this, the gpio controller can register its pin range with pinctrl -subsystem. There are two ways of doing it currently: with or without DT. - -For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. - -For non-DT support, user can call gpiochip_add_pin_range() with appropriate -parameters to register a range of gpio pins with a pinctrl driver. For this -exact name string of pinctrl device has to be passed as one of the -argument to this routine. - - -What do these conventions omit? -=============================== -One of the biggest things these conventions omit is pin multiplexing, since -this is highly chip-specific and nonportable. One platform might not need -explicit multiplexing; another might have just two options for use of any -given pin; another might have eight options per pin; another might be able -to route a given GPIO to any one of several pins. (Yes, those examples all -come from systems that run Linux today.) - -Related to multiplexing is configuration and enabling of the pullups or -pulldowns integrated on some platforms. Not all platforms support them, -or support them in the same way; and any given board might use external -pullups (or pulldowns) so that the on-chip ones should not be used. -(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) -Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a -platform-specific issue, as are models like (not) having a one-to-one -correspondence between configurable pins and GPIOs. - -There are other system-specific mechanisms that are not specified here, -like the aforementioned options for input de-glitching and wire-OR output. -Hardware may support reading or writing GPIOs in gangs, but that's usually -configuration dependent: for GPIOs sharing the same bank. (GPIOs are -commonly grouped in banks of 16 or 32, with a given SOC having several such -banks.) Some systems can trigger IRQs from output GPIOs, or read values -from pins not managed as GPIOs. Code relying on such mechanisms will -necessarily be nonportable. - -Dynamic definition of GPIOs is not currently standard; for example, as -a side effect of configuring an add-on board with some GPIO expanders. - - -GPIO implementor's framework (OPTIONAL) -======================================= -As noted earlier, there is an optional implementation framework making it -easier for platforms to support different kinds of GPIO controller using -the same programming interface. This framework is called "gpiolib". - -As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file -will be found there. That will list all the controllers registered through -this framework, and the state of the GPIOs currently in use. - - -Controller Drivers: gpio_chip ------------------------------ -In this framework each GPIO controller is packaged as a "struct gpio_chip" -with information common to each controller of that type: - - - methods to establish GPIO direction - - methods used to access GPIO values - - flag saying whether calls to its methods may sleep - - optional debugfs dump method (showing extra state like pullup config) - - label for diagnostics - -There is also per-instance data, which may come from device.platform_data: -the number of its first GPIO, and how many GPIOs it exposes. - -The code implementing a gpio_chip should support multiple instances of the -controller, possibly using the driver model. That code will configure each -gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be -rare; use gpiochip_remove() when it is unavoidable. - -Most often a gpio_chip is part of an instance-specific structure with state -not exposed by the GPIO interfaces, such as addressing, power management, -and more. Chips such as codecs will have complex non-GPIO state. - -Any debugfs dump method should normally ignore signals which haven't been -requested as GPIOs. They can use gpiochip_is_requested(), which returns -either NULL or the label associated with that GPIO when it was requested. - - -Platform Support ----------------- -To support this framework, a platform's Kconfig will "select" either -ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB -and arrange that its includes and defines -three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). - -It may also provide a custom value for ARCH_NR_GPIOS, so that it better -reflects the number of GPIOs in actual use on that platform, without -wasting static table space. (It should count both built-in/SoC GPIOs and -also ones on GPIO expanders. - -ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled -into the kernel on that architecture. - -ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user -can enable it and build it into the kernel optionally. - -If neither of these options are selected, the platform does not support -GPIOs through GPIO-lib and the code cannot be enabled by the user. - -Trivial implementations of those functions can directly use framework -code, which always dispatches through the gpio_chip: - - #define gpio_get_value __gpio_get_value - #define gpio_set_value __gpio_set_value - #define gpio_cansleep __gpio_cansleep - -Fancier implementations could instead define those as inline functions with -logic optimizing access to specific SOC-based GPIOs. For example, if the -referenced GPIO is the constant "12", getting or setting its value could -cost as little as two or three instructions, never sleeping. When such an -optimization is not possible those calls must delegate to the framework -code, costing at least a few dozen instructions. For bitbanged I/O, such -instruction savings can be significant. - -For SOCs, platform-specific code defines and registers gpio_chip instances -for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to -match chip vendor documentation, and directly match board schematics. They -may well start at zero and go up to a platform-specific limit. Such GPIOs -are normally integrated into platform initialization to make them always be -available, from arch_initcall() or earlier; they can often serve as IRQs. - - -Board Support -------------- -For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi -function devices, FPGAs or CPLDs -- most often board-specific code handles -registering controller devices and ensures that their drivers know what GPIO -numbers to use with gpiochip_add(). Their numbers often start right after -platform-specific GPIOs. - -For example, board setup code could create structures identifying the range -of GPIOs that chip will expose, and passes them to each GPIO expander chip -using platform_data. Then the chip driver's probe() routine could pass that -data to gpiochip_add(). - -Initialization order can be important. For example, when a device relies on -an I2C-based GPIO, its probe() routine should only be called after that GPIO -becomes available. That may mean the device should not be registered until -calls for that GPIO can work. One way to address such dependencies is for -such gpio_chip controllers to provide setup() and teardown() callbacks to -board specific code; those board specific callbacks would register devices -once all the necessary resources are available, and remove them later when -the GPIO controller device becomes unavailable. - - -Sysfs Interface for Userspace (OPTIONAL) -======================================== -Platforms which use the "gpiolib" implementors framework may choose to -configure a sysfs user interface to GPIOs. This is different from the -debugfs interface, since it provides control over GPIO direction and -value instead of just showing a gpio state summary. Plus, it could be -present on production systems without debugging support. - -Given appropriate hardware documentation for the system, userspace could -know for example that GPIO #23 controls the write protect line used to -protect boot loader segments in flash memory. System upgrade procedures -may need to temporarily remove that protection, first importing a GPIO, -then changing its output state, then updating the code before re-enabling -the write protection. In normal use, GPIO #23 would never be touched, -and the kernel would have no need to know about it. - -Again depending on appropriate hardware documentation, on some systems -userspace GPIO can be used to determine system configuration data that -standard kernels won't know about. And for some tasks, simple userspace -GPIO drivers could be all that the system really needs. - -Note that standard kernel drivers exist for common "LEDs and Buttons" -GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those -instead of talking directly to the GPIOs; they integrate with kernel -frameworks better than your userspace code could. - - -Paths in Sysfs --------------- -There are three kinds of entry in /sys/class/gpio: - - - Control interfaces used to get userspace control over GPIOs; - - - GPIOs themselves; and - - - GPIO controllers ("gpio_chip" instances). - -That's in addition to standard files including the "device" symlink. - -The control interfaces are write-only: - - /sys/class/gpio/ - - "export" ... Userspace may ask the kernel to export control of - a GPIO to userspace by writing its number to this file. - - Example: "echo 19 > export" will create a "gpio19" node - for GPIO #19, if that's not requested by kernel code. - - "unexport" ... Reverses the effect of exporting to userspace. - - Example: "echo 19 > unexport" will remove a "gpio19" - node exported using the "export" file. - -GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) -and have the following read/write attributes: - - /sys/class/gpio/gpioN/ - - "direction" ... reads as either "in" or "out". This value may - normally be written. Writing as "out" defaults to - initializing the value as low. To ensure glitch free - operation, values "low" and "high" may be written to - configure the GPIO as an output with that initial value. - - Note that this attribute *will not exist* if the kernel - doesn't support changing the direction of a GPIO, or - it was exported by kernel code that didn't explicitly - allow userspace to reconfigure this GPIO's direction. - - "value" ... reads as either 0 (low) or 1 (high). If the GPIO - is configured as an output, this value may be written; - any nonzero value is treated as high. - - If the pin can be configured as interrupt-generating interrupt - and if it has been configured to generate interrupts (see the - description of "edge"), you can poll(2) on that file and - poll(2) will return whenever the interrupt was triggered. If - you use poll(2), set the events POLLPRI and POLLERR. If you - use select(2), set the file descriptor in exceptfds. After - poll(2) returns, either lseek(2) to the beginning of the sysfs - file and read the new value or close the file and re-open it - to read the value. - - "edge" ... reads as either "none", "rising", "falling", or - "both". Write these strings to select the signal edge(s) - that will make poll(2) on the "value" file return. - - This file exists only if the pin can be configured as an - interrupt generating input pin. - - "active_low" ... reads as either 0 (false) or 1 (true). Write - any nonzero value to invert the value attribute both - for reading and writing. Existing and subsequent - poll(2) support configuration via the edge attribute - for "rising" and "falling" edges will follow this - setting. - -GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the -controller implementing GPIOs starting at #42) and have the following -read-only attributes: - - /sys/class/gpio/gpiochipN/ - - "base" ... same as N, the first GPIO managed by this chip - - "label" ... provided for diagnostics (not always unique) - - "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) - -Board documentation should in most cases cover what GPIOs are used for -what purposes. However, those numbers are not always stable; GPIOs on -a daughtercard might be different depending on the base board being used, -or other cards in the stack. In such cases, you may need to use the -gpiochip nodes (possibly in conjunction with schematics) to determine -the correct GPIO number to use for a given signal. - - -Exporting from Kernel code --------------------------- -Kernel code can explicitly manage exports of GPIOs which have already been -requested using gpio_request(): - - /* export the GPIO to userspace */ - int gpio_export(unsigned gpio, bool direction_may_change); - - /* reverse gpio_export() */ - void gpio_unexport(); - - /* create a sysfs link to an exported GPIO node */ - int gpio_export_link(struct device *dev, const char *name, - unsigned gpio) - - /* change the polarity of a GPIO node in sysfs */ - int gpio_sysfs_set_active_low(unsigned gpio, int value); - -After a kernel driver requests a GPIO, it may only be made available in -the sysfs interface by gpio_export(). The driver can control whether the -signal direction may change. This helps drivers prevent userspace code -from accidentally clobbering important system state. - -This explicit exporting can help with debugging (by making some kinds -of experiments easier), or can provide an always-there interface that's -suitable for documenting as part of a board support package. - -After the GPIO has been exported, gpio_export_link() allows creating -symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can -use this to provide the interface under their own device in sysfs with -a descriptive name. - -Drivers can use gpio_sysfs_set_active_low() to hide GPIO line polarity -differences between boards from user space. This only affects the -sysfs interface. Polarity change can be done both before and after -gpio_export(), and previously enabled poll(2) support for either -rising or falling edge will be reconfigured to follow this setting. diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt new file mode 100644 index 000000000000..0d03506f2cc5 --- /dev/null +++ b/Documentation/gpio/board.txt @@ -0,0 +1,115 @@ +GPIO Mappings +============= + +This document explains how GPIOs can be assigned to given devices and functions. +Note that it only applies to the new descriptor-based interface. For a +description of the deprecated integer-based GPIO interface please refer to +gpio-legacy.txt (actually, there is no real mapping possible with the old +interface; you just fetch an integer from somewhere and request the +corresponding GPIO. + +Platforms that make use of GPIOs must select ARCH_REQUIRE_GPIOLIB (if GPIO usage +is mandatory) or ARCH_WANT_OPTIONAL_GPIOLIB (if GPIO support can be omitted) in +their Kconfig. Then, how GPIOs are mapped depends on what the platform uses to +describe its hardware layout. Currently, mappings can be defined through device +tree, ACPI, and platform data. + +Device Tree +----------- +GPIOs can easily be mapped to devices and functions in the device tree. The +exact way to do it depends on the GPIO controller providing the GPIOs, see the +device tree bindings for your controller. + +GPIOs mappings are defined in the consumer device's node, in a property named +-gpios, where is the function the driver will request +through gpiod_get(). For example: + + foo_device { + compatible = "acme,foo"; + ... + led-gpios = <&gpio 15 GPIO_ACTIVE_HIGH>, /* red */ + <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ + <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ + + power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>; + }; + +This property will make GPIOs 15, 16 and 17 available to the driver under the +"led" function, and GPIO 1 as the "power" GPIO: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0); + green = gpiod_get_index(dev, "led", 1); + blue = gpiod_get_index(dev, "led", 2); + + power = gpiod_get(dev, "power"); + +The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. +gpiod_is_active_low(power) will be true). + +ACPI +---- +ACPI does not support function names for GPIOs. Therefore, only the "idx" +argument of gpiod_get_index() is useful to discriminate between GPIOs assigned +to a device. The "con_id" argument can still be set for debugging purposes (it +will appear under error messages as well as debug and sysfs nodes). + +Platform Data +------------- +Finally, GPIOs can be bound to devices and functions using platform data. Board +files that desire to do so need to include the following header: + + #include + +GPIOs are mapped by the means of tables of lookups, containing instances of the +gpiod_lookup structure. Two macros are defined to help declaring such mappings: + + GPIO_LOOKUP(chip_label, chip_hwnum, dev_id, con_id, flags) + GPIO_LOOKUP_IDX(chip_label, chip_hwnum, dev_id, con_id, idx, flags) + +where + + - chip_label is the label of the gpiod_chip instance providing the GPIO + - chip_hwnum is the hardware number of the GPIO within the chip + - dev_id is the identifier of the device that will make use of this GPIO. If + NULL, the GPIO will be available to all devices. + - con_id is the name of the GPIO function from the device point of view. It + can be NULL. + - idx is the index of the GPIO within the function. + - flags is defined to specify the following properties: + * GPIOF_ACTIVE_LOW - to configure the GPIO as active-low + * GPIOF_OPEN_DRAIN - GPIO pin is open drain type. + * GPIOF_OPEN_SOURCE - GPIO pin is open source type. + +In the future, these flags might be extended to support more properties. + +Note that GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0. + +A lookup table can then be defined as follows: + + struct gpiod_lookup gpios_table[] = { + GPIO_LOOKUP_IDX("gpio.0", 15, "foo.0", "led", 0, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 16, "foo.0", "led", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 17, "foo.0", "led", 2, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio.0", 1, "foo.0", "power", GPIO_ACTIVE_LOW), + }; + +And the table can be added by the board code as follows: + + gpiod_add_table(gpios_table, ARRAY_SIZE(gpios_table)); + +The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0); + green = gpiod_get_index(dev, "led", 1); + blue = gpiod_get_index(dev, "led", 2); + + power = gpiod_get(dev, "power"); + gpiod_direction_output(power, 1); + +Since the "power" GPIO is mapped as active-low, its actual signal will be 0 +after this code. Contrary to the legacy integer GPIO interface, the active-low +property is handled during mapping and is thus transparent to GPIO consumers. diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt new file mode 100644 index 000000000000..07c74a3765a0 --- /dev/null +++ b/Documentation/gpio/consumer.txt @@ -0,0 +1,197 @@ +GPIO Descriptor Consumer Interface +================================== + +This document describes the consumer interface of the GPIO framework. Note that +it describes the new descriptor-based interface. For a description of the +deprecated integer-based GPIO interface please refer to gpio-legacy.txt. + + +Guidelines for GPIOs consumers +============================== + +Drivers that can't work without standard GPIO calls should have Kconfig entries +that depend on GPIOLIB. The functions that allow a driver to obtain and use +GPIOs are available by including the following file: + + #include + +All the functions that work with the descriptor-based GPIO interface are +prefixed with gpiod_. The gpio_ prefix is used for the legacy interface. No +other function in the kernel should use these prefixes. + + +Obtaining and Disposing GPIOs +============================= + +With the descriptor-based interface, GPIOs are identified with an opaque, +non-forgeable handler that must be obtained through a call to one of the +gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the +device that will use the GPIO and the function the requested GPIO is supposed to +fulfill: + + struct gpio_desc *gpiod_get(struct device *dev, const char *con_id) + +If a function is implemented by using several GPIOs together (e.g. a simple LED +device that displays digits), an additional index argument can be specified: + + struct gpio_desc *gpiod_get_index(struct device *dev, + const char *con_id, unsigned int idx) + +Both functions return either a valid GPIO descriptor, or an error code checkable +with IS_ERR(). They will never return a NULL pointer. + +Device-managed variants of these functions are also defined: + + struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id) + + struct gpio_desc *devm_gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx) + +A GPIO descriptor can be disposed of using the gpiod_put() function: + + void gpiod_put(struct gpio_desc *desc) + +It is strictly forbidden to use a descriptor after calling this function. The +device-managed variant is, unsurprisingly: + + void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) + + +Using GPIOs +=========== + +Setting Direction +----------------- +The first thing a driver must do with a GPIO is setting its direction. This is +done by invoking one of the gpiod_direction_*() functions: + + int gpiod_direction_input(struct gpio_desc *desc) + int gpiod_direction_output(struct gpio_desc *desc, int value) + +The return value is zero for success, else a negative errno. It should be +checked, since the get/set calls don't return errors and since misconfiguration +is possible. You should normally issue these calls from a task context. However, +for spinlock-safe GPIOs it is OK to use them before tasking is enabled, as part +of early board setup. + +For output GPIOs, the value provided becomes the initial output value. This +helps avoid signal glitching during system startup. + +A driver can also query the current direction of a GPIO: + + int gpiod_get_direction(const struct gpio_desc *desc) + +This function will return either GPIOF_DIR_IN or GPIOF_DIR_OUT. + +Be aware that there is no default direction for GPIOs. Therefore, **using a GPIO +without setting its direction first is illegal and will result in undefined +behavior!** + + +Spinlock-Safe GPIO Access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. Those +don't need to sleep, and can safely be done from inside hard (non-threaded) IRQ +handlers and similar contexts. + +Use the following calls to access GPIOs from an atomic context: + + int gpiod_get_value(const struct gpio_desc *desc); + void gpiod_set_value(struct gpio_desc *desc, int value); + +The values are boolean, zero for low, nonzero for high. When reading the value +of an output pin, the value returned should be what's seen on the pin. That +won't always match the specified output value, because of issues including +open-drain signaling and output latencies. + +The get/set calls do not return errors because "invalid GPIO" should have been +reported earlier from gpiod_direction_*(). However, note that not all platforms +can read the value of output pins; those that can't should always return zero. +Also, using these calls for GPIOs that can't safely be accessed without sleeping +(see below) is an error. + + +GPIO Access That May Sleep +-------------------------- +Some GPIO controllers must be accessed using message based buses like I2C or +SPI. Commands to read or write those GPIO values require waiting to get to the +head of a queue to transmit a command and get its response. This requires +sleeping, which can't be done from inside IRQ handlers. + +Platforms that support this type of GPIO distinguish them from other GPIOs by +returning nonzero from this call: + + int gpiod_cansleep(const struct gpio_desc *desc) + +To access such GPIOs, a different set of accessors is defined: + + int gpiod_get_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) + +Accessing such GPIOs requires a context which may sleep, for example a threaded +IRQ handler, and those accessors must be used instead of spinlock-safe +accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work on GPIOs +that can't be accessed from hardIRQ handlers, these calls act the same as the +spinlock-safe calls. + + +Active-low State and Raw GPIO Values +------------------------------------ +Device drivers like to manage the logical state of a GPIO, i.e. the value their +device will actually receive, no matter what lies between it and the GPIO line. +In some cases, it might make sense to control the actual GPIO line value. The +following set of calls ignore the active-low property of a GPIO and work on the +raw line value: + + int gpiod_get_raw_value(const struct gpio_desc *desc) + void gpiod_set_raw_value(struct gpio_desc *desc, int value) + int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) + +The active-low state of a GPIO can also be queried using the following call: + + int gpiod_is_active_low(const struct gpio_desc *desc) + +Note that these functions should only be used with great moderation ; a driver +should not have to care about the physical line level. + +GPIOs mapped to IRQs +-------------------- +GPIO lines can quite often be used as IRQs. You can get the IRQ number +corresponding to a given GPIO using the following call: + + int gpiod_to_irq(const struct gpio_desc *desc) + +It will return an IRQ number, or an negative errno code if the mapping can't be +done (most likely because that particular GPIO cannot be used as IRQ). It is an +unchecked error to use a GPIO that wasn't set up as an input using +gpiod_direction_input(), or to use an IRQ number that didn't originally come +from gpiod_to_irq(). gpiod_to_irq() is not allowed to sleep. + +Non-error values returned from gpiod_to_irq() can be passed to request_irq() or +free_irq(). They will often be stored into IRQ resources for platform devices, +by the board-specific initialization code. Note that IRQ trigger options are +part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are system wakeup +capabilities. + + +Interacting With the Legacy GPIO Subsystem +========================================== +Many kernel subsystems still handle GPIOs using the legacy integer-based +interface. Although it is strongly encouraged to upgrade them to the safer +descriptor-based API, the following two functions allow you to convert a GPIO +descriptor into the GPIO integer namespace and vice-versa: + + int desc_to_gpio(const struct gpio_desc *desc) + struct gpio_desc *gpio_to_desc(unsigned gpio) + +The GPIO number returned by desc_to_gpio() can be safely used as long as the +GPIO descriptor has not been freed. All the same, a GPIO number passed to +gpio_to_desc() must have been properly acquired, and usage of the returned GPIO +descriptor is only possible after the GPIO number has been released. + +Freeing a GPIO obtained by one API with the other API is forbidden and an +unchecked error. diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt new file mode 100644 index 000000000000..9da0bfa74781 --- /dev/null +++ b/Documentation/gpio/driver.txt @@ -0,0 +1,75 @@ +GPIO Descriptor Driver Interface +================================ + +This document serves as a guide for GPIO chip drivers writers. Note that it +describes the new descriptor-based interface. For a description of the +deprecated integer-based GPIO interface please refer to gpio-legacy.txt. + +Each GPIO controller driver needs to include the following header, which defines +the structures used to define a GPIO driver: + + #include + + +Internal Representation of GPIOs +================================ + +Inside a GPIO driver, individual GPIOs are identified by their hardware number, +which is a unique number between 0 and n, n being the number of GPIOs managed by +the chip. This number is purely internal: the hardware number of a particular +GPIO descriptor is never made visible outside of the driver. + +On top of this internal number, each GPIO also need to have a global number in +the integer GPIO namespace so that it can be used with the legacy GPIO +interface. Each chip must thus have a "base" number (which can be automatically +assigned), and for each GPIO the global number will be (base + hardware number). +Although the integer representation is considered deprecated, it still has many +users and thus needs to be maintained. + +So for example one platform could use numbers 32-159 for GPIOs, with a +controller defining 128 GPIOs at a "base" of 32 ; while another platform uses +numbers 0..63 with one set of GPIO controllers, 64-79 with another type of GPIO +controller, and on one particular board 80-95 with an FPGA. The numbers need not +be contiguous; either of those platforms could also use numbers 2000-2063 to +identify GPIOs in a bank of I2C GPIO expanders. + + +Controller Drivers: gpio_chip +============================= + +In the gpiolib framework each GPIO controller is packaged as a "struct +gpio_chip" (see linux/gpio/driver.h for its complete definition) with members +common to each controller of that type: + + - methods to establish GPIO direction + - methods used to access GPIO values + - method to return the IRQ number associated to a given GPIO + - flag saying whether calls to its methods may sleep + - optional debugfs dump method (showing extra state like pullup config) + - optional base number (will be automatically assigned if omitted) + - label for diagnostics and GPIOs mapping using platform data + +The code implementing a gpio_chip should support multiple instances of the +controller, possibly using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be rare; +use gpiochip_remove() when it is unavoidable. + +Most often a gpio_chip is part of an instance-specific structure with state not +exposed by the GPIO interfaces, such as addressing, power management, and more. +Chips such as codecs will have complex non-GPIO state. + +Any debugfs dump method should normally ignore signals which haven't been +requested as GPIOs. They can use gpiochip_is_requested(), which returns either +NULL or the label associated with that GPIO when it was requested. + +Locking IRQ usage +----------------- +Input GPIOs can be used as IRQ signals. When this happens, a driver is requested +to mark the GPIO as being used as an IRQ: + + int gpiod_lock_as_irq(struct gpio_desc *desc) + +This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock +is released: + + void gpiod_unlock_as_irq(struct gpio_desc *desc) diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt new file mode 100644 index 000000000000..6f83fa965b4b --- /dev/null +++ b/Documentation/gpio/gpio-legacy.txt @@ -0,0 +1,775 @@ +GPIO Interfaces + +This provides an overview of GPIO access conventions on Linux. + +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + + +What is a GPIO? +=============== +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chip, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial busses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven ... supporting "wire-OR" and similar schemes + for the other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card writeprotect status, driving +a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +GPIO conventions +================ +Note that this is called a "convention" because you don't need to do it this +way, and it's no crime if you don't. There **are** cases where portability +is not the main issue; GPIOs are often used for the kind of board-specific +glue logic that may even change between board revisions, and can't ever be +used on a board that's wired differently. Only least-common-denominator +functionality can be very portable. Other features are platform-specific, +and that can be critical for glue logic. + +Plus, this doesn't require any implementation framework, just an interface. +One platform might implement it as simple inline functions accessing chip +registers; another might implement it by delegating through abstractions +used for several very different kinds of GPIO controller. (There is some +optional code supporting such an implementation strategy, described later +in this document, but drivers acting as clients to the GPIO interface must +not care how it's implemented.) + +That said, if the convention is supported on their platform, drivers should +use it when possible. Platforms must select ARCH_REQUIRE_GPIOLIB or +ARCH_WANT_OPTIONAL_GPIOLIB in their Kconfig. Drivers that can't work without +standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The +GPIO calls are available, either as "real code" or as optimized-away stubs, +when drivers use the include file: + + #include + +If you stick to this convention then it'll be easier for other developers to +see what your code is doing, and help maintain it. + +Note that these operations include I/O barriers on platforms which need to +use them; drivers don't need to add them explicitly. + + +Identifying GPIOs +----------------- +GPIOs are identified by unsigned integers in the range 0..MAX_INT. That +reserves "negative" numbers for other purposes like marking signals as +"not available on this board", or indicating faults. Code that doesn't +touch the underlying hardware treats these integers as opaque cookies. + +Platforms define how they use those integers, and usually #define symbols +for the GPIO lines so that board-specific setup code directly corresponds +to the relevant schematics. In contrast, drivers should only use GPIO +numbers passed to them from that setup code, using platform_data to hold +board-specific pin configuration data (along with other board specific +data they need). That avoids portability problems. + +So for example one platform uses numbers 32-159 for GPIOs; while another +uses numbers 0..63 with one set of GPIO controllers, 64-79 with another +type of GPIO controller, and on one particular board 80-95 with an FPGA. +The numbers need not be contiguous; either of those platforms could also +use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. + +If you want to initialize a structure with an invalid GPIO number, use +some negative number (perhaps "-EINVAL"); that will never be valid. To +test if such number from such a structure could reference a GPIO, you +may use this predicate: + + int gpio_is_valid(int number); + +A number that's not valid will be rejected by calls which may request +or free GPIOs (see below). Other numbers may also be rejected; for +example, a number might be valid but temporarily unused on a given board. + +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. + +Using GPIOs +----------- +The first thing a system should do with a GPIO is allocate it, using +the gpio_request() call; see later. + +One of the next things to do with a GPIO, often in board setup code when +setting up a platform_device using the GPIO, is mark its direction: + + /* set as input or output, returning 0 or negative errno */ + int gpio_direction_input(unsigned gpio); + int gpio_direction_output(unsigned gpio, int value); + +The return value is zero for success, else a negative errno. It should +be checked, since the get/set calls don't have error returns and since +misconfiguration is possible. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to use them +before tasking is enabled, as part of early board setup. + +For output GPIOs, the value provided becomes the initial output value. +This helps avoid signal glitching during system startup. + +For compatibility with legacy interfaces to GPIOs, setting the direction +of a GPIO implicitly requests that GPIO (see below) if it has not been +requested already. That compatibility is being removed from the optional +gpiolib framework. + +Setting the direction can fail if the GPIO number is invalid, or when +that particular GPIO can't be used in that mode. It's generally a bad +idea to rely on boot firmware to have set the direction correctly, since +it probably wasn't validated to do more than boot Linux. (Similarly, +that board setup code probably needs to multiplex that pin as a GPIO, +and configure pullups/pulldowns appropriately.) + + +Spinlock-Safe GPIO access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. +Those don't need to sleep, and can safely be done from inside hard +(nonthreaded) IRQ handlers and similar contexts. + +Use the following calls to access such GPIOs, +for which gpio_cansleep() will always return false (see below): + + /* GPIO INPUT: return zero or nonzero */ + int gpio_get_value(unsigned gpio); + + /* GPIO OUTPUT */ + void gpio_set_value(unsigned gpio, int value); + +The values are boolean, zero for low, nonzero for high. When reading the +value of an output pin, the value returned should be what's seen on the +pin ... that won't always match the specified output value, because of +issues including open-drain signaling and output latencies. + +The get/set calls have no error returns because "invalid GPIO" should have +been reported earlier from gpio_direction_*(). However, note that not all +platforms can read the value of output pins; those that can't should always +return zero. Also, using these calls for GPIOs that can't safely be accessed +without sleeping (see below) is an error. + +Platform-specific implementations are encouraged to optimize the two +calls to access the GPIO value in cases where the GPIO number (and for +output, value) are constant. It's normal for them to need only a couple +of instructions in such cases (reading or writing a hardware register), +and not to need spinlocks. Such optimized calls can make bitbanging +applications a lot more efficient (in both space and time) than spending +dozens of instructions on subroutine calls. + + +GPIO access that may sleep +-------------------------- +Some GPIO controllers must be accessed using message based busses like I2C +or SPI. Commands to read or write those GPIO values require waiting to +get to the head of a queue to transmit a command and get its response. +This requires sleeping, which can't be done from inside IRQ handlers. + +Platforms that support this type of GPIO distinguish them from other GPIOs +by returning nonzero from this call (which requires a valid GPIO number, +which should have been previously allocated with gpio_request): + + int gpio_cansleep(unsigned gpio); + +To access such GPIOs, a different set of accessors is defined: + + /* GPIO INPUT: return zero or nonzero, might sleep */ + int gpio_get_value_cansleep(unsigned gpio); + + /* GPIO OUTPUT, might sleep */ + void gpio_set_value_cansleep(unsigned gpio, int value); + + +Accessing such GPIOs requires a context which may sleep, for example +a threaded IRQ handler, and those accessors must be used instead of +spinlock-safe accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work +on GPIOs that can't be accessed from hardIRQ handlers, these calls act +the same as the spinlock-safe calls. + + ** IN ADDITION ** calls to setup and configure such GPIOs must be made +from contexts which may sleep, since they may need to access the GPIO +controller chip too: (These setup calls are usually made from board +setup or driver probe/teardown code, so this is an easy constraint.) + + gpio_direction_input() + gpio_direction_output() + gpio_request() + +## gpio_request_one() +## gpio_request_array() +## gpio_free_array() + + gpio_free() + gpio_set_debounce() + + + +Claiming and Releasing GPIOs +---------------------------- +To help catch system configuration errors, two calls are defined. + + /* request GPIO, returning 0 or negative errno. + * non-null labels may be useful for diagnostics. + */ + int gpio_request(unsigned gpio, const char *label); + + /* release previously-claimed GPIO */ + void gpio_free(unsigned gpio); + +Passing invalid GPIO numbers to gpio_request() will fail, as will requesting +GPIOs that have already been claimed with that call. The return value of +gpio_request() must be checked. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs +before tasking is enabled, as part of early board setup. + +These calls serve two basic purposes. One is marking the signals which +are actually in use as GPIOs, for better diagnostics; systems may have +several hundred potential GPIOs, but often only a dozen are used on any +given board. Another is to catch conflicts, identifying errors when +(a) two or more drivers wrongly think they have exclusive use of that +signal, or (b) something wrongly believes it's safe to remove drivers +needed to manage a signal that's in active use. That is, requesting a +GPIO can serve as a kind of lock. + +Some platforms may also use knowledge about what GPIOs are active for +power management, such as by powering down unused chip sectors and, more +easily, gating off unused clocks. + +For GPIOs that use pins known to the pinctrl subsystem, that subsystem should +be informed of their use; a gpiolib driver's .request() operation may call +pinctrl_request_gpio(), and a gpiolib driver's .free() operation may call +pinctrl_free_gpio(). The pinctrl subsystem allows a pinctrl_request_gpio() +to succeed concurrently with a pin or pingroup being "owned" by a device for +pin multiplexing. + +Any programming of pin multiplexing hardware that is needed to route the +GPIO signal to the appropriate pin should occur within a GPIO driver's +.direction_input() or .direction_output() operations, and occur after any +setup of an output GPIO's value. This allows a glitch-free migration from a +pin's special function to GPIO. This is sometimes required when using a GPIO +to implement a workaround on signals typically driven by a non-GPIO HW block. + +Some platforms allow some or all GPIO signals to be routed to different pins. +Similarly, other aspects of the GPIO or pin may need to be configured, such as +pullup/pulldown. Platform software should arrange that any such details are +configured prior to gpio_request() being called for those GPIOs, e.g. using +the pinctrl subsystem's mapping table, so that GPIO users need not be aware +of these details. + +Also note that it's your responsibility to have stopped using a GPIO +before you free it. + +Considering in most cases GPIOs are actually configured right after they +are claimed, three additional calls are defined: + + /* request a single GPIO, with initial configuration specified by + * 'flags', identical to gpio_request() wrt other arguments and + * return value + */ + int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); + + /* request multiple GPIOs in a single call + */ + int gpio_request_array(struct gpio *array, size_t num); + + /* release multiple GPIOs in a single call + */ + void gpio_free_array(struct gpio *array, size_t num); + +where 'flags' is currently defined to specify the following properties: + + * GPIOF_DIR_IN - to configure direction as input + * GPIOF_DIR_OUT - to configure direction as output + + * GPIOF_INIT_LOW - as output, set initial level to LOW + * GPIOF_INIT_HIGH - as output, set initial level to HIGH + * GPIOF_OPEN_DRAIN - gpio pin is open drain type. + * GPIOF_OPEN_SOURCE - gpio pin is open source type. + + * GPIOF_EXPORT_DIR_FIXED - export gpio to sysfs, keep direction + * GPIOF_EXPORT_DIR_CHANGEABLE - also export, allow changing direction + +since GPIOF_INIT_* are only valid when configured as output, so group valid +combinations as: + + * GPIOF_IN - configure as input + * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW + * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH + +When setting the flag as GPIOF_OPEN_DRAIN then it will assume that pins is +open drain type. Such pins will not be driven to 1 in output mode. It is +require to connect pull-up on such pins. By enabling this flag, gpio lib will +make the direction to input when it is asked to set value of 1 in output mode +to make the pin HIGH. The pin is make to LOW by driving value 0 in output mode. + +When setting the flag as GPIOF_OPEN_SOURCE then it will assume that pins is +open source type. Such pins will not be driven to 0 in output mode. It is +require to connect pull-down on such pin. By enabling this flag, gpio lib will +make the direction to input when it is asked to set value of 0 in output mode +to make the pin LOW. The pin is make to HIGH by driving value 1 in output mode. + +In the future, these flags can be extended to support more properties. + +Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is +introduced to encapsulate all three fields as: + + struct gpio { + unsigned gpio; + unsigned long flags; + const char *label; + }; + +A typical example of usage: + + static struct gpio leds_gpios[] = { + { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ + { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ + { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ + { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ + { ... }, + }; + + err = gpio_request_one(31, GPIOF_IN, "Reset Button"); + if (err) + ... + + err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + if (err) + ... + + gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + + +GPIOs mapped to IRQs +-------------------- +GPIO numbers are unsigned integers; so are IRQ numbers. These make up +two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can +map between them using calls like: + + /* map GPIO numbers to IRQ numbers */ + int gpio_to_irq(unsigned gpio); + + /* map IRQ numbers to GPIO numbers (avoid using this) */ + int irq_to_gpio(unsigned irq); + +Those return either the corresponding number in the other namespace, or +else a negative errno code if the mapping can't be done. (For example, +some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO +number that wasn't set up as an input using gpio_direction_input(), or +to use an IRQ number that didn't originally come from gpio_to_irq(). + +These two mapping calls are expected to cost on the order of a single +addition or subtraction. They're not allowed to sleep. + +Non-error values returned from gpio_to_irq() can be passed to request_irq() +or free_irq(). They will often be stored into IRQ resources for platform +devices, by the board-specific initialization code. Note that IRQ trigger +options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are +system wakeup capabilities. + +Non-error values returned from irq_to_gpio() would most commonly be used +with gpio_get_value(), for example to initialize or update driver state +when the IRQ is edge-triggered. Note that some platforms don't support +this reverse mapping, so you should avoid using it. + + +Emulating Open Drain Signals +---------------------------- +Sometimes shared signals need to use "open drain" signaling, where only the +low signal level is actually driven. (That term applies to CMOS transistors; +"open collector" is used for TTL.) A pullup resistor causes the high signal +level. This is sometimes called a "wire-AND"; or more practically, from the +negative logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain outputs; many don't. When +you need open drain signaling but your hardware doesn't directly support it, +there's a common idiom you can use to emulate it with any GPIO pin that can +be used as either an input or an output: + + LOW: gpio_direction_output(gpio, 0) ... this drives the signal + and overrides the pullup. + + HIGH: gpio_direction_input(gpio) ... this turns off the output, + so the pullup (or some other device) controls the signal. + +If you are "driving" the signal high but gpio_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component +is driving the shared signal low. That's not necessarily an error. As one +common example, that's how I2C clocks are stretched: a slave that needs a +slower clock delays the rising edge of SCK, and the I2C master adjusts its +signaling rate accordingly. + + +GPIO controllers and the pinctrl subsystem +------------------------------------------ + +A GPIO controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with an optional gpio feature. We have already covered the +case where e.g. a GPIO controller need to reserve a pin or set the +direction of a pin by calling any of: + +pinctrl_request_gpio() +pinctrl_free_gpio() +pinctrl_gpio_direction_input() +pinctrl_gpio_direction_output() + +But how does the pin control subsystem cross-correlate the GPIO +numbers (which are a global business) to a certain pin on a certain +pin controller? + +This is done by registering "ranges" of pins, which are essentially +cross-reference tables. These are described in +Documentation/pinctrl.txt + +While the pin allocation is totally managed by the pinctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem before it will call 'pinctrl_request_gpio' in order +to request the corresponding pin to be prepared by the pinctrl subsystem +before any gpio usage. + +For this, the gpio controller can register its pin range with pinctrl +subsystem. There are two ways of doing it currently: with or without DT. + +For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. + +For non-DT support, user can call gpiochip_add_pin_range() with appropriate +parameters to register a range of gpio pins with a pinctrl driver. For this +exact name string of pinctrl device has to be passed as one of the +argument to this routine. + + +What do these conventions omit? +=============================== +One of the biggest things these conventions omit is pin multiplexing, since +this is highly chip-specific and nonportable. One platform might not need +explicit multiplexing; another might have just two options for use of any +given pin; another might have eight options per pin; another might be able +to route a given GPIO to any one of several pins. (Yes, those examples all +come from systems that run Linux today.) + +Related to multiplexing is configuration and enabling of the pullups or +pulldowns integrated on some platforms. Not all platforms support them, +or support them in the same way; and any given board might use external +pullups (or pulldowns) so that the on-chip ones should not be used. +(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a +platform-specific issue, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. + +There are other system-specific mechanisms that are not specified here, +like the aforementioned options for input de-glitching and wire-OR output. +Hardware may support reading or writing GPIOs in gangs, but that's usually +configuration dependent: for GPIOs sharing the same bank. (GPIOs are +commonly grouped in banks of 16 or 32, with a given SOC having several such +banks.) Some systems can trigger IRQs from output GPIOs, or read values +from pins not managed as GPIOs. Code relying on such mechanisms will +necessarily be nonportable. + +Dynamic definition of GPIOs is not currently standard; for example, as +a side effect of configuring an add-on board with some GPIO expanders. + + +GPIO implementor's framework (OPTIONAL) +======================================= +As noted earlier, there is an optional implementation framework making it +easier for platforms to support different kinds of GPIO controller using +the same programming interface. This framework is called "gpiolib". + +As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file +will be found there. That will list all the controllers registered through +this framework, and the state of the GPIOs currently in use. + + +Controller Drivers: gpio_chip +----------------------------- +In this framework each GPIO controller is packaged as a "struct gpio_chip" +with information common to each controller of that type: + + - methods to establish GPIO direction + - methods used to access GPIO values + - flag saying whether calls to its methods may sleep + - optional debugfs dump method (showing extra state like pullup config) + - label for diagnostics + +There is also per-instance data, which may come from device.platform_data: +the number of its first GPIO, and how many GPIOs it exposes. + +The code implementing a gpio_chip should support multiple instances of the +controller, possibly using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be +rare; use gpiochip_remove() when it is unavoidable. + +Most often a gpio_chip is part of an instance-specific structure with state +not exposed by the GPIO interfaces, such as addressing, power management, +and more. Chips such as codecs will have complex non-GPIO state. + +Any debugfs dump method should normally ignore signals which haven't been +requested as GPIOs. They can use gpiochip_is_requested(), which returns +either NULL or the label associated with that GPIO when it was requested. + + +Platform Support +---------------- +To support this framework, a platform's Kconfig will "select" either +ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB +and arrange that its includes and defines +three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). + +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders. + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled +into the kernel on that architecture. + +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user +can enable it and build it into the kernel optionally. + +If neither of these options are selected, the platform does not support +GPIOs through GPIO-lib and the code cannot be enabled by the user. + +Trivial implementations of those functions can directly use framework +code, which always dispatches through the gpio_chip: + + #define gpio_get_value __gpio_get_value + #define gpio_set_value __gpio_set_value + #define gpio_cansleep __gpio_cansleep + +Fancier implementations could instead define those as inline functions with +logic optimizing access to specific SOC-based GPIOs. For example, if the +referenced GPIO is the constant "12", getting or setting its value could +cost as little as two or three instructions, never sleeping. When such an +optimization is not possible those calls must delegate to the framework +code, costing at least a few dozen instructions. For bitbanged I/O, such +instruction savings can be significant. + +For SOCs, platform-specific code defines and registers gpio_chip instances +for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to +match chip vendor documentation, and directly match board schematics. They +may well start at zero and go up to a platform-specific limit. Such GPIOs +are normally integrated into platform initialization to make them always be +available, from arch_initcall() or earlier; they can often serve as IRQs. + + +Board Support +------------- +For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi +function devices, FPGAs or CPLDs -- most often board-specific code handles +registering controller devices and ensures that their drivers know what GPIO +numbers to use with gpiochip_add(). Their numbers often start right after +platform-specific GPIOs. + +For example, board setup code could create structures identifying the range +of GPIOs that chip will expose, and passes them to each GPIO expander chip +using platform_data. Then the chip driver's probe() routine could pass that +data to gpiochip_add(). + +Initialization order can be important. For example, when a device relies on +an I2C-based GPIO, its probe() routine should only be called after that GPIO +becomes available. That may mean the device should not be registered until +calls for that GPIO can work. One way to address such dependencies is for +such gpio_chip controllers to provide setup() and teardown() callbacks to +board specific code; those board specific callbacks would register devices +once all the necessary resources are available, and remove them later when +the GPIO controller device becomes unavailable. + + +Sysfs Interface for Userspace (OPTIONAL) +======================================== +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given appropriate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. + +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + + If the pin can be configured as interrupt-generating interrupt + and if it has been configured to generate interrupts (see the + description of "edge"), you can poll(2) on that file and + poll(2) will return whenever the interrupt was triggered. If + you use poll(2), set the events POLLPRI and POLLERR. If you + use select(2), set the file descriptor in exceptfds. After + poll(2) returns, either lseek(2) to the beginning of the sysfs + file and read the new value or close the file and re-open it + to read the value. + + "edge" ... reads as either "none", "rising", "falling", or + "both". Write these strings to select the signal edge(s) + that will make poll(2) on the "value" file return. + + This file exists only if the pin can be configured as an + interrupt generating input pin. + + "active_low" ... reads as either 0 (false) or 1 (true). Write + any nonzero value to invert the value attribute both + for reading and writing. Existing and subsequent + poll(2) support configuration via the edge attribute + for "rising" and "falling" edges will follow this + setting. + +GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +Exporting from Kernel code +-------------------------- +Kernel code can explicitly manage exports of GPIOs which have already been +requested using gpio_request(): + + /* export the GPIO to userspace */ + int gpio_export(unsigned gpio, bool direction_may_change); + + /* reverse gpio_export() */ + void gpio_unexport(); + + /* create a sysfs link to an exported GPIO node */ + int gpio_export_link(struct device *dev, const char *name, + unsigned gpio) + + /* change the polarity of a GPIO node in sysfs */ + int gpio_sysfs_set_active_low(unsigned gpio, int value); + +After a kernel driver requests a GPIO, it may only be made available in +the sysfs interface by gpio_export(). The driver can control whether the +signal direction may change. This helps drivers prevent userspace code +from accidentally clobbering important system state. + +This explicit exporting can help with debugging (by making some kinds +of experiments easier), or can provide an always-there interface that's +suitable for documenting as part of a board support package. + +After the GPIO has been exported, gpio_export_link() allows creating +symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can +use this to provide the interface under their own device in sysfs with +a descriptive name. + +Drivers can use gpio_sysfs_set_active_low() to hide GPIO line polarity +differences between boards from user space. This only affects the +sysfs interface. Polarity change can be done both before and after +gpio_export(), and previously enabled poll(2) support for either +rising or falling edge will be reconfigured to follow this setting. diff --git a/Documentation/gpio/gpio.txt b/Documentation/gpio/gpio.txt new file mode 100644 index 000000000000..cd9b356e88cd --- /dev/null +++ b/Documentation/gpio/gpio.txt @@ -0,0 +1,119 @@ +GPIO Interfaces +=============== + +The documents in this directory give detailed instructions on how to access +GPIOs in drivers, and how to write a driver for a device that provides GPIOs +itself. + +Due to the history of GPIO interfaces in the kernel, there are two different +ways to obtain and use GPIOs: + + - The descriptor-based interface is the preferred way to manipulate GPIOs, +and is described by all the files in this directory excepted gpio-legacy.txt. + - The legacy integer-based interface which is considered deprecated (but still +usable for compatibility reasons) is documented in gpio-legacy.txt. + +The remainder of this document applies to the new descriptor-based interface. +gpio-legacy.txt contains the same information applied to the legacy +integer-based interface. + + +What is a GPIO? +=============== + +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chip, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial buses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven, supporting "wire-OR" and similar schemes for the + other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card write-protect status, driving +a LED, configuring a transceiver, bit-banging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +Common GPIO Properties +====================== + +These properties are met through all the other documents of the GPIO interface +and it is useful to understand them, especially if you need to define GPIO +mappings. + +Active-High and Active-Low +-------------------------- +It is natural to assume that a GPIO is "active" when its output signal is 1 +("high"), and inactive when it is 0 ("low"). However in practice the signal of a +GPIO may be inverted before is reaches its destination, or a device could decide +to have different conventions about what "active" means. Such decisions should +be transparent to device drivers, therefore it is possible to define a GPIO as +being either active-high ("1" means "active", the default) or active-low ("0" +means "active") so that drivers only need to worry about the logical signal and +not about what happens at the line level. + +Open Drain and Open Source +-------------------------- +Sometimes shared signals need to use "open drain" (where only the low signal +level is actually driven), or "open source" (where only the high signal level is +driven) signaling. That term applies to CMOS transistors; "open collector" is +used for TTL. A pullup or pulldown resistor causes the high or low signal level. +This is sometimes called a "wire-AND"; or more practically, from the negative +logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain and open source outputs; many +don't. When you need open drain signaling but your hardware doesn't directly +support it, there's a common idiom you can use to emulate it with any GPIO pin +that can be used as either an input or an output: + + LOW: gpiod_direction_output(gpio, 0) ... this drives the signal and overrides + the pullup. + + HIGH: gpiod_direction_input(gpio) ... this turns off the output, so the pullup + (or some other device) controls the signal. + +The same logic can be applied to emulate open source signaling, by driving the +high signal and configuring the GPIO as input for low. This open drain/open +source emulation can be handled transparently by the GPIO framework. + +If you are "driving" the signal high but gpiod_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component is +driving the shared signal low. That's not necessarily an error. As one common +example, that's how I2C clocks are stretched: a slave that needs a slower clock +delays the rising edge of SCK, and the I2C master adjusts its signaling rate +accordingly. diff --git a/Documentation/gpio/sysfs.txt b/Documentation/gpio/sysfs.txt new file mode 100644 index 000000000000..c2c3a97f8ff7 --- /dev/null +++ b/Documentation/gpio/sysfs.txt @@ -0,0 +1,155 @@ +GPIO Sysfs Interface for Userspace +================================== + +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given appropriate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. + +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + + If the pin can be configured as interrupt-generating interrupt + and if it has been configured to generate interrupts (see the + description of "edge"), you can poll(2) on that file and + poll(2) will return whenever the interrupt was triggered. If + you use poll(2), set the events POLLPRI and POLLERR. If you + use select(2), set the file descriptor in exceptfds. After + poll(2) returns, either lseek(2) to the beginning of the sysfs + file and read the new value or close the file and re-open it + to read the value. + + "edge" ... reads as either "none", "rising", "falling", or + "both". Write these strings to select the signal edge(s) + that will make poll(2) on the "value" file return. + + This file exists only if the pin can be configured as an + interrupt generating input pin. + + "active_low" ... reads as either 0 (false) or 1 (true). Write + any nonzero value to invert the value attribute both + for reading and writing. Existing and subsequent + poll(2) support configuration via the edge attribute + for "rising" and "falling" edges will follow this + setting. + +GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +Exporting from Kernel code +-------------------------- +Kernel code can explicitly manage exports of GPIOs which have already been +requested using gpio_request(): + + /* export the GPIO to userspace */ + int gpiod_export(struct gpio_desc *desc, bool direction_may_change); + + /* reverse gpio_export() */ + void gpiod_unexport(struct gpio_desc *desc); + + /* create a sysfs link to an exported GPIO node */ + int gpiod_export_link(struct device *dev, const char *name, + struct gpio_desc *desc); + + /* change the polarity of a GPIO node in sysfs */ + int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value); + +After a kernel driver requests a GPIO, it may only be made available in +the sysfs interface by gpiod_export(). The driver can control whether the +signal direction may change. This helps drivers prevent userspace code +from accidentally clobbering important system state. + +This explicit exporting can help with debugging (by making some kinds +of experiments easier), or can provide an always-there interface that's +suitable for documenting as part of a board support package. + +After the GPIO has been exported, gpiod_export_link() allows creating +symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can +use this to provide the interface under their own device in sysfs with +a descriptive name. + +Drivers can use gpiod_sysfs_set_active_low() to hide GPIO line polarity +differences between boards from user space. Polarity change can be done both +before and after gpiod_export(), and previously enabled poll(2) support for +either rising or falling edge will be reconfigured to follow this setting. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9ca3e74a10e1..50680a59a2ff 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted. owned by uid=0. ima_hash= [IMA] - Format: { "sha1" | "md5" } + Format: { md5 | sha1 | rmd160 | sha256 | sha384 + | sha512 | ... } default: "sha1" + The list of supported hash algorithms is defined + in crypto/hash_info.h. + ima_tcb [IMA] Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files opened for read by uid=0. + ima_template= [IMA] + Select one of defined IMA measurements template formats. + Formats: { "ima" | "ima-ng" } + Default: "ima-ng" + init= [KNL] Format: Run specified binary instead of /sbin/init as init diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index cdb3e40b9d14..a06b48d2f5cc 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -1,49 +1,563 @@ -filter.txt: Linux Socket Filtering -Written by: Jay Schulist +Linux Socket Filtering aka Berkeley Packet Filter (BPF) +======================================================= Introduction -============ - - Linux Socket Filtering is derived from the Berkeley -Packet Filter. There are some distinct differences between -the BSD and Linux Kernel Filtering. - -Linux Socket Filtering (LSF) allows a user-space program to -attach a filter onto any socket and allow or disallow certain -types of data to come through the socket. LSF follows exactly -the same filter code structure as the BSD Berkeley Packet Filter -(BPF), so referring to the BSD bpf.4 manpage is very helpful in -creating filters. - -LSF is much simpler than BPF. One does not have to worry about -devices or anything like that. You simply create your filter -code, send it to the kernel via the SO_ATTACH_FILTER option and -if your filter code passes the kernel check on it, you then -immediately begin filtering data on that socket. - -You can also detach filters from your socket via the -SO_DETACH_FILTER option. This will probably not be used much -since when you close a socket that has a filter on it the -filter is automagically removed. The other less common case -may be adding a different filter on the same socket where you had another -filter that is still running: the kernel takes care of removing -the old one and placing your new one in its place, assuming your -filter has passed the checks, otherwise if it fails the old filter -will remain on that socket. - -SO_LOCK_FILTER option allows to lock the filter attached to a -socket. Once set, a filter cannot be removed or changed. This allows -one process to setup a socket, attach a filter, lock it then drop -privileges and be assured that the filter will be kept until the -socket is closed. - -Examples -======== - -Ioctls- -setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &Filter, sizeof(Filter)); -setsockopt(sockfd, SOL_SOCKET, SO_DETACH_FILTER, &value, sizeof(value)); -setsockopt(sockfd, SOL_SOCKET, SO_LOCK_FILTER, &value, sizeof(value)); - -See the BSD bpf.4 manpage and the BSD Packet Filter paper written by -Steven McCanne and Van Jacobson of Lawrence Berkeley Laboratory. +------------ + +Linux Socket Filtering (LSF) is derived from the Berkeley Packet Filter. +Though there are some distinct differences between the BSD and Linux +Kernel filtering, but when we speak of BPF or LSF in Linux context, we +mean the very same mechanism of filtering in the Linux kernel. + +BPF allows a user-space program to attach a filter onto any socket and +allow or disallow certain types of data to come through the socket. LSF +follows exactly the same filter code structure as BSD's BPF, so referring +to the BSD bpf.4 manpage is very helpful in creating filters. + +On Linux, BPF is much simpler than on BSD. One does not have to worry +about devices or anything like that. You simply create your filter code, +send it to the kernel via the SO_ATTACH_FILTER option and if your filter +code passes the kernel check on it, you then immediately begin filtering +data on that socket. + +You can also detach filters from your socket via the SO_DETACH_FILTER +option. This will probably not be used much since when you close a socket +that has a filter on it the filter is automagically removed. The other +less common case may be adding a different filter on the same socket where +you had another filter that is still running: the kernel takes care of +removing the old one and placing your new one in its place, assuming your +filter has passed the checks, otherwise if it fails the old filter will +remain on that socket. + +SO_LOCK_FILTER option allows to lock the filter attached to a socket. Once +set, a filter cannot be removed or changed. This allows one process to +setup a socket, attach a filter, lock it then drop privileges and be +assured that the filter will be kept until the socket is closed. + +The biggest user of this construct might be libpcap. Issuing a high-level +filter command like `tcpdump -i em1 port 22` passes through the libpcap +internal compiler that generates a structure that can eventually be loaded +via SO_ATTACH_FILTER to the kernel. `tcpdump -i em1 port 22 -ddd` +displays what is being placed into this structure. + +Although we were only speaking about sockets here, BPF in Linux is used +in many more places. There's xt_bpf for netfilter, cls_bpf in the kernel +qdisc layer, SECCOMP-BPF (SECure COMPuting [1]), and lots of other places +such as team driver, PTP code, etc where BPF is being used. + + [1] Documentation/prctl/seccomp_filter.txt + +Original BPF paper: + +Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new +architecture for user-level packet capture. In Proceedings of the +USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993 +Conference Proceedings (USENIX'93). USENIX Association, Berkeley, +CA, USA, 2-2. [http://www.tcpdump.org/papers/bpf-usenix93.pdf] + +Structure +--------- + +User space applications include which contains the +following relevant structures: + +struct sock_filter { /* Filter block */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ +}; + +Such a structure is assembled as an array of 4-tuples, that contains +a code, jt, jf and k value. jt and jf are jump offsets and k a generic +value to be used for a provided code. + +struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ + unsigned short len; /* Number of filter blocks */ + struct sock_filter __user *filter; +}; + +For socket filtering, a pointer to this structure (as shown in +follow-up example) is being passed to the kernel through setsockopt(2). + +Example +------- + +#include +#include +#include +#include +/* ... */ + +/* From the example above: tcpdump -i em1 port 22 -dd */ +struct sock_filter code[] = { + { 0x28, 0, 0, 0x0000000c }, + { 0x15, 0, 8, 0x000086dd }, + { 0x30, 0, 0, 0x00000014 }, + { 0x15, 2, 0, 0x00000084 }, + { 0x15, 1, 0, 0x00000006 }, + { 0x15, 0, 17, 0x00000011 }, + { 0x28, 0, 0, 0x00000036 }, + { 0x15, 14, 0, 0x00000016 }, + { 0x28, 0, 0, 0x00000038 }, + { 0x15, 12, 13, 0x00000016 }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x00000017 }, + { 0x15, 2, 0, 0x00000084 }, + { 0x15, 1, 0, 0x00000006 }, + { 0x15, 0, 8, 0x00000011 }, + { 0x28, 0, 0, 0x00000014 }, + { 0x45, 6, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x0000000e }, + { 0x48, 0, 0, 0x0000000e }, + { 0x15, 2, 0, 0x00000016 }, + { 0x48, 0, 0, 0x00000010 }, + { 0x15, 0, 1, 0x00000016 }, + { 0x06, 0, 0, 0x0000ffff }, + { 0x06, 0, 0, 0x00000000 }, +}; + +struct sock_fprog bpf = { + .len = ARRAY_SIZE(code), + .filter = code, +}; + +sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); +if (sock < 0) + /* ... bail out ... */ + +ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)); +if (ret < 0) + /* ... bail out ... */ + +/* ... */ +close(sock); + +The above example code attaches a socket filter for a PF_PACKET socket +in order to let all IPv4/IPv6 packets with port 22 pass. The rest will +be dropped for this socket. + +The setsockopt(2) call to SO_DETACH_FILTER doesn't need any arguments +and SO_LOCK_FILTER for preventing the filter to be detached, takes an +integer value with 0 or 1. + +Note that socket filters are not restricted to PF_PACKET sockets only, +but can also be used on other socket families. + +Summary of system calls: + + * setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &val, sizeof(val)); + * setsockopt(sockfd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)); + * setsockopt(sockfd, SOL_SOCKET, SO_LOCK_FILTER, &val, sizeof(val)); + +Normally, most use cases for socket filtering on packet sockets will be +covered by libpcap in high-level syntax, so as an application developer +you should stick to that. libpcap wraps its own layer around all that. + +Unless i) using/linking to libpcap is not an option, ii) the required BPF +filters use Linux extensions that are not supported by libpcap's compiler, +iii) a filter might be more complex and not cleanly implementable with +libpcap's compiler, or iv) particular filter codes should be optimized +differently than libpcap's internal compiler does; then in such cases +writing such a filter "by hand" can be of an alternative. For example, +xt_bpf and cls_bpf users might have requirements that could result in +more complex filter code, or one that cannot be expressed with libpcap +(e.g. different return codes for various code paths). Moreover, BPF JIT +implementors may wish to manually write test cases and thus need low-level +access to BPF code as well. + +BPF engine and instruction set +------------------------------ + +Under tools/net/ there's a small helper tool called bpf_asm which can +be used to write low-level filters for example scenarios mentioned in the +previous section. Asm-like syntax mentioned here has been implemented in +bpf_asm and will be used for further explanations (instead of dealing with +less readable opcodes directly, principles are the same). The syntax is +closely modelled after Steven McCanne's and Van Jacobson's BPF paper. + +The BPF architecture consists of the following basic elements: + + Element Description + + A 32 bit wide accumulator + X 32 bit wide X register + M[] 16 x 32 bit wide misc registers aka "scratch memory + store", addressable from 0 to 15 + +A program, that is translated by bpf_asm into "opcodes" is an array that +consists of the following elements (as already mentioned): + + op:16, jt:8, jf:8, k:32 + +The element op is a 16 bit wide opcode that has a particular instruction +encoded. jt and jf are two 8 bit wide jump targets, one for condition +"jump if true", the other one "jump if false". Eventually, element k +contains a miscellaneous argument that can be interpreted in different +ways depending on the given instruction in op. + +The instruction set consists of load, store, branch, alu, miscellaneous +and return instructions that are also represented in bpf_asm syntax. This +table lists all bpf_asm instructions available resp. what their underlying +opcodes as defined in linux/filter.h stand for: + + Instruction Addressing mode Description + + ld 1, 2, 3, 4, 10 Load word into A + ldi 4 Load word into A + ldh 1, 2 Load half-word into A + ldb 1, 2 Load byte into A + ldx 3, 4, 5, 10 Load word into X + ldxi 4 Load word into X + ldxb 5 Load byte into X + + st 3 Store A into M[] + stx 3 Store X into M[] + + jmp 6 Jump to label + ja 6 Jump to label + jeq 7, 8 Jump on k == A + jneq 8 Jump on k != A + jne 8 Jump on k != A + jlt 8 Jump on k < A + jle 8 Jump on k <= A + jgt 7, 8 Jump on k > A + jge 7, 8 Jump on k >= A + jset 7, 8 Jump on k & A + + add 0, 4 A + + sub 0, 4 A - + mul 0, 4 A * + div 0, 4 A / + mod 0, 4 A % + neg 0, 4 !A + and 0, 4 A & + or 0, 4 A | + xor 0, 4 A ^ + lsh 0, 4 A << + rsh 0, 4 A >> + + tax Copy A into X + txa Copy X into A + + ret 4, 9 Return + +The next table shows addressing formats from the 2nd column: + + Addressing mode Syntax Description + + 0 x/%x Register X + 1 [k] BHW at byte offset k in the packet + 2 [x + k] BHW at the offset X + k in the packet + 3 M[k] Word at offset k in M[] + 4 #k Literal value stored in k + 5 4*([k]&0xf) Lower nibble * 4 at byte offset k in the packet + 6 L Jump label L + 7 #k,Lt,Lf Jump to Lt if true, otherwise jump to Lf + 8 #k,Lt Jump to Lt if predicate is true + 9 a/%a Accumulator A + 10 extension BPF extension + +The Linux kernel also has a couple of BPF extensions that are used along +with the class of load instructions by "overloading" the k argument with +a negative offset + a particular extension offset. The result of such BPF +extensions are loaded into A. + +Possible BPF extensions are shown in the following table: + + Extension Description + + len skb->len + proto skb->protocol + type skb->pkt_type + poff Payload start offset + ifidx skb->dev->ifindex + nla Netlink attribute of type X with offset A + nlan Nested Netlink attribute of type X with offset A + mark skb->mark + queue skb->queue_mapping + hatype skb->dev->type + rxhash skb->rxhash + cpu raw_smp_processor_id() + vlan_tci vlan_tx_tag_get(skb) + vlan_pr vlan_tx_tag_present(skb) + +These extensions can also be prefixed with '#'. +Examples for low-level BPF: + +** ARP packets: + + ldh [12] + jne #0x806, drop + ret #-1 + drop: ret #0 + +** IPv4 TCP packets: + + ldh [12] + jne #0x800, drop + ldb [23] + jneq #6, drop + ret #-1 + drop: ret #0 + +** (Accelerated) VLAN w/ id 10: + + ld vlan_tci + jneq #10, drop + ret #-1 + drop: ret #0 + +** SECCOMP filter example: + + ld [4] /* offsetof(struct seccomp_data, arch) */ + jne #0xc000003e, bad /* AUDIT_ARCH_X86_64 */ + ld [0] /* offsetof(struct seccomp_data, nr) */ + jeq #15, good /* __NR_rt_sigreturn */ + jeq #231, good /* __NR_exit_group */ + jeq #60, good /* __NR_exit */ + jeq #0, good /* __NR_read */ + jeq #1, good /* __NR_write */ + jeq #5, good /* __NR_fstat */ + jeq #9, good /* __NR_mmap */ + jeq #14, good /* __NR_rt_sigprocmask */ + jeq #13, good /* __NR_rt_sigaction */ + jeq #35, good /* __NR_nanosleep */ + bad: ret #0 /* SECCOMP_RET_KILL */ + good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ + +The above example code can be placed into a file (here called "foo"), and +then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf +and cls_bpf understands and can directly be loaded with. Example with above +ARP code: + +$ ./bpf_asm foo +4,40 0 0 12,21 0 1 2054,6 0 0 4294967295,6 0 0 0, + +In copy and paste C-like output: + +$ ./bpf_asm -c foo +{ 0x28, 0, 0, 0x0000000c }, +{ 0x15, 0, 1, 0x00000806 }, +{ 0x06, 0, 0, 0xffffffff }, +{ 0x06, 0, 0, 0000000000 }, + +In particular, as usage with xt_bpf or cls_bpf can result in more complex BPF +filters that might not be obvious at first, it's good to test filters before +attaching to a live system. For that purpose, there's a small tool called +bpf_dbg under tools/net/ in the kernel source directory. This debugger allows +for testing BPF filters against given pcap files, single stepping through the +BPF code on the pcap's packets and to do BPF machine register dumps. + +Starting bpf_dbg is trivial and just requires issuing: + +# ./bpf_dbg + +In case input and output do not equal stdin/stdout, bpf_dbg takes an +alternative stdin source as a first argument, and an alternative stdout +sink as a second one, e.g. `./bpf_dbg test_in.txt test_out.txt`. + +Other than that, a particular libreadline configuration can be set via +file "~/.bpf_dbg_init" and the command history is stored in the file +"~/.bpf_dbg_history". + +Interaction in bpf_dbg happens through a shell that also has auto-completion +support (follow-up example commands starting with '>' denote bpf_dbg shell). +The usual workflow would be to ... + +> load bpf 6,40 0 0 12,21 0 3 2048,48 0 0 23,21 0 1 1,6 0 0 65535,6 0 0 0 + Loads a BPF filter from standard output of bpf_asm, or transformed via + e.g. `tcpdump -iem1 -ddd port 22 | tr '\n' ','`. Note that for JIT + debugging (next section), this command creates a temporary socket and + loads the BPF code into the kernel. Thus, this will also be useful for + JIT developers. + +> load pcap foo.pcap + Loads standard tcpdump pcap file. + +> run [] +bpf passes:1 fails:9 + Runs through all packets from a pcap to account how many passes and fails + the filter will generate. A limit of packets to traverse can be given. + +> disassemble +l0: ldh [12] +l1: jeq #0x800, l2, l5 +l2: ldb [23] +l3: jeq #0x1, l4, l5 +l4: ret #0xffff +l5: ret #0 + Prints out BPF code disassembly. + +> dump +/* { op, jt, jf, k }, */ +{ 0x28, 0, 0, 0x0000000c }, +{ 0x15, 0, 3, 0x00000800 }, +{ 0x30, 0, 0, 0x00000017 }, +{ 0x15, 0, 1, 0x00000001 }, +{ 0x06, 0, 0, 0x0000ffff }, +{ 0x06, 0, 0, 0000000000 }, + Prints out C-style BPF code dump. + +> breakpoint 0 +breakpoint at: l0: ldh [12] +> breakpoint 1 +breakpoint at: l1: jeq #0x800, l2, l5 + ... + Sets breakpoints at particular BPF instructions. Issuing a `run` command + will walk through the pcap file continuing from the current packet and + break when a breakpoint is being hit (another `run` will continue from + the currently active breakpoint executing next instructions): + + > run + -- register dump -- + pc: [0] <-- program counter + code: [40] jt[0] jf[0] k[12] <-- plain BPF code of current instruction + curr: l0: ldh [12] <-- disassembly of current instruction + A: [00000000][0] <-- content of A (hex, decimal) + X: [00000000][0] <-- content of X (hex, decimal) + M[0,15]: [00000000][0] <-- folded content of M (hex, decimal) + -- packet dump -- <-- Current packet from pcap (hex) + len: 42 + 0: 00 19 cb 55 55 a4 00 14 a4 43 78 69 08 06 00 01 + 16: 08 00 06 04 00 01 00 14 a4 43 78 69 0a 3b 01 26 + 32: 00 00 00 00 00 00 0a 3b 01 01 + (breakpoint) + > + +> breakpoint +breakpoints: 0 1 + Prints currently set breakpoints. + +> step [-, +] + Performs single stepping through the BPF program from the current pc + offset. Thus, on each step invocation, above register dump is issued. + This can go forwards and backwards in time, a plain `step` will break + on the next BPF instruction, thus +1. (No `run` needs to be issued here.) + +> select + Selects a given packet from the pcap file to continue from. Thus, on + the next `run` or `step`, the BPF program is being evaluated against + the user pre-selected packet. Numbering starts just as in Wireshark + with index 1. + +> quit +# + Exits bpf_dbg. + +JIT compiler +------------ + +The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, PowerPC, +ARM and s390 and can be enabled through CONFIG_BPF_JIT. The JIT compiler is +transparently invoked for each attached filter from user space or for internal +kernel users if it has been previously enabled by root: + + echo 1 > /proc/sys/net/core/bpf_jit_enable + +For JIT developers, doing audits etc, each compile run can output the generated +opcode image into the kernel log via: + + echo 2 > /proc/sys/net/core/bpf_jit_enable + +Example output from dmesg: + +[ 3389.935842] flen=6 proglen=70 pass=3 image=ffffffffa0069c8f +[ 3389.935847] JIT code: 00000000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 68 +[ 3389.935849] JIT code: 00000010: 44 2b 4f 6c 4c 8b 87 d8 00 00 00 be 0c 00 00 00 +[ 3389.935850] JIT code: 00000020: e8 1d 94 ff e0 3d 00 08 00 00 75 16 be 17 00 00 +[ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 +[ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 + +In the kernel source tree under tools/net/, there's bpf_jit_disasm for +generating disassembly out of the kernel log's hexdump: + +# ./bpf_jit_disasm +70 bytes emitted from JIT compiler (pass:3, flen:6) +ffffffffa0069c8f + : + 0: push %rbp + 1: mov %rsp,%rbp + 4: sub $0x60,%rsp + 8: mov %rbx,-0x8(%rbp) + c: mov 0x68(%rdi),%r9d + 10: sub 0x6c(%rdi),%r9d + 14: mov 0xd8(%rdi),%r8 + 1b: mov $0xc,%esi + 20: callq 0xffffffffe0ff9442 + 25: cmp $0x800,%eax + 2a: jne 0x0000000000000042 + 2c: mov $0x17,%esi + 31: callq 0xffffffffe0ff945e + 36: cmp $0x1,%eax + 39: jne 0x0000000000000042 + 3b: mov $0xffff,%eax + 40: jmp 0x0000000000000044 + 42: xor %eax,%eax + 44: leaveq + 45: retq + +Issuing option `-o` will "annotate" opcodes to resulting assembler +instructions, which can be very useful for JIT developers: + +# ./bpf_jit_disasm -o +70 bytes emitted from JIT compiler (pass:3, flen:6) +ffffffffa0069c8f + : + 0: push %rbp + 55 + 1: mov %rsp,%rbp + 48 89 e5 + 4: sub $0x60,%rsp + 48 83 ec 60 + 8: mov %rbx,-0x8(%rbp) + 48 89 5d f8 + c: mov 0x68(%rdi),%r9d + 44 8b 4f 68 + 10: sub 0x6c(%rdi),%r9d + 44 2b 4f 6c + 14: mov 0xd8(%rdi),%r8 + 4c 8b 87 d8 00 00 00 + 1b: mov $0xc,%esi + be 0c 00 00 00 + 20: callq 0xffffffffe0ff9442 + e8 1d 94 ff e0 + 25: cmp $0x800,%eax + 3d 00 08 00 00 + 2a: jne 0x0000000000000042 + 75 16 + 2c: mov $0x17,%esi + be 17 00 00 00 + 31: callq 0xffffffffe0ff945e + e8 28 94 ff e0 + 36: cmp $0x1,%eax + 83 f8 01 + 39: jne 0x0000000000000042 + 75 07 + 3b: mov $0xffff,%eax + b8 ff ff 00 00 + 40: jmp 0x0000000000000044 + eb 02 + 42: xor %eax,%eax + 31 c0 + 44: leaveq + c9 + 45: retq + c3 + +For BPF JIT developers, bpf_jit_disasm, bpf_asm and bpf_dbg provides a useful +toolchain for developing and testing the kernel's JIT compiler. + +Misc +---- + +Also trinity, the Linux syscall fuzzer, has built-in support for BPF and +SECCOMP-BPF kernel fuzzing. + +Written by +---------- + +The document was written in the hope that it is found useful and in order +to give potential BPF hackers or security auditors a better overview of +the underlying architecture. + +Jay Schulist +Daniel Borkmann diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3c12d9a7ed00..12ba2cd9f03d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -156,6 +156,16 @@ tcp_app_win - INTEGER buffer. Value 0 is special, it means that nothing is reserved. Default: 31 +tcp_autocorking - BOOLEAN + Enable TCP auto corking : + When applications do consecutive small write()/sendmsg() system calls, + we try to coalesce these small writes as much as possible, to lower + total amount of sent packets. This is done if at least one prior + packet for the flow is waiting in Qdisc queues or device transmit + queue. Applications can still use TCP_CORK for optimal behavior + when they know how/when to uncork their sockets. + Default : 1 + tcp_available_congestion_control - STRING Shows the available congestion control choices that are registered. More congestion control algorithms may be available as modules, diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index c01223628a87..4288ffafba9f 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -123,6 +123,16 @@ Transmission process is similar to capture as shown below. [shutdown] close() --------> destruction of the transmission socket and deallocation of all associated resources. +Socket creation and destruction is also straight forward, and is done +the same way as in capturing described in the previous paragraph: + + int fd = socket(PF_PACKET, mode, 0); + +The protocol can optionally be 0 in case we only want to transmit +via this socket, which avoids an expensive call to packet_rcv(). +In this case, you also need to bind(2) the TX_RING with sll_protocol = 0 +set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example. + Binding the socket to your network interface is mandatory (with zero copy) to know the header size of frames used in the circular buffer. @@ -942,6 +952,27 @@ int main(int argc, char **argp) return 0; } +------------------------------------------------------------------------------- ++ PACKET_QDISC_BYPASS +------------------------------------------------------------------------------- + +If there is a requirement to load the network with many packets in a similar +fashion as pktgen does, you might set the following option after socket +creation: + + int one = 1; + setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)); + +This has the side-effect, that packets sent through PF_PACKET will bypass the +kernel's qdisc layer and are forcedly pushed to the driver directly. Meaning, +packet are not buffered, tc disciplines are ignored, increased loss can occur +and such packets are also not visible to other PF_PACKET sockets anymore. So, +you have been warned; generally, this can be useful for stress testing various +components of a system. + +On default, PACKET_QDISC_BYPASS is disabled and needs to be explicitly enabled +on PF_PACKET sockets. + ------------------------------------------------------------------------------- + PACKET_TIMESTAMP ------------------------------------------------------------------------------- diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt index d5b1a3935245..ebf270719402 100644 --- a/Documentation/networking/phy.txt +++ b/Documentation/networking/phy.txt @@ -255,7 +255,8 @@ Writing a PHY driver config_init: configures PHY into a sane state after a reset. For instance, a Davicom PHY requires descrambling disabled. - probe: Does any setup needed by the driver + probe: Allocate phy->priv, optionally refuse to bind. + PHY may not have been reset or had fixups run yet. suspend/resume: power management config_aneg: Changes the speed/duplex/negotiation settings read_status: Reads the current speed/duplex/negotiation settings diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 98097d8cb910..661d3c316a17 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -85,7 +85,7 @@ Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support by the network device and will be empty without that support. -SIOCSHWTSTAMP: +SIOCSHWTSTAMP, SIOCGHWTSTAMP: Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in @@ -115,6 +115,10 @@ Only a processes with admin rights may change the configuration. User space is responsible to ensure that multiple processes don't interfere with each other and that the settings are reset. +Any process can read the actual configuration by passing this +structure to ioctl(SIOCGHWTSTAMP) in the same way. However, this has +not been implemented in all drivers. + /* possible values for hwtstamp_config->tx_type */ enum { /* @@ -157,7 +161,8 @@ DEVICE IMPLEMENTATION A driver which supports hardware time stamping must support the SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with -the actual values as described in the section on SIOCSHWTSTAMP. +the actual values as described in the section on SIOCSHWTSTAMP. It +should also support SIOCGHWTSTAMP. Time stamps for received packets must be stored in the skb. To get a pointer to the shared time stamp structure of the skb call skb_hwtstamps(). Then diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore index 71e81eb2e22f..a380159765ce 100644 --- a/Documentation/networking/timestamping/.gitignore +++ b/Documentation/networking/timestamping/.gitignore @@ -1 +1,2 @@ timestamping +hwtstamp_config diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile index e79973443e9f..d934afc8306a 100644 --- a/Documentation/networking/timestamping/Makefile +++ b/Documentation/networking/timestamping/Makefile @@ -2,12 +2,13 @@ obj- := dummy.o # List of programs to build -hostprogs-y := timestamping +hostprogs-y := timestamping hwtstamp_config # Tell kbuild to always build the programs always := $(hostprogs-y) HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include +HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include clean: - rm -f timestamping + rm -f timestamping hwtstamp_config diff --git a/Documentation/networking/timestamping/hwtstamp_config.c b/Documentation/networking/timestamping/hwtstamp_config.c new file mode 100644 index 000000000000..e8b685a7f15f --- /dev/null +++ b/Documentation/networking/timestamping/hwtstamp_config.c @@ -0,0 +1,134 @@ +/* Test program for SIOC{G,S}HWTSTAMP + * Copyright 2013 Solarflare Communications + * Author: Ben Hutchings + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +static int +lookup_value(const char **names, int size, const char *name) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value] && strcasecmp(names[value], name) == 0) + return value; + + return -1; +} + +static const char * +lookup_name(const char **names, int size, int value) +{ + return (value >= 0 && value < size) ? names[value] : NULL; +} + +static void list_names(FILE *f, const char **names, int size) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value]) + fprintf(f, " %s\n", names[value]); +} + +static const char *tx_types[] = { +#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name + TX_TYPE(OFF), + TX_TYPE(ON), + TX_TYPE(ONESTEP_SYNC) +#undef TX_TYPE +}; +#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) + +static const char *rx_filters[] = { +#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name + RX_FILTER(NONE), + RX_FILTER(ALL), + RX_FILTER(SOME), + RX_FILTER(PTP_V1_L4_EVENT), + RX_FILTER(PTP_V1_L4_SYNC), + RX_FILTER(PTP_V1_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L4_EVENT), + RX_FILTER(PTP_V2_L4_SYNC), + RX_FILTER(PTP_V2_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L2_EVENT), + RX_FILTER(PTP_V2_L2_SYNC), + RX_FILTER(PTP_V2_L2_DELAY_REQ), + RX_FILTER(PTP_V2_EVENT), + RX_FILTER(PTP_V2_SYNC), + RX_FILTER(PTP_V2_DELAY_REQ), +#undef RX_FILTER +}; +#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) + +static void usage(void) +{ + fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n" + "tx_type is any of (case-insensitive):\n", + stderr); + list_names(stderr, tx_types, N_TX_TYPES); + fputs("rx_filter is any of (case-insensitive):\n", stderr); + list_names(stderr, rx_filters, N_RX_FILTERS); +} + +int main(int argc, char **argv) +{ + struct ifreq ifr; + struct hwtstamp_config config; + const char *name; + int sock; + + if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) { + usage(); + return 2; + } + + if (argc == 4) { + config.flags = 0; + config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]); + config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]); + if (config.tx_type < 0 || config.rx_filter < 0) { + usage(); + return 2; + } + } + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + perror("socket"); + return 1; + } + + strcpy(ifr.ifr_name, argv[1]); + ifr.ifr_data = (caddr_t)&config; + + if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) { + perror("ioctl"); + return 1; + } + + printf("flags = %#x\n", config.flags); + name = lookup_name(tx_types, N_TX_TYPES, config.tx_type); + if (name) + printf("tx_type = %s\n", name); + else + printf("tx_type = %d\n", config.tx_type); + name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter); + if (name) + printf("rx_filter = %s\n", name); + else + printf("rx_filter = %d\n", config.rx_filter); + + return 0; +} diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 0f54333b0ff2..b6ce00b2be9a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -547,13 +547,11 @@ helper functions described in Section 4. In that case, pm_runtime_resume() should be used. Of course, for this purpose the device's runtime PM has to be enabled earlier by calling pm_runtime_enable(). -If the device bus type's or driver's ->probe() callback runs -pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, -they will fail returning -EAGAIN, because the device's usage counter is -incremented by the driver core before executing ->probe(). Still, it may be -desirable to suspend the device as soon as ->probe() has finished, so the driver -core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for -the device at that time. +It may be desirable to suspend the device once ->probe() has finished. +Therefore the driver core uses the asyncronous pm_request_idle() to submit a +request to execute the subsystem-level idle callback for the device at that +time. A driver that makes use of the runtime autosuspend feature, may want to +update the last busy mark before returning from ->probe(). Moreover, the driver core prevents runtime PM callbacks from racing with the bus notifier callback in __device_release_driver(), which is necessary, because the @@ -656,7 +654,7 @@ out the following operations: __pm_runtime_disable() with 'false' as the second argument for every device right before executing the subsystem-level .suspend_late() callback for it. - * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync() + * During system resume it calls pm_runtime_enable() and pm_runtime_put() for every device right after executing the subsystem-level .resume_early() callback and right after executing the subsystem-level .resume() callback for it, respectively. diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX index 414235c1fcfc..45c82fd3e9d3 100644 --- a/Documentation/security/00-INDEX +++ b/Documentation/security/00-INDEX @@ -22,3 +22,5 @@ keys.txt - description of the kernel key retention service. tomoyo.txt - documentation on the TOMOYO Linux Security Module. +IMA-templates.txt + - documentation on the template management mechanism for IMA. diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt new file mode 100644 index 000000000000..a777e5f1df5b --- /dev/null +++ b/Documentation/security/IMA-templates.txt @@ -0,0 +1,87 @@ + IMA Template Management Mechanism + + +==== INTRODUCTION ==== + +The original 'ima' template is fixed length, containing the filedata hash +and pathname. The filedata hash is limited to 20 bytes (md5/sha1). +The pathname is a null terminated string, limited to 255 characters. +To overcome these limitations and to add additional file metadata, it is +necessary to extend the current version of IMA by defining additional +templates. For example, information that could be possibly reported are +the inode UID/GID or the LSM labels either of the inode and of the process +that is accessing it. + +However, the main problem to introduce this feature is that, each time +a new template is defined, the functions that generate and display +the measurements list would include the code for handling a new format +and, thus, would significantly grow over the time. + +The proposed solution solves this problem by separating the template +management from the remaining IMA code. The core of this solution is the +definition of two new data structures: a template descriptor, to determine +which information should be included in the measurement list; a template +field, to generate and display data of a given type. + +Managing templates with these structures is very simple. To support +a new data type, developers define the field identifier and implement +two functions, init() and show(), respectively to generate and display +measurement entries. Defining a new template descriptor requires +specifying the template format, a string of field identifiers separated +by the '|' character. While in the current implementation it is possible +to define new template descriptors only by adding their definition in the +template specific code (ima_template.c), in a future version it will be +possible to register a new template on a running kernel by supplying to IMA +the desired format string. In this version, IMA initializes at boot time +all defined template descriptors by translating the format into an array +of template fields structures taken from the set of the supported ones. + +After the initialization step, IMA will call ima_alloc_init_template() +(new function defined within the patches for the new template management +mechanism) to generate a new measurement entry by using the template +descriptor chosen through the kernel configuration or through the newly +introduced 'ima_template=' kernel command line parameter. It is during this +phase that the advantages of the new architecture are clearly shown: +the latter function will not contain specific code to handle a given template +but, instead, it simply calls the init() method of the template fields +associated to the chosen template descriptor and store the result (pointer +to allocated data and data length) in the measurement entry structure. + +The same mechanism is employed to display measurements entries. +The functions ima[_ascii]_measurements_show() retrieve, for each entry, +the template descriptor used to produce that entry and call the show() +method for each item of the array of template fields structures. + + + +==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ==== + +In the following, there is the list of supported template fields +('': description), that can be used to define new template +descriptors by adding their identifier to the format string +(support for more data types will be added later): + + - 'd': the digest of the event (i.e. the digest of a measured file), + calculated with the SHA1 or MD5 hash algorithm; + - 'n': the name of the event (i.e. the file name), with size up to 255 bytes; + - 'd-ng': the digest of the event, calculated with an arbitrary hash + algorithm (field format: [:]digest, where the digest + prefix is shown only if the hash algorithm is not SHA1 or MD5); + - 'n-ng': the name of the event, without size limitations. + + +Below, there is the list of defined template descriptors: + - "ima": its format is 'd|n'; + - "ima-ng" (default): its format is 'd-ng|n-ng'. + + + +==== USE ==== + +To specify the template descriptor to be used to generate measurement entries, +currently the following methods are supported: + + - select a template descriptor among those supported in the kernel + configuration ('ima-ng' is the default choice); + - specify a template descriptor name from the kernel command line through + the 'ima_template=' parameter. diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 7b4145d00452..a4c33f1a7c6d 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -865,15 +865,14 @@ encountered: calling processes has a searchable link to the key from one of its keyrings. There are three functions for dealing with these: - key_ref_t make_key_ref(const struct key *key, - unsigned long possession); + key_ref_t make_key_ref(const struct key *key, bool possession); struct key *key_ref_to_ptr(const key_ref_t key_ref); - unsigned long is_key_possessed(const key_ref_t key_ref); + bool is_key_possessed(const key_ref_t key_ref); The first function constructs a key reference from a key pointer and - possession information (which must be 0 or 1 and not any other value). + possession information (which must be true or false). The second function retrieves the key pointer from a reference and the third retrieves the possession flag. @@ -961,14 +960,17 @@ payload contents" for more information. the argument will not be parsed. -(*) Extra references can be made to a key by calling the following function: +(*) Extra references can be made to a key by calling one of the following + functions: + struct key *__key_get(struct key *key); struct key *key_get(struct key *key); - These need to be disposed of by calling key_put() when they've been - finished with. The key pointer passed in will be returned. If the pointer - is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and - no increment will take place. + Keys so references will need to be disposed of by calling key_put() when + they've been finished with. The key pointer passed in will be returned. + + In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set + then the key will not be dereferenced and no increment will take place. (*) A key's serial number can be obtained by calling: diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 54d29c1320ed..230ce71f4d75 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -440,15 +440,15 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " /*\n" buf += " * Setup default attribute lists for various fabric->tf_cit_tmpl\n" buf += " */\n" - buf += " TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" buf += " /*\n" buf += " * Register the fabric for use within TCM\n" buf += " */\n" diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt index f866c72291bf..a445da098bc6 100644 --- a/Documentation/unaligned-memory-access.txt +++ b/Documentation/unaligned-memory-access.txt @@ -137,24 +137,34 @@ Code that causes unaligned access ================================= With the above in mind, let's move onto a real life example of a function -that can cause an unaligned memory access. The following function adapted +that can cause an unaligned memory access. The following function taken from include/linux/etherdevice.h is an optimized routine to compare two ethernet MAC addresses for equality. -unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) +bool ether_addr_equal(const u8 *addr1, const u8 *addr2) { - const u16 *a = (const u16 *) addr1; - const u16 *b = (const u16 *) addr2; +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | + ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); + + return fold == 0; +#else + const u16 *a = (const u16 *)addr1; + const u16 *b = (const u16 *)addr2; return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +#endif } -In the above function, the reference to a[0] causes 2 bytes (16 bits) to -be read from memory starting at address addr1. Think about what would happen -if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned -access.) +In the above function, when the hardware has efficient unaligned access +capability, there is no issue with this code. But when the hardware isn't +able to access memory on arbitrary boundaries, the reference to a[0] causes +2 bytes (16 bits) to be read from memory starting at address addr1. + +Think about what would happen if addr1 was an odd address such as 0x10003. +(Hint: it'd be an unaligned access.) Despite the potential unaligned access problems with the above function, it -is included in the kernel anyway but is understood to only work on +is included in the kernel anyway but is understood to only work normally on 16-bit-aligned addresses. It is up to the caller to ensure this alignment or not use this function at all. This alignment-unsafe function is still useful as it is a decent optimization for the cases when you can ensure alignment, diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 7521d367f21d..6dea4fd5c961 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -63,9 +63,9 @@ levels. PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table allocation and pgtable_pmd_page_dtor() on freeing. -Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but -make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE -preallocate few PMDs on pgd_alloc(). +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and +pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing +paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. diff --git a/MAINTAINERS b/MAINTAINERS index bdf13e400450..53f0bf977369 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2131,6 +2131,11 @@ L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/chipidea/ +CHROME HARDWARE PLATFORM SUPPORT +M: Olof Johansson +S: Maintained +F: drivers/platform/chrome/ + CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti M: Sujith Sankar @@ -4033,6 +4038,12 @@ W: http://www.pharscape.org S: Maintained F: drivers/net/usb/hso.c +HSR NETWORK PROTOCOL +M: Arvid Brodin +L: netdev@vger.kernel.org +S: Maintained +F: net/hsr/ + HTCPEN TOUCHSCREEN DRIVER M: Pau Oliva Fora L: linux-input@vger.kernel.org @@ -4054,6 +4065,7 @@ F: arch/x86/include/uapi/asm/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c F: drivers/hid/hid-hyperv.c F: drivers/hv/ +F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/hyperv_fb.c @@ -4438,10 +4450,8 @@ M: Bruce Allan M: Carolyn Wyborny M: Don Skidmore M: Greg Rose -M: Peter P Waskiewicz Jr M: Alex Duyck M: John Ronciak -M: Tushar Dave L: e1000-devel@lists.sourceforge.net W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ @@ -5244,7 +5254,7 @@ S: Maintained F: Documentation/lockdep*.txt F: Documentation/lockstat.txt F: include/linux/lockdep.h -F: kernel/lockdep* +F: kernel/locking/ LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) M: "Richard Russon (FlatCap)" @@ -7376,7 +7386,6 @@ S: Maintained F: kernel/sched/ F: include/linux/sched.h F: include/uapi/linux/sched.h -F: kernel/wait.c F: include/linux/wait.h SCORE ARCHITECTURE @@ -7512,9 +7521,10 @@ SELINUX SECURITY MODULE M: Stephen Smalley M: James Morris M: Eric Paris +M: Paul Moore L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.infradead.org/users/eparis/selinux.git +T: git git://git.infradead.org/users/pcmoore/selinux S: Supported F: include/linux/selinux* F: security/selinux/ @@ -8660,6 +8670,7 @@ F: drivers/media/usb/tm6000/ TPM DEVICE DRIVER M: Leonidas Da Silva Barbosa M: Ashley Lai +M: Peter Huewe M: Rajiv Andrade W: http://tpmdd.sourceforge.net M: Marcel Selhorst @@ -9518,8 +9529,8 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM P: Silicon Graphics Inc +M: Dave Chinner M: Ben Myers -M: Alex Elder M: xfs@oss.sgi.com L: xfs@oss.sgi.com W: http://oss.sgi.com/projects/xfs diff --git a/Makefile b/Makefile index 920ad07180c9..2c88e44a1dd4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 -PATCHLEVEL = 12 +PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc2 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 135c674eaf9e..d39dc9b95a2c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -16,8 +16,8 @@ config ALPHA select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select GENERIC_CLOCKEVENTS select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC @@ -488,6 +488,20 @@ config VGA_HOSE which always have multiple hoses, and whose consoles support it. +config ALPHA_QEMU + bool "Run under QEMU emulation" + depends on !ALPHA_GENERIC + ---help--- + Assume the presence of special features supported by QEMU PALcode + that reduce the overhead of system emulation. + + Generic kernels will auto-detect QEMU. But when building a + system-specific kernel, the assumption is that we want to + elimiate as many runtime tests as possible. + + If unsure, say N. + + config ALPHA_SRM bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME depends on TTY @@ -572,6 +586,30 @@ config NUMA Access). This option is for configuring high-end multiprocessor server machines. If in doubt, say N. +config ALPHA_WTINT + bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC + default y if ALPHA_QEMU + default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA) + default n if !ALPHA_SRM && !ALPHA_GENERIC + default y if SMP + ---help--- + The Wait for Interrupt (WTINT) PALcall attempts to place the CPU + to sleep until the next interrupt. This may reduce the power + consumed, and the heat produced by the computer. However, it has + the side effect of making the cycle counter unreliable as a timing + device across the sleep. + + For emulation under QEMU, definitely say Y here, as we have other + mechanisms for measuring time than the cycle counter. + + For EV4 (but not LCA), EV5 and EV56 systems, or for systems running + MILO, sleep mode is not supported so you might as well say N here. + + For SMP systems we cannot use the cycle counter for timing anyway, + so you might as well say Y here. + + If unsure, say N. + config NODES_SHIFT int default "7" @@ -613,9 +651,41 @@ config VERBOSE_MCHECK_ON Take the default (1) unless you want more control or more info. +choice + prompt "Timer interrupt frequency (HZ)?" + default HZ_128 if ALPHA_QEMU + default HZ_1200 if ALPHA_RAWHIDE + default HZ_1024 + ---help--- + The frequency at which timer interrupts occur. A high frequency + minimizes latency, whereas a low frequency minimizes overhead of + process accounting. The later effect is especially significant + when being run under QEMU. + + Note that some Alpha hardware cannot change the interrupt frequency + of the timer. If unsure, say 1024 (or 1200 for Rawhide). + + config HZ_32 + bool "32 Hz" + config HZ_64 + bool "64 Hz" + config HZ_128 + bool "128 Hz" + config HZ_256 + bool "256 Hz" + config HZ_1024 + bool "1024 Hz" + config HZ_1200 + bool "1200 Hz" +endchoice + config HZ - int - default 1200 if ALPHA_RAWHIDE + int + default 32 if HZ_32 + default 64 if HZ_64 + default 128 if HZ_128 + default 256 if HZ_256 + default 1200 if HZ_1200 default 1024 source "drivers/pci/Kconfig" diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index 72dbf2359270..75cb3641ed2f 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -33,6 +33,7 @@ struct alpha_machine_vector int nr_irqs; int rtc_port; + int rtc_boot_cpu_only; unsigned int max_asn; unsigned long max_isa_dma_address; unsigned long irq_probe_mask; @@ -95,9 +96,6 @@ struct alpha_machine_vector struct _alpha_agp_info *(*agp_info)(void); - unsigned int (*rtc_get_time)(struct rtc_time *); - int (*rtc_set_time)(struct rtc_time *); - const char *vector_name; /* NUMA information */ @@ -126,13 +124,19 @@ extern struct alpha_machine_vector alpha_mv; #ifdef CONFIG_ALPHA_GENERIC extern int alpha_using_srm; +extern int alpha_using_qemu; #else -#ifdef CONFIG_ALPHA_SRM -#define alpha_using_srm 1 -#else -#define alpha_using_srm 0 -#endif +# ifdef CONFIG_ALPHA_SRM +# define alpha_using_srm 1 +# else +# define alpha_using_srm 0 +# endif +# ifdef CONFIG_ALPHA_QEMU +# define alpha_using_qemu 1 +# else +# define alpha_using_qemu 0 +# endif #endif /* GENERIC */ -#endif +#endif /* __KERNEL__ */ #endif /* __ALPHA_MACHVEC_H */ diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h index 6fcd2b5b08f0..5422a47646fc 100644 --- a/arch/alpha/include/asm/pal.h +++ b/arch/alpha/include/asm/pal.h @@ -89,6 +89,7 @@ __CALL_PAL_W1(wrmces, unsigned long); __CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); __CALL_PAL_W1(wrusp, unsigned long); __CALL_PAL_W1(wrvptptr, unsigned long); +__CALL_PAL_RW1(wtint, unsigned long, unsigned long); /* * TB routines.. @@ -111,5 +112,75 @@ __CALL_PAL_W1(wrvptptr, unsigned long); #define tbiap() __tbi(-1, /* no second argument */) #define tbia() __tbi(-2, /* no second argument */) +/* + * QEMU Cserv routines.. + */ + +static inline unsigned long +qemu_get_walltime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 3; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline unsigned long +qemu_get_alarm(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 4; + + asm("call_pal %2 # cserve get_alarm" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline void +qemu_set_alarm_rel(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 5; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_rel" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline void +qemu_set_alarm_abs(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 6; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_abs" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline unsigned long +qemu_get_vmtime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 7; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ALPHA_PAL_H */ diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h index d70408d36677..f71c3b0ed360 100644 --- a/arch/alpha/include/asm/rtc.h +++ b/arch/alpha/include/asm/rtc.h @@ -1,12 +1 @@ -#ifndef _ALPHA_RTC_H -#define _ALPHA_RTC_H - -#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \ - || defined(CONFIG_ALPHA_GENERIC) -# define get_rtc_time alpha_mv.rtc_get_time -# define set_rtc_time alpha_mv.rtc_set_time -#endif - #include - -#endif diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h index b02b8a282940..c2911f591704 100644 --- a/arch/alpha/include/asm/string.h +++ b/arch/alpha/include/asm/string.h @@ -22,15 +22,27 @@ extern void * __memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMSET extern void * __constant_c_memset(void *, unsigned long, size_t); +extern void * ___memset(void *, int, size_t); extern void * __memset(void *, int, size_t); extern void * memset(void *, int, size_t); -#define memset(s, c, n) \ -(__builtin_constant_p(c) \ - ? (__builtin_constant_p(n) && (c) == 0 \ - ? __builtin_memset((s),0,(n)) \ - : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \ - : __memset((s),(c),(n))) +/* For gcc 3.x, we cannot have the inline function named "memset" because + the __builtin_memset will attempt to resolve to the inline as well, + leading to a "sorry" about unimplemented recursive inlining. */ +extern inline void *__memset(void *s, int c, size_t n) +{ + if (__builtin_constant_p(c)) { + if (__builtin_constant_p(n)) { + return __builtin_memset(s, c, n); + } else { + unsigned long c8 = (c & 0xff) * 0x0101010101010101UL; + return __constant_c_memset(s, c8, n); + } + } + return ___memset(s, c, n); +} + +#define memset __memset #define __HAVE_ARCH_STRCPY extern char * strcpy(char *,const char *); diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h index 3c0ce08e5f59..dfc8140b9088 100644 --- a/arch/alpha/include/uapi/asm/pal.h +++ b/arch/alpha/include/uapi/asm/pal.h @@ -46,6 +46,7 @@ #define PAL_rdusp 58 #define PAL_whami 60 #define PAL_retsys 61 +#define PAL_wtint 62 #define PAL_rti 63 diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 84ec46b38f7d..0d54650e78fc 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o obj-$(CONFIG_SRM_ENV) += srm_env.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o ifdef CONFIG_ALPHA_GENERIC diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index 89566b346c0f..f4c7ab6f43b0 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -40,6 +40,7 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(___memset); EXPORT_SYMBOL(__memsetw); EXPORT_SYMBOL(__constant_c_memset); EXPORT_SYMBOL(copy_page); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 28e4429596f3..1c8625cb0e25 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -66,21 +66,7 @@ do_entInt(unsigned long type, unsigned long vector, break; case 1: old_regs = set_irq_regs(regs); -#ifdef CONFIG_SMP - { - long cpu; - - smp_percpu_timer_interrupt(regs); - cpu = smp_processor_id(); - if (cpu != boot_cpuid) { - kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ)); - } else { - handle_irq(RTC_IRQ); - } - } -#else handle_irq(RTC_IRQ); -#endif set_irq_regs(old_regs); return; case 2: @@ -228,7 +214,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, */ struct irqaction timer_irqaction = { - .handler = timer_interrupt, + .handler = rtc_timer_interrupt, .name = "timer", }; diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h index 7fa62488bd16..f54bdf658cd0 100644 --- a/arch/alpha/kernel/machvec_impl.h +++ b/arch/alpha/kernel/machvec_impl.h @@ -43,10 +43,7 @@ #define CAT1(x,y) x##y #define CAT(x,y) CAT1(x,y) -#define DO_DEFAULT_RTC \ - .rtc_port = 0x70, \ - .rtc_get_time = common_get_rtc_time, \ - .rtc_set_time = common_set_rtc_time +#define DO_DEFAULT_RTC .rtc_port = 0x70 #define DO_EV4_MMU \ .max_asn = EV4_MAX_ASN, \ diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index d821b17047e0..c52e7f0ee5f6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -83,6 +83,8 @@ struct alpha_pmu_t { long pmc_left[3]; /* Subroutine for allocation of PMCs. Enforces constraints. */ int (*check_constraints)(struct perf_event **, unsigned long *, int); + /* Subroutine for checking validity of a raw event for this PMU. */ + int (*raw_event_valid)(u64 config); }; /* @@ -203,6 +205,12 @@ success: } +static int ev67_raw_event_valid(u64 config) +{ + return config >= EV67_CYCLES && config < EV67_LAST_ET; +}; + + static const struct alpha_pmu_t ev67_pmu = { .event_map = ev67_perfmon_event_map, .max_events = ARRAY_SIZE(ev67_perfmon_event_map), @@ -211,7 +219,8 @@ static const struct alpha_pmu_t ev67_pmu = { .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK, EV67_PCTR_1_COUNT_MASK, 0}, .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0}, .pmc_left = {16, 4, 0}, - .check_constraints = ev67_check_constraints + .check_constraints = ev67_check_constraints, + .raw_event_valid = ev67_raw_event_valid, }; @@ -609,7 +618,9 @@ static int __hw_perf_event_init(struct perf_event *event) } else if (attr->type == PERF_TYPE_HW_CACHE) { return -EOPNOTSUPP; } else if (attr->type == PERF_TYPE_RAW) { - ev = attr->config & 0xff; + if (!alpha_pmu->raw_event_valid(attr->config)) + return -EINVAL; + ev = attr->config; } else { return -EOPNOTSUPP; } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index f2360a74e5d5..1941a07b5811 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -46,6 +46,23 @@ void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL(pm_power_off); +#ifdef CONFIG_ALPHA_WTINT +/* + * Sleep the CPU. + * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts. + */ +void arch_cpu_idle(void) +{ + wtint(0); + local_irq_enable(); +} + +void arch_cpu_idle_dead(void) +{ + wtint(INT_MAX); +} +#endif /* ALPHA_WTINT */ + struct halt_info { int mode; char *restart_cmd; diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index d3e52d3fd592..da2d6ec9c370 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -135,17 +135,15 @@ extern void unregister_srm_console(void); /* smp.c */ extern void setup_smp(void); extern void handle_ipi(struct pt_regs *); -extern void smp_percpu_timer_interrupt(struct pt_regs *); /* bios32.c */ /* extern void reset_for_srm(void); */ /* time.c */ -extern irqreturn_t timer_interrupt(int irq, void *dev); +extern irqreturn_t rtc_timer_interrupt(int irq, void *dev); +extern void init_clockevent(void); extern void common_init_rtc(void); extern unsigned long est_cycle_freq; -extern unsigned int common_get_rtc_time(struct rtc_time *time); -extern int common_set_rtc_time(struct rtc_time *time); /* smc37c93x.c */ extern void SMC93x_Init(void); diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c new file mode 100644 index 000000000000..c8d284d8521f --- /dev/null +++ b/arch/alpha/kernel/rtc.c @@ -0,0 +1,323 @@ +/* + * linux/arch/alpha/kernel/rtc.c + * + * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds + * + * This file contains date handling. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "proto.h" + + +/* + * Support for the RTC device. + * + * We don't want to use the rtc-cmos driver, because we don't want to support + * alarms, as that would be indistinguishable from timer interrupts. + * + * Further, generic code is really, really tied to a 1900 epoch. This is + * true in __get_rtc_time as well as the users of struct rtc_time e.g. + * rtc_tm_to_time. Thankfully all of the other epochs in use are later + * than 1900, and so it's easy to adjust. + */ + +static unsigned long rtc_epoch; + +static int __init +specifiy_epoch(char *str) +{ + unsigned long epoch = simple_strtoul(str, NULL, 0); + if (epoch < 1900) + printk("Ignoring invalid user specified epoch %lu\n", epoch); + else + rtc_epoch = epoch; + return 1; +} +__setup("epoch=", specifiy_epoch); + +static void __init +init_rtc_epoch(void) +{ + int epoch, year, ctrl; + + if (rtc_epoch != 0) { + /* The epoch was specified on the command-line. */ + return; + } + + /* Detect the epoch in use on this computer. */ + ctrl = CMOS_READ(RTC_CONTROL); + year = CMOS_READ(RTC_YEAR); + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + year = bcd2bin(year); + + /* PC-like is standard; used for year >= 70 */ + epoch = 1900; + if (year < 20) { + epoch = 2000; + } else if (year >= 20 && year < 48) { + /* NT epoch */ + epoch = 1980; + } else if (year >= 48 && year < 70) { + /* Digital UNIX epoch */ + epoch = 1952; + } + rtc_epoch = epoch; + + printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year); +} + +static int +alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + __get_rtc_time(tm); + + /* Adjust for non-default epochs. It's easier to depend on the + generic __get_rtc_time and adjust the epoch here than create + a copy of __get_rtc_time with the edits we need. */ + if (rtc_epoch != 1900) { + int year = tm->tm_year; + /* Undo the century adjustment made in __get_rtc_time. */ + if (year >= 100) + year -= 100; + year += rtc_epoch - 1900; + /* Redo the century adjustment with the epoch in place. */ + if (year <= 69) + year += 100; + tm->tm_year = year; + } + + return rtc_valid_tm(tm); +} + +static int +alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rtc_time xtm; + + if (rtc_epoch != 1900) { + xtm = *tm; + xtm.tm_year -= rtc_epoch - 1900; + tm = &xtm; + } + + return __set_rtc_time(tm); +} + +static int +alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* Note: This code only updates minutes and seconds. Comments + indicate this was to avoid messing with unknown time zones, + and with the epoch nonsense described above. In order for + this to work, the existing clock cannot be off by more than + 15 minutes. + + ??? This choice is may be out of date. The x86 port does + not have problems with timezones, and the epoch processing has + now been fixed in alpha_set_rtc_time. + + In either case, one can always force a full rtc update with + the userland hwclock program, so surely 15 minute accuracy + is no real burden. */ + + /* In order to set the CMOS clock precisely, we have to be called + 500 ms after the second nowtime has started, because when + nowtime is written into the registers of the CMOS clock, it will + jump to the next second precisely 500 ms later. Check the Motorola + MC146818A or Dallas DS12887 data sheet for details. */ + + /* irq are locally disabled here */ + spin_lock(&rtc_lock); + /* Tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* Stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + cmos_minutes = bcd2bin(cmos_minutes); + + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) { + /* correct for half hour time zone */ + real_minutes += 30; + } + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + real_seconds = bin2bcd(real_seconds); + real_minutes = bin2bcd(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk_once(KERN_NOTICE + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); + + return retval; +} + +static int +alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case RTC_EPOCH_READ: + return put_user(rtc_epoch, (unsigned long __user *)arg); + case RTC_EPOCH_SET: + if (arg < 1900) + return -EINVAL; + rtc_epoch = arg; + return 0; + default: + return -ENOIOCTLCMD; + } +} + +static const struct rtc_class_ops alpha_rtc_ops = { + .read_time = alpha_rtc_read_time, + .set_time = alpha_rtc_set_time, + .set_mmss = alpha_rtc_set_mmss, + .ioctl = alpha_rtc_ioctl, +}; + +/* + * Similarly, except do the actual CMOS access on the boot cpu only. + * This requires marshalling the data across an interprocessor call. + */ + +#if defined(CONFIG_SMP) && \ + (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL)) +# define HAVE_REMOTE_RTC 1 + +union remote_data { + struct rtc_time *tm; + unsigned long now; + long retval; +}; + +static void +do_remote_read(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_read_time(NULL, x->tm); +} + +static int +remote_read_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_read, &x, 1); + return x.retval; + } + return alpha_rtc_read_time(NULL, tm); +} + +static void +do_remote_set(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_set_time(NULL, x->tm); +} + +static int +remote_set_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_set, &x, 1); + return x.retval; + } + return alpha_rtc_set_time(NULL, tm); +} + +static void +do_remote_mmss(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_set_mmss(NULL, x->now); +} + +static int +remote_set_mmss(struct device *dev, unsigned long now) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.now = now; + smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1); + return x.retval; + } + return alpha_rtc_set_mmss(NULL, now); +} + +static const struct rtc_class_ops remote_rtc_ops = { + .read_time = remote_read_time, + .set_time = remote_set_time, + .set_mmss = remote_set_mmss, + .ioctl = alpha_rtc_ioctl, +}; +#endif + +static int __init +alpha_rtc_init(void) +{ + const struct rtc_class_ops *ops; + struct platform_device *pdev; + struct rtc_device *rtc; + const char *name; + + init_rtc_epoch(); + name = "rtc-alpha"; + ops = &alpha_rtc_ops; + +#ifdef HAVE_REMOTE_RTC + if (alpha_mv.rtc_boot_cpu_only) + ops = &remote_rtc_ops; +#endif + + pdev = platform_device_register_simple(name, -1, NULL, 0); + rtc = devm_rtc_device_register(&pdev->dev, name, ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + platform_set_drvdata(pdev, rtc); + return 0; +} +device_initcall(alpha_rtc_init); diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 9e3107cc5ebb..b20af76f12c1 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -115,10 +115,17 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; #ifdef CONFIG_ALPHA_GENERIC struct alpha_machine_vector alpha_mv; +#endif + +#ifndef alpha_using_srm int alpha_using_srm; EXPORT_SYMBOL(alpha_using_srm); #endif +#ifndef alpha_using_qemu +int alpha_using_qemu; +#endif + static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, unsigned long); static struct alpha_machine_vector *get_sysvec_byname(const char *); @@ -529,11 +536,15 @@ setup_arch(char **cmdline_p) atomic_notifier_chain_register(&panic_notifier_list, &alpha_panic_block); -#ifdef CONFIG_ALPHA_GENERIC +#ifndef alpha_using_srm /* Assume that we've booted from SRM if we haven't booted from MILO. Detect the later by looking for "MILO" in the system serial nr. */ alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0; #endif +#ifndef alpha_using_qemu + /* Similarly, look for QEMU. */ + alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0; +#endif /* If we are using SRM, we want to allow callbacks as early as possible, so do this NOW, and then @@ -1207,6 +1218,7 @@ show_cpuinfo(struct seq_file *f, void *slot) char *systype_name; char *sysvariation_name; int nr_processors; + unsigned long timer_freq; cpu_index = (unsigned) (cpu->type - 1); cpu_name = "Unknown"; @@ -1218,6 +1230,12 @@ show_cpuinfo(struct seq_file *f, void *slot) nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + timer_freq = (100UL * hwrpb->intr_freq) / 4096; +#else + timer_freq = 100UL * CONFIG_HZ; +#endif + seq_printf(f, "cpu\t\t\t: Alpha\n" "cpu model\t\t: %s\n" "cpu variation\t\t: %ld\n" @@ -1243,8 +1261,7 @@ show_cpuinfo(struct seq_file *f, void *slot) (char*)hwrpb->ssn, est_cycle_freq ? : hwrpb->cycle_freq, est_cycle_freq ? "est." : "", - hwrpb->intr_freq / 4096, - (100 * hwrpb->intr_freq / 4096) % 100, + timer_freq / 100, timer_freq % 100, hwrpb->pagesize, hwrpb->pa_bits, hwrpb->max_asn, diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 9dbbcb3b9146..99ac36d5de4e 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -138,9 +138,11 @@ smp_callin(void) /* Get our local ticker going. */ smp_setup_percpu_timer(cpuid); + init_clockevent(); /* Call platform-specific callin, if specified */ - if (alpha_mv.smp_callin) alpha_mv.smp_callin(); + if (alpha_mv.smp_callin) + alpha_mv.smp_callin(); /* All kernel threads share the same mm context. */ atomic_inc(&init_mm.mm_count); @@ -498,35 +500,6 @@ smp_cpus_done(unsigned int max_cpus) ((bogosum + 2500) / (5000/HZ)) % 100); } - -void -smp_percpu_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - int cpu = smp_processor_id(); - unsigned long user = user_mode(regs); - struct cpuinfo_alpha *data = &cpu_data[cpu]; - - old_regs = set_irq_regs(regs); - - /* Record kernel PC. */ - profile_tick(CPU_PROFILING); - - if (!--data->prof_counter) { - /* We need to make like a normal interrupt -- otherwise - timer interrupts ignore the global interrupt lock, - which would be a Bad Thing. */ - irq_enter(); - - update_process_times(user); - - data->prof_counter = data->prof_multiplier; - - irq_exit(); - } - set_irq_regs(old_regs); -} - int setup_profiling_timer(unsigned int multiplier) { diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 5a0af11b3a61..608f2a7fa0a3 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -224,8 +224,6 @@ struct alpha_machine_vector jensen_mv __initmv = { .machine_check = jensen_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, .rtc_port = 0x170, - .rtc_get_time = common_get_rtc_time, - .rtc_set_time = common_set_rtc_time, .nr_irqs = 16, .device_interrupt = jensen_device_interrupt, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index c92e389ff219..f21d61fab678 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "proto.h" #include "err_impl.h" @@ -400,57 +399,6 @@ marvel_init_rtc(void) init_rtc_irq(); } -struct marvel_rtc_time { - struct rtc_time *time; - int retval; -}; - -#ifdef CONFIG_SMP -static void -smp_get_rtc_time(void *data) -{ - struct marvel_rtc_time *mrt = data; - mrt->retval = __get_rtc_time(mrt->time); -} - -static void -smp_set_rtc_time(void *data) -{ - struct marvel_rtc_time *mrt = data; - mrt->retval = __set_rtc_time(mrt->time); -} -#endif - -static unsigned int -marvel_get_rtc_time(struct rtc_time *time) -{ -#ifdef CONFIG_SMP - struct marvel_rtc_time mrt; - - if (smp_processor_id() != boot_cpuid) { - mrt.time = time; - smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1); - return mrt.retval; - } -#endif - return __get_rtc_time(time); -} - -static int -marvel_set_rtc_time(struct rtc_time *time) -{ -#ifdef CONFIG_SMP - struct marvel_rtc_time mrt; - - if (smp_processor_id() != boot_cpuid) { - mrt.time = time; - smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1); - return mrt.retval; - } -#endif - return __set_rtc_time(time); -} - static void marvel_smp_callin(void) { @@ -492,8 +440,7 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = { .vector_name = "MARVEL/EV7", DO_EV7_MMU, .rtc_port = 0x70, - .rtc_get_time = marvel_get_rtc_time, - .rtc_set_time = marvel_set_rtc_time, + .rtc_boot_cpu_only = 1, DO_MARVEL_IO, .machine_check = marvel_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index ea3395036556..ee39cee8064c 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -3,13 +3,7 @@ * * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update + * This file contains the clocksource time handling. * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 * "A Kernel Model for Precision Timekeeping" by Dave Mills * 1997-01-09 Adrian Sun @@ -21,9 +15,6 @@ * 1999-04-16 Thorsten Kranzkowski (dl8bcu@gmx.net) * fixed algorithm in do_gettimeofday() for calculating the precise time * from processor cycle counter (now taking lost_ticks into account) - * 2000-08-13 Jan-Benedict Glaw - * Fixed time_init to be aware of epoches != 1900. This prevents - * booting up in 2048 for me;) Code is stolen from rtc.c. * 2003-06-03 R. Scott Bailey * Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM */ @@ -46,40 +37,19 @@ #include #include #include -#include #include #include #include #include +#include #include "proto.h" #include "irq_impl.h" -static int set_rtc_mmss(unsigned long); - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#define TICK_SIZE (tick_nsec / 1000) - -/* - * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting - * by 48 gives us 16 bits for HZ while keeping the accuracy good even - * for large CPU clock rates. - */ -#define FIX_SHIFT 48 - -/* lump static variables together for more efficient access: */ -static struct { - /* cycle counter last time it got invoked */ - __u32 last_time; - /* ticks/cycle * 2^48 */ - unsigned long scaled_ticks_per_cycle; - /* partial unused tick */ - unsigned long partial_tick; -} state; - unsigned long est_cycle_freq; #ifdef CONFIG_IRQ_WORK @@ -108,109 +78,156 @@ static inline __u32 rpcc(void) return __builtin_alpha_rpcc(); } -int update_persistent_clock(struct timespec now) -{ - return set_rtc_mmss(now.tv_sec); -} -void read_persistent_clock(struct timespec *ts) + +/* + * The RTC as a clock_event_device primitive. + */ + +static DEFINE_PER_CPU(struct clock_event_device, cpu_ce); + +irqreturn_t +rtc_timer_interrupt(int irq, void *dev) { - unsigned int year, mon, day, hour, min, sec, epoch; - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - sec = bcd2bin(sec); - min = bcd2bin(min); - hour = bcd2bin(hour); - day = bcd2bin(day); - mon = bcd2bin(mon); - year = bcd2bin(year); - } + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); - /* PC-like is standard; used for year >= 70 */ - epoch = 1900; - if (year < 20) - epoch = 2000; - else if (year >= 20 && year < 48) - /* NT epoch */ - epoch = 1980; - else if (year >= 48 && year < 70) - /* Digital UNIX epoch */ - epoch = 1952; + /* Don't run the hook for UNUSED or SHUTDOWN. */ + if (likely(ce->mode == CLOCK_EVT_MODE_PERIODIC)) + ce->event_handler(ce); - printk(KERN_INFO "Using epoch = %d\n", epoch); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } - if ((year += epoch) < 1970) - year += 100; + return IRQ_HANDLED; +} - ts->tv_sec = mktime(year, mon, day, hour, min, sec); - ts->tv_nsec = 0; +static void +rtc_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce) +{ + /* The mode member of CE is updated in generic code. + Since we only support periodic events, nothing to do. */ +} + +static int +rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + /* This hook is for oneshot mode, which we don't support. */ + return -EINVAL; } +static void __init +init_rtc_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "rtc", + .features = CLOCK_EVT_FEAT_PERIODIC, + .rating = 100, + .cpumask = cpumask_of(cpu), + .set_mode = rtc_ce_set_mode, + .set_next_event = rtc_ce_set_next_event, + }; + clockevents_config_and_register(ce, CONFIG_HZ, 0, 0); +} + /* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "xtime_update()" routine every clocktick + * The QEMU clock as a clocksource primitive. */ -irqreturn_t timer_interrupt(int irq, void *dev) + +static cycle_t +qemu_cs_read(struct clocksource *cs) { - unsigned long delta; - __u32 now; - long nticks; + return qemu_get_vmtime(); +} -#ifndef CONFIG_SMP - /* Not SMP, do kernel PC profiling here. */ - profile_tick(CPU_PROFILING); -#endif +static struct clocksource qemu_cs = { + .name = "qemu", + .rating = 400, + .read = qemu_cs_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .max_idle_ns = LONG_MAX +}; - /* - * Calculate how many ticks have passed since the last update, - * including any previous partial leftover. Save any resulting - * fraction for the next pass. - */ - now = rpcc(); - delta = now - state.last_time; - state.last_time = now; - delta = delta * state.scaled_ticks_per_cycle + state.partial_tick; - state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); - nticks = delta >> FIX_SHIFT; - if (nticks) - xtime_update(nticks); +/* + * The QEMU alarm as a clock_event_device primitive. + */ - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); - } +static void +qemu_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce) +{ + /* The mode member of CE is updated for us in generic code. + Just make sure that the event is disabled. */ + qemu_set_alarm_abs(0); +} -#ifndef CONFIG_SMP - while (nticks--) - update_process_times(user_mode(get_irq_regs())); -#endif +static int +qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + qemu_set_alarm_rel(evt); + return 0; +} +static irqreturn_t +qemu_timer_interrupt(int irq, void *dev) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + ce->event_handler(ce); return IRQ_HANDLED; } +static void __init +init_qemu_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "qemu", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 400, + .cpumask = cpumask_of(cpu), + .set_mode = qemu_ce_set_mode, + .set_next_event = qemu_ce_set_next_event, + }; + + clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX); +} + + void __init common_init_rtc(void) { - unsigned char x; + unsigned char x, sel = 0; /* Reset periodic interrupt frequency. */ - x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; - /* Test includes known working values on various platforms - where 0x26 is wrong; we refuse to change those. */ - if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { - printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x); - CMOS_WRITE(0x26, RTC_FREQ_SELECT); +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; + /* Test includes known working values on various platforms + where 0x26 is wrong; we refuse to change those. */ + if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { + sel = RTC_REF_CLCK_32KHZ + 6; } +#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32 + sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ); +#else +# error "Unknown HZ from arch/alpha/Kconfig" +#endif + if (sel) { + printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n", + CONFIG_HZ, sel); + CMOS_WRITE(sel, RTC_FREQ_SELECT); + } /* Turn on periodic interrupts. */ x = CMOS_READ(RTC_CONTROL); @@ -233,16 +250,37 @@ common_init_rtc(void) init_rtc_irq(); } -unsigned int common_get_rtc_time(struct rtc_time *time) -{ - return __get_rtc_time(time); -} + +#ifndef CONFIG_ALPHA_WTINT +/* + * The RPCC as a clocksource primitive. + * + * While we have free-running timecounters running on all CPUs, and we make + * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter + * with the wall clock, that initialization isn't kept up-to-date across + * different time counters in SMP mode. Therefore we can only use this + * method when there's only one CPU enabled. + * + * When using the WTINT PALcall, the RPCC may shift to a lower frequency, + * or stop altogether, while waiting for the interrupt. Therefore we cannot + * use this method when WTINT is in use. + */ -int common_set_rtc_time(struct rtc_time *time) +static cycle_t read_rpcc(struct clocksource *cs) { - return __set_rtc_time(time); + return rpcc(); } +static struct clocksource clocksource_rpcc = { + .name = "rpcc", + .rating = 300, + .read = read_rpcc, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS +}; +#endif /* ALPHA_WTINT */ + + /* Validate a computed cycle counter result against the known bounds for the given processor core. There's too much brokenness in the way of timing hardware for any one method to work everywhere. :-( @@ -353,33 +391,6 @@ rpcc_after_update_in_progress(void) return rpcc(); } -#ifndef CONFIG_SMP -/* Until and unless we figure out how to get cpu cycle counters - in sync and keep them there, we can't use the rpcc. */ -static cycle_t read_rpcc(struct clocksource *cs) -{ - cycle_t ret = (cycle_t)rpcc(); - return ret; -} - -static struct clocksource clocksource_rpcc = { - .name = "rpcc", - .rating = 300, - .read = read_rpcc, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS -}; - -static inline void register_rpcc_clocksource(long cycle_freq) -{ - clocksource_register_hz(&clocksource_rpcc, cycle_freq); -} -#else /* !CONFIG_SMP */ -static inline void register_rpcc_clocksource(long cycle_freq) -{ -} -#endif /* !CONFIG_SMP */ - void __init time_init(void) { @@ -387,6 +398,15 @@ time_init(void) unsigned long cycle_freq, tolerance; long diff; + if (alpha_using_qemu) { + clocksource_register_hz(&qemu_cs, NSEC_PER_SEC); + init_qemu_clockevent(); + + timer_irqaction.handler = qemu_timer_interrupt; + init_rtc_irq(); + return; + } + /* Calibrate CPU clock -- attempt #1. */ if (!est_cycle_freq) est_cycle_freq = validate_cc_value(calibrate_cc_with_pit()); @@ -421,100 +441,25 @@ time_init(void) "and unable to estimate a proper value!\n"); } - /* From John Bowman : allow the values - to settle, as the Update-In-Progress bit going low isn't good - enough on some hardware. 2ms is our guess; we haven't found - bogomips yet, but this is close on a 500Mhz box. */ - __delay(1000000); - - - if (HZ > (1<<16)) { - extern void __you_loose (void); - __you_loose(); - } - - register_rpcc_clocksource(cycle_freq); - - state.last_time = cc1; - state.scaled_ticks_per_cycle - = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq; - state.partial_tick = 0L; + /* See above for restrictions on using clocksource_rpcc. */ +#ifndef CONFIG_ALPHA_WTINT + if (hwrpb->nr_processors == 1) + clocksource_register_hz(&clocksource_rpcc, cycle_freq); +#endif /* Startup the timer source. */ alpha_mv.init_rtc(); + init_rtc_clockevent(); } -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you won't notice until after reboot! - */ - - -static int -set_rtc_mmss(unsigned long nowtime) +/* Initialize the clock_event_device for secondary cpus. */ +#ifdef CONFIG_SMP +void __init +init_clockevent(void) { - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - /* irq are locally disabled here */ - spin_lock(&rtc_lock); - /* Tell the clock it's being set */ - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - /* Stop and reset prescaler */ - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - cmos_minutes = bcd2bin(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) { - /* correct for half hour time zone */ - real_minutes += 30; - } - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - real_seconds = bin2bcd(real_seconds); - real_minutes = bin2bcd(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } else { - printk_once(KERN_NOTICE - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; + if (alpha_using_qemu) + init_qemu_clockevent(); + else + init_rtc_clockevent(); } +#endif diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index bd0665cdc840..9c4c189eb22f 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -241,6 +241,21 @@ do_entIF(unsigned long type, struct pt_regs *regs) (const char *)(data[1] | (long)data[2] << 32), data[0]); } +#ifdef CONFIG_ALPHA_WTINT + if (type == 4) { + /* If CALL_PAL WTINT is totally unsupported by the + PALcode, e.g. MILO, "emulate" it by overwriting + the insn. */ + unsigned int *pinsn + = (unsigned int *) regs->pc - 1; + if (*pinsn == PAL_wtint) { + *pinsn = 0x47e01400; /* mov 0,$0 */ + imb(); + regs->r0 = 0; + return; + } + } +#endif /* ALPHA_WTINT */ die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), regs, type, NULL); } diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index ffb19b7da999..ff3c10721caf 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c @@ -130,7 +130,7 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst, *dst = word | tmp; checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -185,7 +185,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, *dst = word | tmp; checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -242,7 +242,7 @@ csum_partial_cfu_src_aligned(const unsigned long __user *src, stq_u(partial_dest | second_dest, dst); out: checksum += carry; - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src, stq_u(partial_dest | word | second_dest, dst); checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -339,7 +339,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, if (len) { if (!access_ok(VERIFY_READ, src, len)) { - *errp = -EFAULT; + if (errp) *errp = -EFAULT; memset(dst, 0, len); return sum; } diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index d8b94e1c7fca..356bb2fdd705 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -30,14 +30,15 @@ .set noat .set noreorder .text + .globl memset .globl __memset + .globl ___memset .globl __memsetw .globl __constant_c_memset - .globl memset - .ent __memset + .ent ___memset .align 5 -__memset: +___memset: .frame $30,0,$26,0 .prologue 0 @@ -227,7 +228,7 @@ end_b: nop nop ret $31,($26),1 # L0 : - .end __memset + .end ___memset /* * This is the original body of code, prior to replication and @@ -594,4 +595,5 @@ end_w: .end __memsetw -memset = __memset +memset = ___memset +__memset = ___memset diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S index 311b8cfc6914..76ccc6d1f364 100644 --- a/arch/alpha/lib/memset.S +++ b/arch/alpha/lib/memset.S @@ -19,11 +19,13 @@ .text .globl memset .globl __memset + .globl ___memset .globl __memsetw .globl __constant_c_memset - .ent __memset + + .ent ___memset .align 5 -__memset: +___memset: .frame $30,0,$26,0 .prologue 0 @@ -103,7 +105,7 @@ within_one_quad: end: ret $31,($26),1 /* E1 */ - .end __memset + .end ___memset .align 5 .ent __memsetw @@ -121,4 +123,5 @@ __memsetw: .end __memsetw -memset = __memset +memset = ___memset +__memset = ___memset diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 214b698cefea..c1f1a7eee953 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -25,7 +25,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_CONTEXT_TRACKING @@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER bool "Architected timer support" depends on CPU_V7 select ARM_ARCH_TIMER + select GENERIC_CLOCKEVENTS help This option enables support for the ARM architected timer @@ -1719,7 +1720,6 @@ config AEABI config OABI_COMPAT bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)" depends on AEABI && !THUMB2_KERNEL - default y help This option preserves the old syscall interface along with the new (ARM EABI) one. It also provides a compatibility layer to @@ -1727,11 +1727,16 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be UNPREDICTABLE (in fact it can be predicted that it won't work - at all). If in doubt say Y. + at all). If in doubt say N. config ARCH_HAS_HOLES_MEMORYMODEL bool diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi index 1e12aeff403b..aa537ed13f0a 100644 --- a/arch/arm/boot/dts/bcm2835.dtsi +++ b/arch/arm/boot/dts/bcm2835.dtsi @@ -85,6 +85,8 @@ reg = <0x7e205000 0x1000>; interrupts = <2 21>; clocks = <&clk_i2c>; + #address-cells = <1>; + #size-cells = <0>; status = "disabled"; }; @@ -93,6 +95,8 @@ reg = <0x7e804000 0x1000>; interrupts = <2 21>; clocks = <&clk_i2c>; + #address-cells = <1>; + #size-cells = <0>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/cros5250-common.dtsi b/arch/arm/boot/dts/cros5250-common.dtsi index dc259e8b8a73..9b186ac06c8b 100644 --- a/arch/arm/boot/dts/cros5250-common.dtsi +++ b/arch/arm/boot/dts/cros5250-common.dtsi @@ -27,6 +27,13 @@ i2c2_bus: i2c2-bus { samsung,pin-pud = <0>; }; + + max77686_irq: max77686-irq { + samsung,pins = "gpx3-2"; + samsung,pin-function = <0>; + samsung,pin-pud = <0>; + samsung,pin-drv = <0>; + }; }; i2c@12C60000 { @@ -35,6 +42,11 @@ max77686@09 { compatible = "maxim,max77686"; + interrupt-parent = <&gpx3>; + interrupts = <2 0>; + pinctrl-names = "default"; + pinctrl-0 = <&max77686_irq>; + wakeup-source; reg = <0x09>; voltage-regulators { diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 59154dc15fe4..fb28b2ecb1db 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -161,7 +161,7 @@ clocks = <&clks 197>, <&clks 3>, <&clks 197>, <&clks 107>, <&clks 0>, <&clks 118>, - <&clks 62>, <&clks 139>, + <&clks 0>, <&clks 139>, <&clks 0>; clock-names = "core", "rxtx0", "rxtx1", "rxtx2", diff --git a/arch/arm/boot/dts/omap-zoom-common.dtsi b/arch/arm/boot/dts/omap-zoom-common.dtsi index b0ee342598f0..68221fab978d 100644 --- a/arch/arm/boot/dts/omap-zoom-common.dtsi +++ b/arch/arm/boot/dts/omap-zoom-common.dtsi @@ -13,7 +13,7 @@ * they probably share the same GPIO IRQ * REVISIT: Add timing support from slls644g.pdf */ - 8250@3,0 { + uart@3,0 { compatible = "ns16550a"; reg = <3 0 0x100>; bank-width = <2>; diff --git a/arch/arm/boot/dts/omap2.dtsi b/arch/arm/boot/dts/omap2.dtsi index a2bfcde858a6..d0c5b37e248c 100644 --- a/arch/arm/boot/dts/omap2.dtsi +++ b/arch/arm/boot/dts/omap2.dtsi @@ -9,6 +9,7 @@ */ #include +#include #include #include "skeleton.dtsi" @@ -21,6 +22,8 @@ serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; + i2c0 = &i2c1; + i2c1 = &i2c2; }; cpus { @@ -53,6 +56,28 @@ ranges; ti,hwmods = "l3_main"; + aes: aes@480a6000 { + compatible = "ti,omap2-aes"; + ti,hwmods = "aes"; + reg = <0x480a6000 0x50>; + dmas = <&sdma 9 &sdma 10>; + dma-names = "tx", "rx"; + }; + + hdq1w: 1w@480b2000 { + compatible = "ti,omap2420-1w"; + ti,hwmods = "hdq1w"; + reg = <0x480b2000 0x1000>; + interrupts = <58>; + }; + + mailbox: mailbox@48094000 { + compatible = "ti,omap2-mailbox"; + ti,hwmods = "mailbox"; + reg = <0x48094000 0x200>; + interrupts = <26>; + }; + intc: interrupt-controller@1 { compatible = "ti,omap2-intc"; interrupt-controller; @@ -63,6 +88,7 @@ sdma: dma-controller@48056000 { compatible = "ti,omap2430-sdma", "ti,omap2420-sdma"; + ti,hwmods = "dma"; reg = <0x48056000 0x1000>; interrupts = <12>, <13>, @@ -73,21 +99,91 @@ #dma-requests = <64>; }; + i2c1: i2c@48070000 { + compatible = "ti,omap2-i2c"; + ti,hwmods = "i2c1"; + reg = <0x48070000 0x80>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <56>; + dmas = <&sdma 27 &sdma 28>; + dma-names = "tx", "rx"; + }; + + i2c2: i2c@48072000 { + compatible = "ti,omap2-i2c"; + ti,hwmods = "i2c2"; + reg = <0x48072000 0x80>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <57>; + dmas = <&sdma 29 &sdma 30>; + dma-names = "tx", "rx"; + }; + + mcspi1: mcspi@48098000 { + compatible = "ti,omap2-mcspi"; + ti,hwmods = "mcspi1"; + reg = <0x48098000 0x100>; + interrupts = <65>; + dmas = <&sdma 35 &sdma 36 &sdma 37 &sdma 38 + &sdma 39 &sdma 40 &sdma 41 &sdma 42>; + dma-names = "tx0", "rx0", "tx1", "rx1", + "tx2", "rx2", "tx3", "rx3"; + }; + + mcspi2: mcspi@4809a000 { + compatible = "ti,omap2-mcspi"; + ti,hwmods = "mcspi2"; + reg = <0x4809a000 0x100>; + interrupts = <66>; + dmas = <&sdma 43 &sdma 44 &sdma 45 &sdma 46>; + dma-names = "tx0", "rx0", "tx1", "rx1"; + }; + + rng: rng@480a0000 { + compatible = "ti,omap2-rng"; + ti,hwmods = "rng"; + reg = <0x480a0000 0x50>; + interrupts = <36>; + }; + + sham: sham@480a4000 { + compatible = "ti,omap2-sham"; + ti,hwmods = "sham"; + reg = <0x480a4000 0x64>; + interrupts = <51>; + dmas = <&sdma 13>; + dma-names = "rx"; + }; + uart1: serial@4806a000 { compatible = "ti,omap2-uart"; ti,hwmods = "uart1"; + reg = <0x4806a000 0x2000>; + interrupts = <72>; + dmas = <&sdma 49 &sdma 50>; + dma-names = "tx", "rx"; clock-frequency = <48000000>; }; uart2: serial@4806c000 { compatible = "ti,omap2-uart"; ti,hwmods = "uart2"; + reg = <0x4806c000 0x400>; + interrupts = <73>; + dmas = <&sdma 51 &sdma 52>; + dma-names = "tx", "rx"; clock-frequency = <48000000>; }; uart3: serial@4806e000 { compatible = "ti,omap2-uart"; ti,hwmods = "uart3"; + reg = <0x4806e000 0x400>; + interrupts = <74>; + dmas = <&sdma 53 &sdma 54>; + dma-names = "tx", "rx"; clock-frequency = <48000000>; }; diff --git a/arch/arm/boot/dts/omap2420.dtsi b/arch/arm/boot/dts/omap2420.dtsi index c8f9c55169ea..60c605de22dd 100644 --- a/arch/arm/boot/dts/omap2420.dtsi +++ b/arch/arm/boot/dts/omap2420.dtsi @@ -114,6 +114,15 @@ dma-names = "tx", "rx"; }; + msdi1: mmc@4809c000 { + compatible = "ti,omap2420-mmc"; + ti,hwmods = "msdi1"; + reg = <0x4809c000 0x80>; + interrupts = <83>; + dmas = <&sdma 61 &sdma 62>; + dma-names = "tx", "rx"; + }; + timer1: timer@48028000 { compatible = "ti,omap2420-timer"; reg = <0x48028000 0x400>; @@ -121,5 +130,19 @@ ti,hwmods = "timer1"; ti,timer-alwon; }; + + wd_timer2: wdt@48022000 { + compatible = "ti,omap2-wdt"; + ti,hwmods = "wd_timer2"; + reg = <0x48022000 0x80>; + }; }; }; + +&i2c1 { + compatible = "ti,omap2420-i2c"; +}; + +&i2c2 { + compatible = "ti,omap2420-i2c"; +}; diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index c535a5a2b27f..d624345666f5 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -175,6 +175,25 @@ dma-names = "tx", "rx"; }; + mmc1: mmc@4809c000 { + compatible = "ti,omap2-hsmmc"; + reg = <0x4809c000 0x200>; + interrupts = <83>; + ti,hwmods = "mmc1"; + ti,dual-volt; + dmas = <&sdma 61>, <&sdma 62>; + dma-names = "tx", "rx"; + }; + + mmc2: mmc@480b4000 { + compatible = "ti,omap2-hsmmc"; + reg = <0x480b4000 0x200>; + interrupts = <86>; + ti,hwmods = "mmc2"; + dmas = <&sdma 47>, <&sdma 48>; + dma-names = "tx", "rx"; + }; + timer1: timer@49018000 { compatible = "ti,omap2420-timer"; reg = <0x49018000 0x400>; @@ -182,5 +201,35 @@ ti,hwmods = "timer1"; ti,timer-alwon; }; + + mcspi3: mcspi@480b8000 { + compatible = "ti,omap2-mcspi"; + ti,hwmods = "mcspi3"; + reg = <0x480b8000 0x100>; + interrupts = <91>; + dmas = <&sdma 15 &sdma 16 &sdma 23 &sdma 24>; + dma-names = "tx0", "rx0", "tx1", "rx1"; + }; + + usb_otg_hs: usb_otg_hs@480ac000 { + compatible = "ti,omap2-musb"; + ti,hwmods = "usb_otg_hs"; + reg = <0x480ac000 0x1000>; + interrupts = <93>; + }; + + wd_timer2: wdt@49016000 { + compatible = "ti,omap2-wdt"; + ti,hwmods = "wd_timer2"; + reg = <0x49016000 0x80>; + }; }; }; + +&i2c1 { + compatible = "ti,omap2430-i2c"; +}; + +&i2c2 { + compatible = "ti,omap2430-i2c"; +}; diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 8e1a0245907f..41bca32409fc 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -404,7 +404,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data) BIT(slot)); if (edma_cc[ctlr]->intr_data[channel].callback) edma_cc[ctlr]->intr_data[channel].callback( - channel, DMA_COMPLETE, + channel, EDMA_DMA_COMPLETE, edma_cc[ctlr]->intr_data[channel].data); } } while (sh_ipr); @@ -459,7 +459,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) callback) { edma_cc[ctlr]->intr_data[k]. callback(k, - DMA_CC_ERROR, + EDMA_DMA_CC_ERROR, edma_cc[ctlr]->intr_data [k].data); } diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 9b28f1243bdc..240b29ef17db 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, return slot_cnt; } -static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) -{ - return 0; -} - -static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->dest_addr; - case AAU_ID: - return hw_desc.aau->dest_addr; - default: - BUG(); - } - return 0; -} - - -static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - BUG(); - return 0; -} - static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h index 122f86d8c991..250760e08103 100644 --- a/arch/arm/include/asm/hardware/iop_adma.h +++ b/arch/arm/include/asm/hardware/iop_adma.h @@ -82,8 +82,6 @@ struct iop_adma_chan { * @slot_cnt: total slots used in an transaction (group of operations) * @slots_per_op: number of slots per operation * @idx: pool index - * @unmap_src_cnt: number of xor sources - * @unmap_len: transaction bytecount * @tx_list: list of descriptors that are associated with one operation * @async_tx: support for the async_tx api * @group_list: list of slots that make up a multi-descriptor transaction @@ -99,8 +97,6 @@ struct iop_adma_desc_slot { u16 slot_cnt; u16 slots_per_op; u16 idx; - u16 unmap_src_cnt; - size_t unmap_len; struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 4dd21457ef9d..9ecccc865046 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x) static inline unsigned long __phys_to_virt(phys_addr_t x) { unsigned long t; - __pv_stub(x, t, "sub", __PV_BITS_31_24); + + /* + * 'unsigned long' cast discard upper word when + * phys_addr_t is 64 bit, and makes sure that inline + * assembler expression receives 32 bit argument + * in place where 'r' 32 bit operand is expected. + */ + __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); return t; } diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index be956dbf6bae..1571d126e9dd 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -61,7 +61,7 @@ extern void __pgd_error(const char *file, int line, pgd_t); * mapping to be mapped at. This is particularly important for * non-high vector CPUs. */ -#define FIRST_USER_ADDRESS PAGE_SIZE +#define FIRST_USER_ADDRESS (PAGE_SIZE * 2) /* * Use TASK_SIZE as the ceiling argument for free_pgtables() and diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7801866e626a..11d59b32fb8d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -508,6 +508,7 @@ __fixup_smp: teq r0, #0x0 @ '0' on actual UP A9 hardware beq __fixup_smp_on_up @ So its an A9 UP ldr r0, [r0, #4] @ read SCU Config +ARM_BE8(rev r0, r0) @ byteswap if big endian and r0, r0, #0x3 @ number of CPUs teq r0, #0x0 @ is 1? movne pc, lr @@ -643,8 +644,12 @@ ARM_BE8(rev16 ip, ip) ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr +#else +#ifdef CONFIG_CPU_ENDIAN_BE8 + moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction +#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -653,7 +658,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0 + orreq ip, ip, r0 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 57221e349a7c..f0d180d8b29f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -14,11 +14,12 @@ #include #include #include +#include #include #include #include -extern const unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; extern unsigned long kexec_start_address; @@ -142,6 +143,8 @@ void machine_kexec(struct kimage *image) { unsigned long page_list; unsigned long reboot_code_buffer_phys; + unsigned long reboot_entry = (unsigned long)relocate_new_kernel; + unsigned long reboot_entry_phys; void *reboot_code_buffer; /* @@ -168,16 +171,16 @@ void machine_kexec(struct kimage *image) /* copy our kernel relocation code to the control code page */ - memcpy(reboot_code_buffer, - relocate_new_kernel, relocate_new_kernel_size); + reboot_entry = fncpy(reboot_code_buffer, + reboot_entry, + relocate_new_kernel_size); + reboot_entry_phys = (unsigned long)reboot_entry + + (reboot_code_buffer_phys - (unsigned long)reboot_code_buffer); - - flush_icache_range((unsigned long) reboot_code_buffer, - (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); if (kexec_reinit) kexec_reinit(); - soft_restart(reboot_code_buffer_phys); + soft_restart(reboot_entry_phys); } diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf4864d..95858966d84e 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -2,10 +2,12 @@ * relocate_kernel.S - put the kernel image in place to boot */ +#include #include - .globl relocate_new_kernel -relocate_new_kernel: + .align 3 /* not needed for this code, but keeps fncpy() happy */ + +ENTRY(relocate_new_kernel) ldr r0,kexec_indirection_page ldr r1,kexec_start_address @@ -79,6 +81,8 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +ENDPROC(relocate_new_kernel) + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S index 3c5d0f2170fd..b84d0cb13682 100644 --- a/arch/arm/kernel/sigreturn_codes.S +++ b/arch/arm/kernel/sigreturn_codes.S @@ -30,6 +30,27 @@ * snippets. */ +/* + * In CPU_THUMBONLY case kernel arm opcodes are not allowed. + * Note in this case codes skips those instructions but it uses .org + * directive to keep correct layout of sigreturn_codes array. + */ +#ifndef CONFIG_CPU_THUMBONLY +#define ARM_OK(code...) code +#else +#define ARM_OK(code...) +#endif + + .macro arm_slot n + .org sigreturn_codes + 12 * (\n) +ARM_OK( .arm ) + .endm + + .macro thumb_slot n + .org sigreturn_codes + 12 * (\n) + 8 + .thumb + .endm + #if __LINUX_ARM_ARCH__ <= 4 /* * Note we manually set minimally required arch that supports @@ -45,26 +66,27 @@ .global sigreturn_codes .type sigreturn_codes, #object - .arm + .align sigreturn_codes: /* ARM sigreturn syscall code snippet */ - mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) - swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) + arm_slot 0 +ARM_OK( mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) ) +ARM_OK( swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) /* Thumb sigreturn syscall code snippet */ - .thumb + thumb_slot 0 movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) swi #0 /* ARM sigreturn_rt syscall code snippet */ - .arm - mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) - swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) + arm_slot 1 +ARM_OK( mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) ) +ARM_OK( swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) /* Thumb sigreturn_rt syscall code snippet */ - .thumb + thumb_slot 1 movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) swi #0 @@ -74,7 +96,7 @@ sigreturn_codes: * it is thumb case or not, so we need additional * word after real last entry. */ - .arm + arm_slot 2 .space 4 .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 6125f259b7b5..dbf0923e8d76 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors) memcpy(vectors + 0xfe0, vectors + 0xfe8, 4); } #else -static void __init kuser_init(void *vectors) +static inline void __init kuser_init(void *vectors) { } #endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 371958370de4..580906989db1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -334,6 +334,17 @@ out: return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -345,16 +356,27 @@ out: */ int create_hyp_mappings(void *from, void *to) { - unsigned long phys_addr = virt_to_phys(from); + phys_addr_t phys_addr; + unsigned long virt_addr; unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); - /* Check for a valid kernel memory mapping */ - if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) - return -EINVAL; + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); - return __create_hyp_mappings(hyp_pgd, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP); + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index e0c68d5bb7dc..52886b89706c 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -10,7 +10,7 @@ UNWIND( .fnstart ) and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 add r1, r1, r0, lsl #2 @ Get word offset -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) .arch_extension mp ALT_SMP(W(pldw) [r1]) ALT_UP(W(nop)) diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index 36b668d8e121..bc1033b897b4 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -40,6 +40,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 /* * loops = r0 * HZ * loops_per_jiffy / 1000000 */ + .align 3 @ Delay routine ENTRY(__loop_delay) diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index 2739ca2c1334..e0091685fd48 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -15,6 +15,7 @@ #include #include #include +#include